找回密码
 注册
查看: 2558|回复: 0

自动化kolla-ansible部署openstack+GPU透传方法

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2021-6-25 11:36:52 | 显示全部楼层 |阅读模式
1. CentOS7.x-8.x系列为虚拟机配置GPU直通
复制代码
1. 编辑文件vim  /etc/modules, 添加以下内容:
pci_stub
vfio
vfio_iommu_type1
vfio_pci
kvm
kvm_intel

2. 在KVM主机上启用IOMMU
#对于Intel芯片:
GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"
#对于AMD芯片:
GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt iommu=1"

vim /etc/default/grub

GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="crashkernel=auto rhgb quiet intel_iommu=on"
GRUB_DISABLE_RECOVERY="true"

   3.  重新生成grub
   EFI
   grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
   非EFI
   grub2-mkconfig -o /boot/grub2/grub.cfg
4.  将下列内容加入到blacklist中以避免被宿主机占用,编辑文件
vim  /etc/modprobe.d/blacklist.conf
blacklist snd_hda_intel
blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
blacklist nvidia

5.  查找显卡的Product ID 以及 Vendor ID:
yum install pciutils -y
lspci -nn | grep NVIDIA
如下:
[root@stein-a ~]# lspci -nn | grep NVIDIA
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
03:00.1 Audio device [0403]: NVIDIA Corporation GP104 High Definition Audio Controller [10de:10f0] (rev a1)

6.  编辑
vim /etc/modprobe.d/vfio.conf
# create new: for [ids=***], specify [vendor-ID:device-ID]
options vfio-pci ids=10de:1bb1,10de:10f0

7.  写入到系统启动项
echo 'vfio-pci' > /etc/modules-load.d/vfio-pci.conf

8.  重新生成initramfs
mv /boot/initramfs-$(uname -r).img /boot/initramfs-$(uname -r).img.bak
dracut -v /boot/initramfs-$(uname -r).img $(uname -r)

9.  重启系统
reboot

10. 验证
lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
[root@stein-a ~]# lspci -nnk -d 10de:1bb1
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
        Subsystem: NVIDIA Corporation Device [10de:11a3]
        Kernel driver in use: vfio-pci
        Kernel modules: nouveau
[root@stein-a ~]# dmesg | grep -i vfio
[    2.503115] VFIO - User Level meta-driver version: 0.3
[    2.515645] vfio_pci: add [10de:1bb1[ffff:ffff]] class 0x000000/00000000
[    2.515752] vfio_pci: add [10de:10f0[ffff:ffff]] class 0x000000/00000000
[root@stein-a ~]#
复制代码

2. Ubuntu18.04系列为虚拟机配置GPU直通
复制代码
1. 编辑文件vim  /etc/modules, 添加以下内容:
pci_stub
vfio
vfio_iommu_type1
vfio_pci
kvm
kvm_intel

2. 在KVM主机上启用IOMMU
#对于Intel芯片:
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_iommu=on"
#对于AMD芯片:
GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt iommu=1"

vim /etc/default/grub

GRUB_DEFAULT=0
GRUB_TIMEOUT_STYLE=hidden
GRUB_TIMEOUT=0
GRUB_DISTRIBUTOR=`lsb_release -i -s 2> /dev/null || echo Debian`
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_iommu=on"
GRUB_CMDLINE_LINUX=""

   3.  重新生成grub
   EFI
   grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
   非EFI
   grub2-mkconfig -o /boot/grub2/grub.cfg
   #注:以上为CentOS的命令和路径;Ubuntu上没有grub2-mkconfig,请执行 update-grub(等价于 grub-mkconfig -o /boot/grub/grub.cfg)
4.  将下列内容加入到blacklist中以避免被宿主机占用,编辑文件
vim  /etc/modprobe.d/blacklist.conf
blacklist snd_hda_intel
blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
blacklist nvidia

5.  查找显卡的Product ID 以及 Vendor ID:
apt install pciutils -y
lspci -nn | grep NVIDIA
如下:
[root@stein-a ~]# lspci -nn | grep NVIDIA
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
03:00.1 Audio device [0403]: NVIDIA Corporation GP104 High Definition Audio Controller [10de:10f0] (rev a1)

6.  编辑
vim /etc/modprobe.d/vfio.conf
# create new: for [ids=***], specify [vendor-ID:device-ID]
options vfio-pci ids=10de:1bb1,10de:10f0

7.  写入到系统启动项
echo 'vfio-pci' > /etc/modules-load.d/vfio-pci.conf

: E- I0 l7 m+ j3 M6 m1 }8.  重新生成initramfs& q+ {3 Z6 p7 z/ x8 N" _
dracut -v /boot/initramfs-$(uname -r).img $(uname -r)
  w) u5 x" |" ]; _$ ?0 z4 {0 B+ F8 T1 H8 p; \0 [0 a
9.  重启系统
3 v4 c) d5 m5 U$ h0 ]% freboot4 \% ~: O" O+ ?7 M. a* `

10. 验证
lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
root@kvm:~# lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
        Subsystem: NVIDIA Corporation GP104GL [Quadro P4000] [10de:11a3]
        Kernel driver in use: vfio-pci
        Kernel modules: nvidiafb, nouveau
root@kvm:~# dmesg | grep -i vfio
[    3.838714] VFIO - User Level meta-driver version: 0.3
[    3.846238] vfio-pci 0000:03:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
[    3.866370] vfio_pci: add [10de:1bb1[ffffffff:ffffffff]] class 0x000000/00000000
[    3.886375] vfio_pci: add [10de:10f0[ffffffff:ffffffff]] class 0x000000/00000000
复制代码

复制代码
#如果你单机部署的,在单机下配置。
#如果你是高可用部署的,在三台控制节点配置
1. 添加pci
vim /etc/kolla/config/nova/nova-compute.conf
[libvirt]
inject_password=true
cpu_mode=host-passthrough
virt_type = kvm
[pci]
passthrough_whitelist: { "vendor_id": "10de", "product_id": "1bb1" }

2. 修改nova.conf
vim /etc/kolla/config/nova.conf
[DEFAULT]
service_down_time = 120
cpu_allocation_ratio = 4.0
disk_allocation_ratio=1.0
ram_allocation_ratio = 1.0
reserved_host_disk_mb = 4096
reserved_host_memory_mb = 4096
allow_resize_to_same_host = True
remove_unused_base_images = False
image_cache_manager_interval = 0
resume_guests_state_on_host_boot = True

[PCI]
alias: { "vendor_id":"10de", "product_id":"1bb1", "device_type":"type-PCI", "name":"quadro-p4000" }
[filter_scheduler]
enabled_filters = RetryFilter, AvailabilityZoneFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
available_filters = nova.scheduler.filters.all_filters

3. GPU 类型实例创建
openstack flavor create --vcpus 4 --ram 8192 --disk 30 --property "pci_passthrough:alias"="quadro-p4000:1" g1.4c.8m.p400
复制代码

3. CentOS7.x系列 安装显卡驱动
复制代码
1.  查看是否含有英伟达显卡
lspci | grep -i NVIDIA
#下面说明有1块英伟达的显卡
[root@train-all ~]#  lspci | grep -i NVIDIA
04:00.0 VGA compatible controller: NVIDIA Corporation GP104GL [Quadro P4000] (rev a1)
04:00.1 Audio device: NVIDIA Corporation GP104 High Definition Audio Controller (rev a1)
[root@train-all ~]#

2.  添加ELRepo源
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org

3.  安装ELRepo
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm

4.  安装nvidia-detect
yum install nvidia-detect -y

5.  运行nvidia-detect
nvidia-detect -v

6.  查找驱动程序
yum search kmod-nvidia

7.  安装驱动程序
yum install kmod-nvidia.x86_64 -y

8.  查看禁用Nouveau
lsmod | grep nouveau
#若没有输出 则说明禁用成功,否则执行下面的命令

9.  创建文件/etc/modprobe.d/blacklist-nouveau.conf,其内容如下:
vi /etc/modprobe.d/blacklist-nouveau.conf
添加
blacklist nouveau
options nouveau modeset=0

. p+ ]+ q  n, ^# D7 G! p! @10. 重新生成内核initramfs5 e2 I! H7 w) p/ L. r2 p
dracut --force
# ?% S" j! T6 q! L. v# G3 \
; n8 w; \! [3 i+ ]11.  重启系统
1 m9 R& d, A2 |) {2 u, q) Creboot+ z- X0 `7 w* p- f5 |; N& l, s
9 Q: f+ y$ }* z- @- q; `
12.  测试, E7 E" d" L# _; G
nvidia-smi# S% ~5 ^$ ?, R; m& Y
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 02:05 , Processed in 0.020857 second(s), 23 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表