找回密码
 注册
查看: 2560|回复: 0

自动化kolla-ansible部署openstack+GPU透传方法

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2021-6-25 11:36:52 | 显示全部楼层 |阅读模式
1. CentOS7.x-8.x系列为虚拟机配置GPU直通
复制代码
1. 编辑文件vim  /etc/modules, 添加以下内容:
pci_stub
vfio
vfio_iommu_type1
vfio_pci
kvm
kvm_intel

2. 在KVM主机上启用IOMMU
#对于Intel芯片:
GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"
#对于AMD芯片:
GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt iommu=1"

vim /etc/default/grub

GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="crashkernel=auto rhgb quiet intel_iommu=on"
GRUB_DISABLE_RECOVERY="true"

   3.  重新生成grub
   EFI
   grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
   非EFI
   grub2-mkconfig -o /boot/grub2/grub.cfg
4.  将下列内容加入到blacklist中以避免被宿主机占用,编辑文件
vim  /etc/modprobe.d/blacklist.conf
blacklist snd_hda_intel
blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
blacklist nvidia

5.  查找显卡的Product ID 以及 Vendor ID:
yum install pciutils -y
lspci -nn | grep NVIDIA
如下:
[root@stein-a ~]# lspci -nn | grep NVIDIA
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
03:00.1 Audio device [0403]: NVIDIA Corporation GP104 High Definition Audio Controller [10de:10f0] (rev a1)

6.  编辑
vim /etc/modprobe.d/vfio.conf
# create new: for [ids=***], specify [vendor-ID:device-ID]
options vfio-pci ids=10de:1bb1,10de:10f0

7.  写入到系统启动项
echo 'vfio-pci' > /etc/modules-load.d/vfio-pci.conf

8.  重新生成initramfs
mv /boot/initramfs-$(uname -r).img /boot/initramfs-$(uname -r).img.bak
dracut -v /boot/initramfs-$(uname -r).img $(uname -r)

9.  重启系统
reboot

10. 验证
lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
[root@stein-a ~]# lspci -nnk -d 10de:1bb1
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
        Subsystem: NVIDIA Corporation Device [10de:11a3]
        Kernel driver in use: vfio-pci
        Kernel modules: nouveau
[root@stein-a ~]# dmesg | grep -i vfio
[    2.503115] VFIO - User Level meta-driver version: 0.3
[    2.515645] vfio_pci: add [10de:1bb1[ffff:ffff]] class 0x000000/00000000
[    2.515752] vfio_pci: add [10de:10f0[ffff:ffff]] class 0x000000/00000000
[root@stein-a ~]#
复制代码

2. Ubuntu18.04系列为虚拟机配置GPU直通
复制代码
1. 编辑文件vim  /etc/modules, 添加以下内容:
pci_stub
vfio
vfio_iommu_type1
vfio_pci
kvm
kvm_intel

2. 在KVM主机上启用IOMMU
#对于Intel芯片:
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_iommu=on"
#对于AMD芯片:
GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt iommu=1"

vim /etc/default/grub

GRUB_DEFAULT=0
GRUB_TIMEOUT_STYLE=hidden
GRUB_TIMEOUT=0
GRUB_DISTRIBUTOR=`lsb_release -i -s 2> /dev/null || echo Debian`
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_iommu=on"
GRUB_CMDLINE_LINUX=""

   3.  重新生成grub
   # Ubuntu没有grub2-mkconfig命令,也没有/boot/efi/EFI/centos目录;EFI与非EFI均执行:
   update-grub
   # 等价于: grub-mkconfig -o /boot/grub/grub.cfg
4.  将下列内容加入到blacklist中以避免被宿主机占用,编辑文件
vim  /etc/modprobe.d/blacklist.conf
blacklist snd_hda_intel
blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
blacklist nvidia

5.  查找显卡的Product ID 以及 Vendor ID:
apt install pciutils -y
lspci -nn | grep NVIDIA
如下:
[root@stein-a ~]# lspci -nn | grep NVIDIA
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
03:00.1 Audio device [0403]: NVIDIA Corporation GP104 High Definition Audio Controller [10de:10f0] (rev a1)

6.  编辑
vim /etc/modprobe.d/vfio.conf
# create new: for [ids=***], specify [vendor-ID:device-ID]
options vfio-pci ids=10de:1bb1,10de:10f0

7.  写入到系统启动项
echo 'vfio-pci' > /etc/modules-load.d/vfio-pci.conf

8.  重新生成initramfs(Ubuntu使用initramfs-tools,而非dracut)
update-initramfs -u

9.  重启系统
reboot

10. 验证
lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
root@kvm:~# lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
        Subsystem: NVIDIA Corporation GP104GL [Quadro P4000] [10de:11a3]
        Kernel driver in use: vfio-pci
        Kernel modules: nvidiafb, nouveau
root@kvm:~# dmesg | grep -i vfio
[    3.838714] VFIO - User Level meta-driver version: 0.3
[    3.846238] vfio-pci 0000:03:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
[    3.866370] vfio_pci: add [10de:1bb1[ffffffff:ffffffff]] class 0x000000/00000000
[    3.886375] vfio_pci: add [10de:10f0[ffffffff:ffffffff]] class 0x000000/00000000
复制代码

复制代码
#如果你单机部署的,在单机下配置。
#如果你是高可用部署的,在三台控制节点配置
1. 添加pci
vim /etc/kolla/config/nova/nova-compute.conf
[libvirt]
inject_password=true
cpu_mode=host-passthrough
virt_type = kvm
[pci]
passthrough_whitelist: { "vendor_id": "10de", "product_id": "1bb1" }

2. 修改nova.conf
vim /etc/kolla/config/nova.conf
[DEFAULT]
service_down_time = 120
cpu_allocation_ratio = 4.0
disk_allocation_ratio=1.0
ram_allocation_ratio = 1.0
reserved_host_disk_mb = 4096
reserved_host_memory_mb = 4096
allow_resize_to_same_host = True
remove_unused_base_images = False
image_cache_manager_interval = 0
resume_guests_state_on_host_boot = True

[PCI]
alias: { "vendor_id":"10de", "product_id":"1bb1", "device_type":"type-PCI", "name":"quadro-p4000" }
[filter_scheduler]
enabled_filters = RetryFilter, AvailabilityZoneFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
available_filters = nova.scheduler.filters.all_filters

3. GPU 类型实例创建
openstack flavor create --vcpus 4 --ram 8192 --disk 30 --property "pci_passthrough:alias"="quadro-p4000:1" g1.4c.8m.p400
复制代码

3. CentOS7.x系列 安装显卡驱动
复制代码
1.  查看是否含有英伟达显卡
lspci | grep -i NVIDIA
#下面说明有1块英伟达的显卡
[root@train-all ~]#  lspci | grep -i NVIDIA
04:00.0 VGA compatible controller: NVIDIA Corporation GP104GL [Quadro P4000] (rev a1)
04:00.1 Audio device: NVIDIA Corporation GP104 High Definition Audio Controller (rev a1)
[root@train-all ~]#

2.  添加ELRepo源
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org

3.  安装ELRepo
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm

4.  安装nvidia-detect
yum install nvidia-detect -y

5.  运行nvidia-detect
nvidia-detect -v

6.  查找驱动程序
yum search kmod-nvidia

7.  安装驱动程序
yum install kmod-nvidia.x86_64 -y

8.  查看是否已禁用Nouveau
lsmod | grep nouveau
#若没有输出 则说明禁用成功,否则执行下面的命令

9.  创建文件/etc/modprobe.d/blacklist-nouveau.conf,其内容如下:
vi /etc/modprobe.d/blacklist-nouveau.conf
添加
blacklist nouveau
options nouveau modeset=0

10. 重新生成内核initramfs
dracut --force

11.  重启系统
reboot

12.  测试
nvidia-smi
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 03:20 , Processed in 0.023123 second(s), 23 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表