找回密码
 注册
查看: 872|回复: 2

openstack 相关gpu配置

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-6-10 22:02:28 | 显示全部楼层 |阅读模式
OpenStack的相关配置
1. 配置nova-scheduler (controller节点),编辑文件 /etc/nova/nova.conf:
[DEFAULT]
scheduler_default_filters = RetryFilter, AvailabilityZoneFilter, RamFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
scheduler_available_filters = nova.scheduler.filters.all_filters
重启nova-scheduler服务
[root@controller ~]# systemctl restart openstack-nova-scheduler.service

[root@controller ~]# systemctl status openstack-nova-scheduler.service
● openstack-nova-scheduler.service - OpenStack Nova Scheduler Server
   Loaded: loaded (/usr/lib/systemd/system/openstack-nova-scheduler.service; enabled; vendor preset: disabled)
   Active: active (running) since Fri 2022-06-10 21:50:56 CST; 22s ago
 Main PID: 105509 (nova-scheduler)
    Tasks: 9 (limit: 100963)
   Memory: 276.0M
   CGroup: /system.slice/openstack-nova-scheduler.service
           ├─105509 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105528 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105529 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105530 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105531 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105532 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105533 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105534 /usr/bin/python3 /usr/bin/nova-scheduler
           └─105535 /usr/bin/python3 /usr/bin/nova-scheduler
Jun 10 21:50:52 controller systemd[1]: Starting OpenStack Nova Scheduler Server...
Jun 10 21:50:56 controller systemd[1]: Started OpenStack Nova Scheduler Server.

2. 配置nova-api (controller节点),编辑文件 /etc/nova/nova.conf:
[pci]
alias = { "name": "nvidia1080", "product_id": "1b06", "vendor_id": "10de", "device_type": "type-PCI" }
[pci]
alias = { "name": "nvidiaGF119","product_id": "104a","vendor_id": "10de","device_type": "type-PCI" }

重启nova-api服务

[root@controller ~]# systemctl restart openstack-nova-api.service

3. 配置nova-compute(compute 节点),编辑文件/etc/nova/nova.conf:
[pci]
passthrough_whitelist = { "vendor_id": "10de", "product_id": "104a" }
alias = {
       "name": "nvidiaGF119",
       "product_id": "104a",
       "vendor_id": "10de",
       "device_type": "type-PCI"
}


[pci]
passthrough_whitelist = { "vendor_id": "10de", "product_id": "104a" }
alias = { "name": "nvidiaGF119", "product_id": "104a", "vendor_id": "10de", "device_type": "type-PCI" }

重启nova-compute服务
[root@compute01 ~]# systemctl restart openstack-nova-compute.service

[root@compute01 ~]# systemctl restart openstack-nova-compute.service
[root@compute01 ~]# tail -f /var/log/nova/
nova-compute.log    privsep-helper.log
[root@compute01 ~]# tail -f /var/log/nova/nova-compute.log
2022-06-10 22:10:51.891 12258 INFO oslo.privsep.daemon [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Spawned new privsep daemon via rootwrap
2022-06-10 22:10:51.796 12299 INFO oslo.privsep.daemon [-] privsep daemon starting
2022-06-10 22:10:51.800 12299 INFO oslo.privsep.daemon [-] privsep process running with uid/gid: 0/0
2022-06-10 22:10:51.804 12299 INFO oslo.privsep.daemon [-] privsep process running with capabilities (eff/prm/inh): CAP_NET_ADMIN/CAP_NET_ADMIN/none
2022-06-10 22:10:51.804 12299 INFO oslo.privsep.daemon [-] privsep daemon running as pid 12299
2022-06-10 22:10:52.437 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:bf:2a:4e,bridge_name='qbr24719437-3e',has_traffic_filtering=True,id=24719437-3ee6-469b-af02-c1fcea041be2,network=Network(b83e2ffc-eaad-455f-b299-18e09d58be32),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap24719437-3e')
2022-06-10 22:10:52.459 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:fe:4c:d1,bridge_name='qbr58f2e526-38',has_traffic_filtering=True,id=58f2e526-386b-43da-9818-208b6a34b6e8,network=Network(5eb067d8-cd9b-4eec-ac0b-b5982752e75d),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap58f2e526-38')
2022-06-10 22:10:52.478 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:bd:8b:42,bridge_name='qbr24c6e701-e5',has_traffic_filtering=True,id=24c6e701-e5b4-4277-9895-cc67a4097280,network=Network(5eb067d8-cd9b-4eec-ac0b-b5982752e75d),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap24c6e701-e5')
2022-06-10 22:10:52.481 12258 INFO nova.compute.manager [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Looking for unclaimed instances stuck in BUILDING status for nodes managed by this host
2022-06-10 22:10:54.740 12258 INFO nova.virt.libvirt.host [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] kernel doesn't support AMD SEV


三 验证
1. 创建设置flavor:
openstack flavor create --public --ram 2048 --disk 20 --vcpus 2 m1.large
openstack flavor set m1.large --property pci_passthrough:alias='nvidia1080:2'
nvidia1080 即为alias中的name, 2为GPU的数量。
2. 创建instance:
openstack server create --flavor m1.large --image cirros-0.3.5-x86_64-uec --wait test-pci
3. 在cirros下查看GPU信息如下:
$ lspci -k
...
00:05.0 Class 0300: 10de:1b06
00:06.0 Class 0300: 10de:1b06
...
四 NVIDIA显卡的问题
因为NVIDIA显卡的驱动会检测是否跑在虚拟机里,如果在虚拟机里驱动就会出错,所以我们需要对显卡驱动隐藏hypervisor id。在OpenStack的Pike版本中的Glance 镜像引入了img_hide_hypervisor_id=true的property,所以可以对镜像执行如下的命令隐藏hypervisor id:
$ openstack image set IMG-UUID --property img_hide_hypervisor_id=true
通过此镜像安装的instance就会隐藏hypervisor id。
如果是Pike之前的版本, 可以参考Consumer-grade GPUs in an OpenStack system (NVIDIA GPUs)这篇文章的做法。
可以通过下边的命令查看hypervisor id是否隐藏:
$ cpuid | grep hypervisor_id
   hypervisor_id = "KVMKVMKVM   "
   hypervisor_id = "KVMKVMKVM   "
上边的显示结果说明没有隐藏,下边的显示结果说明已经隐藏:
$ cpuid | grep hypervisor_id
   hypervisor_id = "  @  @    "
   hypervisor_id = "  @  @    "


1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-6-10 22:13:07 | 显示全部楼层
[root@controller ~]# openstack flavor create --public --ram 2048 --disk 20 --vcpus 2 m1.large
+----------------------------+--------------------------------------+
| Field                      | Value                                |
+----------------------------+--------------------------------------+
| OS-FLV-DISABLED:disabled   | False                                |
| OS-FLV-EXT-DATA:ephemeral  | 0                                    |
| description                | None                                 |
| disk                       | 20                                   |
| id                         | a56773dd-2ab1-453b-ab94-95c559334567 |
| name                       | m1.large                             |
| os-flavor-access:is_public | True                                 |
| properties                 |                                      |
| ram                        | 2048                                 |
| rxtx_factor                | 1.0                                  |
| swap                       |                                      |
| vcpus                      | 2                                    |
+----------------------------+--------------------------------------+
[root@controller ~]#  openstack flavor set m1.large --property pci_passthrough:alias='nvidia1080:2'

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-6-10 22:17:35 | 显示全部楼层
[root@controller ~]#  openstack flavor set m1.large --property pci_passthrough:alias='nvidiaGF119:1'

这里的值(alias名称)必须和nova.conf中[pci] alias的name一样,
否则报错。
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 00:01 , Processed in 0.017935 second(s), 21 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表