找回密码
 注册
查看: 875|回复: 2

openstack 相关gpu配置

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-6-10 22:02:28 | 显示全部楼层 |阅读模式
OpenStack的相关配置
1. 配置nova-scheduler (controller节点),编辑文件 /etc/nova/nova.conf:
[DEFAULT]
scheduler_default_filters = RetryFilter, AvailabilityZoneFilter, RamFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
scheduler_available_filters = nova.scheduler.filters.all_filters
重启nova-scheduler服务
[root@controller ~]# systemctl restart openstack-nova-scheduler.service
[root@controller ~]# systemctl status openstack-nova-scheduler.service
● openstack-nova-scheduler.service - OpenStack Nova Scheduler Server
   Loaded: loaded (/usr/lib/systemd/system/openstack-nova-scheduler.service; enabled; vendor preset: disabled)
   Active: active (running) since Fri 2022-06-10 21:50:56 CST; 22s ago
Main PID: 105509 (nova-scheduler)
    Tasks: 9 (limit: 100963)
   Memory: 276.0M
   CGroup: /system.slice/openstack-nova-scheduler.service
           ├─105509 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105528 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105529 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105530 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105531 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105532 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105533 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105534 /usr/bin/python3 /usr/bin/nova-scheduler
           └─105535 /usr/bin/python3 /usr/bin/nova-scheduler
Jun 10 21:50:52 controller systemd[1]: Starting OpenStack Nova Scheduler Server...
Jun 10 21:50:56 controller systemd[1]: Started OpenStack Nova Scheduler Server.
2. 配置nova-api (controller节点),编辑文件 /etc/nova/nova.conf:
[pci]
alias = { "name": "nvidia1080", "product_id": "1b06", "vendor_id": "10de", "device_type": "type-PCI" }
[pci]
alias = { "name": "nvidiaGF119","product_id": "104a","vendor_id": "10de","device_type": "type-PCI" }

重启nova-api服务

[root@controller ~]# systemctl restart openstack-nova-api.service
3. 配置nova-compute(compute 节点),编辑文件/etc/nova/nova.conf:
[pci]
passthrough_whitelist = { "vendor_id": "10de", "product_id": "104a" }
alias = {
       "name": "nvidiaGF119",
       "product_id": "104a",
       "vendor_id": "10de",
       "device_type": "type-PCI"
}

[pci]
passthrough_whitelist = { "vendor_id": "10de", "product_id": "104a" }
alias = { "name": "nvidiaGF119", "product_id": "104a", "vendor_id": "10de", "device_type": "type-PCI" }
重启nova-compute服务
[root@compute01 ~]# systemctl restart openstack-nova-compute.service

[root@compute01 ~]# systemctl restart openstack-nova-compute.service
[root@compute01 ~]# tail -f /var/log/nova/
nova-compute.log    privsep-helper.log
[root@compute01 ~]# tail -f /var/log/nova/nova-compute.log
2022-06-10 22:10:51.891 12258 INFO oslo.privsep.daemon [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Spawned new privsep daemon via rootwrap
2022-06-10 22:10:51.796 12299 INFO oslo.privsep.daemon [-] privsep daemon starting
2022-06-10 22:10:51.800 12299 INFO oslo.privsep.daemon [-] privsep process running with uid/gid: 0/0
2022-06-10 22:10:51.804 12299 INFO oslo.privsep.daemon [-] privsep process running with capabilities (eff/prm/inh): CAP_NET_ADMIN/CAP_NET_ADMIN/none
2022-06-10 22:10:51.804 12299 INFO oslo.privsep.daemon [-] privsep daemon running as pid 12299
2022-06-10 22:10:52.437 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:bf:2a:4e,bridge_name='qbr24719437-3e',has_traffic_filtering=True,id=24719437-3ee6-469b-af02-c1fcea041be2,network=Network(b83e2ffc-eaad-455f-b299-18e09d58be32),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap24719437-3e')
2022-06-10 22:10:52.459 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:fe:4c:d1,bridge_name='qbr58f2e526-38',has_traffic_filtering=True,id=58f2e526-386b-43da-9818-208b6a34b6e8,network=Network(5eb067d8-cd9b-4eec-ac0b-b5982752e75d),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap58f2e526-38')
2022-06-10 22:10:52.478 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:bd:8b:42,bridge_name='qbr24c6e701-e5',has_traffic_filtering=True,id=24c6e701-e5b4-4277-9895-cc67a4097280,network=Network(5eb067d8-cd9b-4eec-ac0b-b5982752e75d),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap24c6e701-e5')
2022-06-10 22:10:52.481 12258 INFO nova.compute.manager [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Looking for unclaimed instances stuck in BUILDING status for nodes managed by this host
2022-06-10 22:10:54.740 12258 INFO nova.virt.libvirt.host [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] kernel doesn't support AMD SEV
三 验证
1. 创建设置flavor:
openstack flavor create --public --ram 2048 --disk 20 --vcpus 2 m1.large
openstack flavor set m1.large --property pci_passthrough:alias='nvidia1080:2'
nvidia1080 即为alias中的name, 2为GPU的数量。
2. 创建instance:
openstack server create --flavor m1.large --image cirros-0.3.5-x86_64-uec --wait test-pci
3. 在cirros下查看GPU信息如下:
$ lspci -k
...
00:05.0 Class 0300: 10de:1b06
00:06.0 Class 0300: 10de:1b06
...
四 NVIDIA显卡的问题
因为NVIDIA显卡的驱动会检测是否跑在虚拟机里,如果在虚拟机里驱动就会出错,所以我们需要对显卡驱动隐藏hypervisor id。在OpenStack的Pike版本中的Glance 镜像引入了img_hide_hypervisor_id=true的property,所以可以对镜像执行如下的命令隐藏hypervisor id:
$ openstack image set IMG-UUID --property img_hide_hypervisor_id=true
通过此镜像安装的instance就会隐藏hypervisor id。
如果是Pike之前的版本, 可以参考Consumer-grade GPUs in an OpenStack system (NVIDIA GPUs)这篇文章的做法。
可以通过下边的命令查看hypervisor id是否隐藏:
$ cpuid | grep hypervisor_id
   hypervisor_id = "KVMKVMKVM   "
   hypervisor_id = "KVMKVMKVM   "
上边的显示结果说明没有隐藏,下边的显示结果说明已经隐藏:
$ cpuid | grep hypervisor_id
   hypervisor_id = "  @  @    "
   hypervisor_id = "  @  @    "

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-6-10 22:13:07 | 显示全部楼层
[root@controller ~]# openstack flavor create --public --ram 2048 --disk 20 --vcpus 2 m1.large
+----------------------------+--------------------------------------+
| Field                      | Value                                |
+----------------------------+--------------------------------------+
| OS-FLV-DISABLED:disabled   | False                                |
| OS-FLV-EXT-DATA:ephemeral  | 0                                    |
| description                | None                                 |
| disk                       | 20                                   |
| id                         | a56773dd-2ab1-453b-ab94-95c559334567 |
| name                       | m1.large                             |
| os-flavor-access:is_public | True                                 |
| properties                 |                                      |
| ram                        | 2048                                 |
| rxtx_factor                | 1.0                                  |
| swap                       |                                      |
| vcpus                      | 2                                    |
+----------------------------+--------------------------------------+
[root@controller ~]#  openstack flavor set m1.large --property pci_passthrough:alias='nvidia1080:2'

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-6-10 22:17:35 | 显示全部楼层
[root@controller ~]#  openstack flavor set m1.large --property pci_passthrough:alias='nvidiaGF119:1'

这里的值必须和nova.conf中的值一样
否则报错
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 01:01 , Processed in 0.017003 second(s), 22 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表