找回密码
 注册
查看: 874|回复: 2

openstack 相关gpu配置

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-6-10 22:02:28 | 显示全部楼层 |阅读模式
OpenStack的相关配置
1. 配置nova-scheduler (controller节点),编辑文件 /etc/nova/nova.conf:
[DEFAULT]
scheduler_default_filters = RetryFilter, AvailabilityZoneFilter, RamFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
scheduler_available_filters = nova.scheduler.filters.all_filters
重启nova-scheduler服务
[root@controller ~]# systemctl restart openstack-nova-scheduler.service

[root@controller ~]# systemctl status openstack-nova-scheduler.service
● openstack-nova-scheduler.service - OpenStack Nova Scheduler Server
   Loaded: loaded (/usr/lib/systemd/system/openstack-nova-scheduler.service; enabled; vendor preset: disabled)
   Active: active (running) since Fri 2022-06-10 21:50:56 CST; 22s ago
 Main PID: 105509 (nova-scheduler)
    Tasks: 9 (limit: 100963)
   Memory: 276.0M
   CGroup: /system.slice/openstack-nova-scheduler.service
           ├─105509 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105528 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105529 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105530 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105531 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105532 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105533 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105534 /usr/bin/python3 /usr/bin/nova-scheduler
           └─105535 /usr/bin/python3 /usr/bin/nova-scheduler

Jun 10 21:50:52 controller systemd[1]: Starting OpenStack Nova Scheduler Server...
Jun 10 21:50:56 controller systemd[1]: Started OpenStack Nova Scheduler Server.

2. 配置nova-api (controller节点),编辑文件 /etc/nova/nova.conf:
[pci]
alias = { "name": "nvidia1080", "product_id": "1b06", "vendor_id": "10de", "device_type": "type-PCI" }
[pci]
alias = { "name": "nvidiaGF119","product_id": "104a","vendor_id": "10de","device_type": "type-PCI" }

重启nova-api服务

[root@controller ~]# systemctl restart openstack-nova-api.service

3. 配置nova-compute(compute 节点),编辑文件/etc/nova/nova.conf:
[pci]
passthrough_whitelist = { "vendor_id": "10de", "product_id": "104a" }
alias = {
       "name": "nvidiaGF119",
       "product_id": "104a",
       "vendor_id": "10de",
       "device_type": "type-PCI"
 }

[pci]
passthrough_whitelist = { "vendor_id": "10de", "product_id": "104a" }
alias = { "name": "nvidiaGF119", "product_id": "104a", "vendor_id": "10de", "device_type": "type-PCI" }

重启nova-compute服务
[root@compute01 ~]# systemctl restart openstack-nova-compute.service

[root@compute01 ~]# systemctl restart openstack-nova-compute.service
[root@compute01 ~]# tail -f /var/log/nova/
nova-compute.log    privsep-helper.log
[root@compute01 ~]# tail -f /var/log/nova/nova-compute.log
2022-06-10 22:10:51.891 12258 INFO oslo.privsep.daemon [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Spawned new privsep daemon via rootwrap
2022-06-10 22:10:51.796 12299 INFO oslo.privsep.daemon [-] privsep daemon starting
2022-06-10 22:10:51.800 12299 INFO oslo.privsep.daemon [-] privsep process running with uid/gid: 0/0
2022-06-10 22:10:51.804 12299 INFO oslo.privsep.daemon [-] privsep process running with capabilities (eff/prm/inh): CAP_NET_ADMIN/CAP_NET_ADMIN/none
2022-06-10 22:10:51.804 12299 INFO oslo.privsep.daemon [-] privsep daemon running as pid 12299
2022-06-10 22:10:52.437 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:bf:2a:4e,bridge_name='qbr24719437-3e',has_traffic_filtering=True,id=24719437-3ee6-469b-af02-c1fcea041be2,network=Network(b83e2ffc-eaad-455f-b299-18e09d58be32),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap24719437-3e')
2022-06-10 22:10:52.459 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:fe:4c:d1,bridge_name='qbr58f2e526-38',has_traffic_filtering=True,id=58f2e526-386b-43da-9818-208b6a34b6e8,network=Network(5eb067d8-cd9b-4eec-ac0b-b5982752e75d),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap58f2e526-38')
2022-06-10 22:10:52.478 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:bd:8b:42,bridge_name='qbr24c6e701-e5',has_traffic_filtering=True,id=24c6e701-e5b4-4277-9895-cc67a4097280,network=Network(5eb067d8-cd9b-4eec-ac0b-b5982752e75d),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap24c6e701-e5')
2022-06-10 22:10:52.481 12258 INFO nova.compute.manager [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Looking for unclaimed instances stuck in BUILDING status for nodes managed by this host
2022-06-10 22:10:54.740 12258 INFO nova.virt.libvirt.host [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] kernel doesn't support AMD SEV

三 验证
1. 创建设置flavor:
openstack flavor create --public --ram 2048 --disk 20 --vcpus 2 m1.large
openstack flavor set m1.large --property pci_passthrough:alias='nvidia1080:2'
nvidia1080 即为alias中的name, 2为GPU的数量。
2. 创建instance:
openstack server create --flavor m1.large --image cirros-0.3.5-x86_64-uec --wait test-pci
3. 在cirros下查看GPU信息如下:
$ lspci -k
...
00:05.0 Class 0300: 10de:1b06
00:06.0 Class 0300: 10de:1b06
...
四 NVIDIA显卡的问题
因为NVIDIA显卡的驱动会检测是否跑在虚拟机里,如果在虚拟机里驱动就会出错,所以我们需要对显卡驱动隐藏hypervisor id。在OpenStack的Pike版本中的Glance 镜像引入了img_hide_hypervisor_id=true的property,所以可以对镜像执行如下的命令隐藏hypervisor id:
$ openstack image set IMG-UUID --property img_hide_hypervisor_id=true
通过此镜像安装的instance就会隐藏hypervisor id。
如果是Pike之前的版本, 可以参考Consumer-grade GPUs in an OpenStack system (NVIDIA GPUs)这篇文章的做法。
可以通过下边的命令查看hypervisor id是否隐藏:
$ cpuid | grep hypervisor_id
   hypervisor_id = "KVMKVMKVM   "
   hypervisor_id = "KVMKVMKVM   "
上边的显示结果说明没有隐藏,下边的显示结果说明已经隐藏:
$ cpuid | grep hypervisor_id
   hypervisor_id = "  @  @    "
   hypervisor_id = "  @  @    "


1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-6-10 22:13:07 | 显示全部楼层
[root@controller ~]# openstack flavor create --public --ram 2048 --disk 20 --vcpus 2 m1.large
+----------------------------+--------------------------------------+
| Field                      | Value                                |
+----------------------------+--------------------------------------+
| OS-FLV-DISABLED:disabled   | False                                |
| OS-FLV-EXT-DATA:ephemeral  | 0                                    |
| description                | None                                 |
| disk                       | 20                                   |
| id                         | a56773dd-2ab1-453b-ab94-95c559334567 |
| name                       | m1.large                             |
| os-flavor-access:is_public | True                                 |
| properties                 |                                      |
| ram                        | 2048                                 |
| rxtx_factor                | 1.0                                  |
| swap                       |                                      |
| vcpus                      | 2                                    |
+----------------------------+--------------------------------------+
[root@controller ~]#  openstack flavor set m1.large --property pci_passthrough:alias='nvidia1080:2'

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-6-10 22:17:35 | 显示全部楼层
[root@controller ~]#  openstack flavor set m1.large --property pci_passthrough:alias='nvidiaGF119:1'

这里的值(alias 名称)必须和nova.conf中alias的name一样,否则报错。
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 00:58 , Processed in 0.019606 second(s), 22 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表