找回密码
 注册
查看: 873|回复: 2

openstack 相关gpu配置

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-6-10 22:02:28 | 显示全部楼层 |阅读模式
OpenStack的相关配置
1. 配置nova-scheduler (controller节点),编辑文件 /etc/nova/nova.conf:
[DEFAULT]
scheduler_default_filters = RetryFilter, AvailabilityZoneFilter, RamFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
scheduler_available_filters = nova.scheduler.filters.all_filters
重启nova-scheduler服务
[root@controller ~]# systemctl restart openstack-nova-scheduler.service
[root@controller ~]# systemctl status openstack-nova-scheduler.service
● openstack-nova-scheduler.service - OpenStack Nova Scheduler Server
   Loaded: loaded (/usr/lib/systemd/system/openstack-nova-scheduler.service; enabled; vendor preset: disabled)
   Active: active (running) since Fri 2022-06-10 21:50:56 CST; 22s ago
 Main PID: 105509 (nova-scheduler)
    Tasks: 9 (limit: 100963)
   Memory: 276.0M
   CGroup: /system.slice/openstack-nova-scheduler.service
           ├─105509 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105528 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105529 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105530 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105531 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105532 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105533 /usr/bin/python3 /usr/bin/nova-scheduler
           ├─105534 /usr/bin/python3 /usr/bin/nova-scheduler
           └─105535 /usr/bin/python3 /usr/bin/nova-scheduler
Jun 10 21:50:52 controller systemd[1]: Starting OpenStack Nova Scheduler Server...
Jun 10 21:50:56 controller systemd[1]: Started OpenStack Nova Scheduler Server.

2. 配置nova-api (controller节点),编辑文件 /etc/nova/nova.conf:
[pci]
alias = { "name": "nvidia1080", "product_id": "1b06", "vendor_id": "10de", "device_type": "type-PCI" }
[pci]
alias = { "name": "nvidiaGF119","product_id": "104a","vendor_id": "10de","device_type": "type-PCI" }

重启nova-api服务

[root@controller ~]# systemctl restart openstack-nova-api.service

3. 配置nova-compute(compute 节点),编辑文件/etc/nova/nova.conf:
[pci]
passthrough_whitelist = { "vendor_id": "10de", "product_id": "104a" }
alias = {
       "name": "nvidiaGF119",
       "product_id": "104a",
       "vendor_id": "10de",
       "device_type": "type-PCI"
}

[pci]
passthrough_whitelist = { "vendor_id": "10de", "product_id": "104a" }
alias = { "name": "nvidiaGF119", "product_id": "104a", "vendor_id": "10de", "device_type": "type-PCI" }

重启nova-compute服务
[root@compute01 ~]# systemctl restart openstack-nova-compute.service

[root@compute01 ~]# systemctl restart openstack-nova-compute.service
[root@compute01 ~]# tail -f /var/log/nova/
nova-compute.log    privsep-helper.log
[root@compute01 ~]# tail -f /var/log/nova/nova-compute.log
2022-06-10 22:10:51.891 12258 INFO oslo.privsep.daemon [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Spawned new privsep daemon via rootwrap
2022-06-10 22:10:51.796 12299 INFO oslo.privsep.daemon [-] privsep daemon starting
2022-06-10 22:10:51.800 12299 INFO oslo.privsep.daemon [-] privsep process running with uid/gid: 0/0
2022-06-10 22:10:51.804 12299 INFO oslo.privsep.daemon [-] privsep process running with capabilities (eff/prm/inh): CAP_NET_ADMIN/CAP_NET_ADMIN/none
2022-06-10 22:10:51.804 12299 INFO oslo.privsep.daemon [-] privsep daemon running as pid 12299
2022-06-10 22:10:52.437 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:bf:2a:4e,bridge_name='qbr24719437-3e',has_traffic_filtering=True,id=24719437-3ee6-469b-af02-c1fcea041be2,network=Network(b83e2ffc-eaad-455f-b299-18e09d58be32),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap24719437-3e')
2022-06-10 22:10:52.459 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:fe:4c:d1,bridge_name='qbr58f2e526-38',has_traffic_filtering=True,id=58f2e526-386b-43da-9818-208b6a34b6e8,network=Network(5eb067d8-cd9b-4eec-ac0b-b5982752e75d),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap58f2e526-38')
2022-06-10 22:10:52.478 12258 INFO os_vif [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Successfully plugged vif VIFBridge(active=True,address=fa:16:3e:bd:8b:42,bridge_name='qbr24c6e701-e5',has_traffic_filtering=True,id=24c6e701-e5b4-4277-9895-cc67a4097280,network=Network(5eb067d8-cd9b-4eec-ac0b-b5982752e75d),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap24c6e701-e5')
2022-06-10 22:10:52.481 12258 INFO nova.compute.manager [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] Looking for unclaimed instances stuck in BUILDING status for nodes managed by this host
2022-06-10 22:10:54.740 12258 INFO nova.virt.libvirt.host [req-e600b0bc-1cc4-4e85-a406-4d0c094560ee - - - - -] kernel doesn't support AMD SEV

三 验证
1. 创建设置flavor:
openstack flavor create --public --ram 2048 --disk 20 --vcpus 2 m1.large
openstack flavor set m1.large --property pci_passthrough:alias='nvidia1080:2'
nvidia1080 即为alias中的名称(name),2为GPU的数量。
2. 创建instance:
openstack server create --flavor m1.large --image cirros-0.3.5-x86_64-uec --wait test-pci
3. 在cirros下查看GPU信息如下:
$ lspci -k
...
00:05.0 Class 0300: 10de:1b06
00:06.0 Class 0300: 10de:1b06
...
四 NVIDIA显卡的问题
因为NVIDIA显卡的驱动会检测是否跑在虚拟机里,如果在虚拟机里驱动就会出错,所以我们需要对显卡驱动隐藏hypervisor id。在OpenStack的Pike版本中的Glance 镜像引入了img_hide_hypervisor_id=true的property,所以可以对镜像执行如下的命令隐藏hypervisor id:
$ openstack image set IMG-UUID --property img_hide_hypervisor_id=true
通过此镜像安装的instance就会隐藏hypervisor id。
如果是Pike之前的版本, 可以参考Consumer-grade GPUs in an OpenStack system (NVIDIA GPUs)这篇文章的做法。
可以通过下边的命令查看hypervisor id是否隐藏:
$ cpuid | grep hypervisor_id
   hypervisor_id = "KVMKVMKVM   "
   hypervisor_id = "KVMKVMKVM   "
上边的显示结果说明没有隐藏,下边的显示结果说明已经隐藏:
$ cpuid | grep hypervisor_id
   hypervisor_id = "  @  @    "
   hypervisor_id = "  @  @    "


1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-6-10 22:13:07 | 显示全部楼层
[root@controller ~]# openstack flavor create --public --ram 2048 --disk 20 --vcpus 2 m1.large
+----------------------------+--------------------------------------+
| Field                      | Value                                |
+----------------------------+--------------------------------------+
| OS-FLV-DISABLED:disabled   | False                                |
| OS-FLV-EXT-DATA:ephemeral  | 0                                    |
| description                | None                                 |
| disk                       | 20                                   |
| id                         | a56773dd-2ab1-453b-ab94-95c559334567 |
| name                       | m1.large                             |
| os-flavor-access:is_public | True                                 |
| properties                 |                                      |
| ram                        | 2048                                 |
| rxtx_factor                | 1.0                                  |
| swap                       |                                      |
| vcpus                      | 2                                    |
+----------------------------+--------------------------------------+
[root@controller ~]#  openstack flavor set m1.large --property pci_passthrough:alias='nvidia1080:2'

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-6-10 22:17:35 | 显示全部楼层
[root@controller ~]#  openstack flavor set m1.large --property pci_passthrough:alias='nvidiaGF119:1'

这里的值必须和nova.conf中的值一样,否则报错。
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 00:02 , Processed in 0.023666 second(s), 22 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表