|
|
集群中剔除了一个osd,没有加入新的osd,随后进行了一次pg的均衡。做完均衡后集群出现 Degraded data redundancy: 256 pgs undersized 告警。为了保证集群的pg副本数为3,需要新添加一个osd来做pg的均衡。

ceph集群中osd的recovery的相关参数
[root@ceph-1 ~]# ceph daemon osd.0 config show |grep recovery|grep osd
    "osd_allow_recovery_below_min_size": "true",
    "osd_async_recovery_min_cost": "100",
    "osd_debug_pretend_recovery_active": "false",
    "osd_debug_skip_full_check_in_recovery": "false",
    "osd_force_recovery_pg_log_entries_factor": "1.300000",
    "osd_min_recovery_priority": "0",
    "osd_recovery_cost": "20971520",
    "osd_recovery_delay_start": "0.000000",
    "osd_recovery_max_active": "3",
    "osd_recovery_max_chunk": "8388608",
    "osd_recovery_max_omap_entries_per_chunk": "8096",
    "osd_recovery_max_single_start": "1",
    "osd_recovery_op_priority": "3",
    "osd_recovery_op_warn_multiple": "16",
    "osd_recovery_priority": "5",
    "osd_recovery_retry_interval": "30.000000",
    "osd_recovery_sleep": "0.000000",
    "osd_recovery_sleep_hdd": "0.100000",
    "osd_recovery_sleep_hybrid": "0.025000",
    "osd_recovery_sleep_ssd": "0.000000",
    "osd_repair_during_recovery": "false",
    "osd_scrub_during_recovery": "false",
[root@ceph-1 ~]# ceph daemon osd.0 config show |grep recovery|grep osd
    "osd_allow_recovery_below_min_size": "true",
    "osd_debug_skip_full_check_in_recovery": "false",
    "osd_force_recovery_pg_log_entries_factor": "1.300000",
    "osd_min_recovery_priority": "0",
    "osd_recovery_cost": "20971520",
    "osd_recovery_delay_start": "0.000000",
    "osd_recovery_forget_lost_objects": "false",
    "osd_recovery_max_active": "15",
    "osd_recovery_max_chunk": "8388608",
    "osd_recovery_max_omap_entries_per_chunk": "8096",
    "osd_recovery_max_single_start": "1",
    "osd_recovery_op_priority": "3",
    "osd_recovery_op_warn_multiple": "16",
    "osd_recovery_priority": "5",
    "osd_recovery_retry_interval": "30.000000",
    "osd_recovery_sleep": "0.000000",
    "osd_recovery_sleep_hdd": "0.000000",
    "osd_recovery_sleep_hybrid": "0.025000",
    "osd_recovery_sleep_ssd": "0.000000",
    "osd_recovery_thread_suicide_timeout": "300",
    "osd_recovery_thread_timeout": "30",
    "osd_recovery_threads": "1",
    "osd_scrub_during_recovery": "false",

加快Recovery的速度
// 集群中添加一个osd: ceph-deploy osd create --bluestore node1 --data /dev/sdg --block-db cache/db1 --block-wal cache/wal1

// 查看每个osd节点的参数,或者通过 ceph daemon osd.x config get osd_recovery_op_priority 查看单个osd的参数
[root@ceph-1 ~]# ceph daemon osd.0 config get osd_recovery_op_priority
{
    "osd_recovery_op_priority": "3"
}

[root@ceph-1 ~]# ceph daemon osd.0 config show |egrep "osd_recovery_max_active|osd_recovery_op_priority|osd_max_backfills"
    "osd_max_backfills": "10",
    "osd_recovery_max_active": "15",
    "osd_recovery_op_priority": "3",
// 在每个osd节点上通过 ceph daemon 执行如下的参数调整,例如 ceph daemon osd.2 config set osd_recovery_op_priority 1
[root@ceph-1 ~]# ceph daemon osd.0 config set osd_recovery_op_priority 1
{
    "success": "osd_recovery_op_priority = '1' (not observed, change may require restart) "
}

全部osd参数设置为0:
[root@ceph-1 ~]# ceph tell osd.* injectargs --osd_recovery_op_priority=0
osd.0: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.1: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.2: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.3: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.4: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.5: osd_recovery_op_priority = '0' (not observed, change may require restart)

get osd参数值:
[root@ceph-1 ~]# ceph daemon osd.0 config get osd_recovery_op_priority
{
    "osd_recovery_op_priority": "0"
}

使用 ceph tell osd.* injectargs 这种方式来设置,不需要重启osd服务,直接生效:
ceph tell osd.* injectargs --osd_max_backfills=128
ceph tell osd.* injectargs --osd_recovery_op_priority=0
ceph tell osd.* injectargs --osd_recovery_max_active=64
ceph tell osd.* injectargs --osd_recovery_max_single_start=64
ceph tell osd.* injectargs --osd_recovery_sleep_hdd=0

核心影响恢复速度的参数
(注:以下默认值为原文所述;不同Ceph版本的默认值不同,本文环境中的实际值以上面 config show 的输出为准。)

osd_max_backfills
这个参数默认值10。由于一个osd承载了多个pg,所以一个osd中的pg很大可能需要做recovery。这个参数就是设置每个osd最多能让osd_max_backfills个pg同时做backfill。recovery做修复时,通过pull或者push的backfills的操作数一般是分开的,所以一般会考虑设置这个值大一些,用于primary osd通过push修复replica osd,或者primary osd通过pull方式修复replica osd。

osd_recovery_op_priority
默认值10。osd修复操作的优先级,可小于该值;这个值越小,recovery优先级越高。高优先级会导致集群的性能降级,直到recovery结束。

osd_recovery_max_active
默认值15。一个osd上可以承载多个pg,可能好几个pg都需要recovery,这个值限定该osd最多同时有多少pg做recovery。

osd_recovery_max_single_start
默认值5。这个值限定了每个pg可以启动recovery操作的最大数。
第一种情况,配置osd_recovery_max_single_start=1,osd_recovery_max_active=3,这代表每个osd在某个时间会为一个pg最多启动1个恢复操作,并且最多可以有3个恢复操作处于活跃状态。
第二种情况,配置osd_recovery_max_single_start=2,osd_recovery_max_active=3,这代表某个时间点osd会为一个pg启动2个恢复操作,并且最多能有3个恢复操作处于活跃状态。

osd_recovery_sleep_hdd
每个recovery操作之间的间隔时间,单位是秒(例如上面输出中的 0.100000 即 0.1 秒)。

|
|