|
|
集群中剔除了一个osd,没有新加入osd,进行了一次pg的均衡,做完均衡后集群出现 Degraded data redundancy: 256 pgs undersized,为了保证集群的pg副本数为3,需要新添加一个osd来做pg的均衡。

ceph集群中osd的recovery的相关参数

[root@ceph-1 ~]# ceph daemon osd.0 config show |grep recovery|grep osd
    "osd_allow_recovery_below_min_size": "true",
    "osd_async_recovery_min_cost": "100",
    "osd_debug_pretend_recovery_active": "false",
    "osd_debug_skip_full_check_in_recovery": "false",
    "osd_force_recovery_pg_log_entries_factor": "1.300000",
    "osd_min_recovery_priority": "0",
    "osd_recovery_cost": "20971520",
    "osd_recovery_delay_start": "0.000000",
    "osd_recovery_max_active": "3",
    "osd_recovery_max_chunk": "8388608",
    "osd_recovery_max_omap_entries_per_chunk": "8096",
    "osd_recovery_max_single_start": "1",
    "osd_recovery_op_priority": "3",
    "osd_recovery_op_warn_multiple": "16",
    "osd_recovery_priority": "5",
    "osd_recovery_retry_interval": "30.000000",
    "osd_recovery_sleep": "0.000000",
    "osd_recovery_sleep_hdd": "0.100000",
    "osd_recovery_sleep_hybrid": "0.025000",
    "osd_recovery_sleep_ssd": "0.000000",
    "osd_repair_during_recovery": "false",
    "osd_scrub_during_recovery": "false",

[root@ceph-1 ~]# ceph daemon osd.0 config show |grep recovery|grep osd
    "osd_allow_recovery_below_min_size": "true",
    "osd_debug_skip_full_check_in_recovery": "false",
    "osd_force_recovery_pg_log_entries_factor": "1.300000",
    "osd_min_recovery_priority": "0",
    "osd_recovery_cost": "20971520",
    "osd_recovery_delay_start": "0.000000",
    "osd_recovery_forget_lost_objects": "false",
    "osd_recovery_max_active": "15",
    "osd_recovery_max_chunk": "8388608",
    "osd_recovery_max_omap_entries_per_chunk": "8096",
    "osd_recovery_max_single_start": "1",
    "osd_recovery_op_priority": "3",
    "osd_recovery_op_warn_multiple": "16",
    "osd_recovery_priority": "5",
    "osd_recovery_retry_interval": "30.000000",
    "osd_recovery_sleep": "0.000000",
    "osd_recovery_sleep_hdd": "0.000000",
    "osd_recovery_sleep_hybrid": "0.025000",
    "osd_recovery_sleep_ssd": "0.000000",
    "osd_recovery_thread_suicide_timeout": "300",
    "osd_recovery_thread_timeout": "30",
    "osd_recovery_threads": "1",
    "osd_scrub_during_recovery": "false",

加快Recovery的速度
// 集群中添加一个osd, ceph-deploy osd create --bluestore node1 --data /dev/sdg --block-db cache/db1 --block-wal cache/wal1

// 查看每个osd节点的参数,或者通过 ceph daemon osd.x config get osd_recovery_op_priority 查看单个osd的参数

[root@ceph-1 ~]# ceph daemon osd.0 config get osd_recovery_op_priority
{
    "osd_recovery_op_priority": "3"
}

[root@ceph-1 ~]# ceph daemon osd.0 config show |egrep "osd_recovery_max_active|osd_recovery_op_priority|osd_max_backfills"
    "osd_max_backfills": "10",
    "osd_recovery_max_active": "15",
    "osd_recovery_op_priority": "3",

// 每个osd节点执行如下的参数调整,或者通过 ceph daemon osd.2 config set osd_recovery_op_priority 1 来设置:
[root@ceph-1 ~]# ceph daemon osd.0 config set osd_recovery_op_priority 1
{
    "success": "osd_recovery_op_priority = '1' (not observed, change may require restart) "
}

全部osd参数设置为0:
[root@ceph-1 ~]# ceph tell osd.* injectargs --osd_recovery_op_priority=0
osd.0: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.1: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.2: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.3: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.4: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.5: osd_recovery_op_priority = '0' (not observed, change may require restart)
get osd参数值:
[root@ceph-1 ~]# ceph daemon osd.0 config get osd_recovery_op_priority
{
    "osd_recovery_op_priority": "0"
}

使用 ceph tell osd.* injectargs 这种方式来设置,不需要重启osd服务,直接生效:
ceph tell osd.* injectargs --osd_max_backfills=128
ceph tell osd.* injectargs --osd_recovery_op_priority=0
ceph tell osd.* injectargs --osd_recovery_max_active=64
ceph tell osd.* injectargs --osd_recovery_max_single_start=64
ceph tell osd.* injectargs --osd_recovery_sleep_hdd=0

核心影响恢复速度的参数

osd_max_backfills:这个参数默认值10。由于一个osd承载了多个pg,所以一个osd中的pg很大可能需要做recovery。这个参数就是设置每个osd最多能让osd_max_backfills个pg同时做backfill。recovery做修复时,通过pull或者push的backfills的操作数一般是分开的,所以一般会考虑设置这个值大一些,用于primary osd通过push修复replica osd或者primary osd通过pull方式修复replica osd。

osd_recovery_op_priority:默认值10。osd修复操作的优先级,可小于该值;这个值越小,recovery优先级越高。高优先级会导致集群的性能降级直到recovery结束。

osd_recovery_max_active:默认值15。一个osd上可以承载多个pg,可能好几个pg都需要recovery,这个值限定该osd最多同时有多少pg做recovery。

osd_recovery_max_single_start:默认值5。这个值限定了每个pg可以启动recovery操作的最大数。
第一种情况,配置osd_recovery_max_single_start=1,osd_recovery_max_active=3,这代表每个osd在某个时间会为一个pg最多启动1个恢复操作,并且最多可以有3个恢复操作处于活跃状态。
第二种情况,配置osd_recovery_max_single_start=2,osd_recovery_max_active=3,这代表某个时间点osd会为一个pg启动2个恢复操作,并且最多能有3个恢复操作处于活跃状态。

osd_recovery_sleep_hdd:每个recovery操作之间的间隔时间,单位是秒(例如上文输出中的默认值0.100000即0.1秒)。
|
|