|
|
集群中剔除了一个osd,没有新加入,进行了一次pg的均衡,做完均衡后集群出现 Degraded data redundancy: 256 pgs undersized,为了保证集群的pg副本数为3,需要新添加一个osd来做pg的均衡;

ceph集群中osd的recovery的相关参数:
[root@ceph-1 ~]# ceph daemon osd.0 config show |grep recovery|grep osd
    "osd_allow_recovery_below_min_size": "true",
    "osd_async_recovery_min_cost": "100",
    "osd_debug_pretend_recovery_active": "false",
    "osd_debug_skip_full_check_in_recovery": "false",
    "osd_force_recovery_pg_log_entries_factor": "1.300000",
    "osd_min_recovery_priority": "0",
    "osd_recovery_cost": "20971520",
    "osd_recovery_delay_start": "0.000000",
    "osd_recovery_max_active": "3",
    "osd_recovery_max_chunk": "8388608",
    "osd_recovery_max_omap_entries_per_chunk": "8096",
    "osd_recovery_max_single_start": "1",
    "osd_recovery_op_priority": "3",
    "osd_recovery_op_warn_multiple": "16",
    "osd_recovery_priority": "5",
    "osd_recovery_retry_interval": "30.000000",
    "osd_recovery_sleep": "0.000000",
    "osd_recovery_sleep_hdd": "0.100000",
    "osd_recovery_sleep_hybrid": "0.025000",
    "osd_recovery_sleep_ssd": "0.000000",
    "osd_repair_during_recovery": "false",
    "osd_scrub_during_recovery": "false",
[root@ceph-1 ~]# ceph daemon osd.0 config show |grep recovery|grep osd
    "osd_allow_recovery_below_min_size": "true",
    "osd_debug_skip_full_check_in_recovery": "false",
    "osd_force_recovery_pg_log_entries_factor": "1.300000",
    "osd_min_recovery_priority": "0",
    "osd_recovery_cost": "20971520",
    "osd_recovery_delay_start": "0.000000",
    "osd_recovery_forget_lost_objects": "false",
    "osd_recovery_max_active": "15",
    "osd_recovery_max_chunk": "8388608",
    "osd_recovery_max_omap_entries_per_chunk": "8096",
    "osd_recovery_max_single_start": "1",
    "osd_recovery_op_priority": "3",
    "osd_recovery_op_warn_multiple": "16",
    "osd_recovery_priority": "5",
    "osd_recovery_retry_interval": "30.000000",
    "osd_recovery_sleep": "0.000000",
    "osd_recovery_sleep_hdd": "0.000000",
    "osd_recovery_sleep_hybrid": "0.025000",
    "osd_recovery_sleep_ssd": "0.000000",
    "osd_recovery_thread_suicide_timeout": "300",
    "osd_recovery_thread_timeout": "30",
    "osd_recovery_threads": "1",
    "osd_scrub_during_recovery": "false",
加快Recovery的速度

// 集群中添加一个osd: ceph-deploy osd create --bluestore node1 --data /dev/sdg --block-db cache/db1 --block-wal cache/wal1

// 查看每个osd节点的参数,或者通过 ceph daemon osd.x config get osd_recovery_op_priority 查看单个osd的参数
[root@ceph-1 ~]# ceph daemon osd.0 config get osd_recovery_op_priority
{
    "osd_recovery_op_priority": "3"
}
[root@ceph-1 ~]# ceph daemon osd.0 config show |egrep "osd_recovery_max_active|osd_recovery_op_priority|osd_max_backfills"
    "osd_max_backfills": "10",
    "osd_recovery_max_active": "15",
    "osd_recovery_op_priority": "3",
// 在每个osd节点上执行如下的参数调整(或者通过 ceph tell osd.* injectargs 对全部osd统一设置):
ceph daemon osd.2 config set osd_recovery_op_priority 1
[root@ceph-1 ~]# ceph daemon osd.0 config set osd_recovery_op_priority 1
{
    "success": "osd_recovery_op_priority = '1' (not observed, change may require restart) "
}
全部osd参数设置为0:
[root@ceph-1 ~]# ceph tell osd.* injectargs --osd_recovery_op_priority=0
osd.0: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.1: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.2: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.3: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.4: osd_recovery_op_priority = '0' (not observed, change may require restart)
osd.5: osd_recovery_op_priority = '0' (not observed, change may require restart)
get osd参数值:
[root@ceph-1 ~]# ceph daemon osd.0 config get osd_recovery_op_priority
{
    "osd_recovery_op_priority": "0"
}
使用 ceph tell osd.* injectargs 这种方式来设置,不需要重启osd服务,直接生效:
ceph tell osd.* injectargs --osd_max_backfills=128
ceph tell osd.* injectargs --osd_recovery_op_priority=0
ceph tell osd.* injectargs --osd_recovery_max_active=64
ceph tell osd.* injectargs --osd_recovery_max_single_start=64
ceph tell osd.* injectargs --osd_recovery_sleep_hdd=0

核心影响恢复速度的参数

osd_max_backfills
默认值10。由于一个osd承载了多个pg,所以一个osd中的pg很大可能需要做recovery。这个参数就是设置每个osd最多能让osd_max_backfills个pg同时做backfill。recovery做修复时,通过pull或者push的backfills的操作数一般是分开的,所以一般会考虑把这个值设置得大一些,用于primary osd通过push方式修复replica osd,或者primary osd通过pull方式修复replica osd。

osd_recovery_op_priority
默认值10。osd修复操作的优先级,可小于该值;这个值越小,recovery优先级越高。高优先级会导致集群的性能降级,直到recovery结束。

osd_recovery_max_active
默认值15。一个osd上可以承载多个pg,可能好几个pg都需要recovery,这个值限定该osd最多同时有多少pg做recovery。

osd_recovery_max_single_start
默认值5。这个值限定了每个pg可以启动recovery操作的最大数。
第一种情况,配置osd_recovery_max_single_start=1,osd_recovery_max_active=3,这代表每个osd在某个时间会为一个pg最多启动1个恢复操作,并且最多可以有3个恢复操作处于活跃状态。
第二种情况,配置osd_recovery_max_single_start=2,osd_recovery_max_active=3,这代表某个时间点osd会为一个pg启动2个恢复操作,并且最多能有3个恢复操作处于活跃状态。

osd_recovery_sleep_hdd
每个recovery操作之间的间隔时间,单位是秒。
|