|
|
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent
            1 slow ops, oldest one blocked for 51555 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3709 active+clean
             2    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   1.2 MiB/s rd, 7.3 MiB/s wr, 1.54k op/s rd, 533 op/s wr

查看状态:
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+inconsistent, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops

修复 pg:
[root@compute01 ~]# ceph pg repair 9.167
instructing pg 9.167 on osd.9 to repair
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51700 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   921 KiB/s rd, 8.3 MiB/s wr, 1.17k op/s rd, 545 op/s wr

等一会儿,就只剩下普通的告警(HEALTH_WARN)了。
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   698 KiB/s rd, 8.0 MiB/s wr, 901 op/s rd, 556 op/s wr

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   601 KiB/s rd, 8.2 MiB/s wr, 787 op/s rd, 569 op/s wr

检查下时间同步:
[root@compute01 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* 119.28.183.184                2   6    27    26  +3312us[+7317us] +/-   86ms
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51780 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   968 KiB/s rd, 9.1 MiB/s wr, 1.21k op/s rd, 624 op/s wr

[root@compute01 ~]# ceph health detail
HEALTH_WARN 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
SLOW_OPS 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
重启下 ceph-mon.target 服务:
[root@compute01 ~]# systemctl restart ceph-mon
ceph-mon@                   ceph-mon@compute01.service  ceph-mon.target
[root@compute01 ~]# systemctl restart ceph-mon.target
查看状态:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51855 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   782 KiB/s rd, 7.5 MiB/s wr, 989 op/s rd, 463 op/s wr

等几十秒钟再查看:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 3s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   508 KiB/s rd, 9.1 MiB/s wr, 667 op/s rd, 621 op/s wr
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 5s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   680 KiB/s rd, 10 MiB/s wr, 869 op/s rd, 723 op/s wr

[root@compute01 ~]#

状态正常,问题解决。

|
|