|
|
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent
            1 slow ops, oldest one blocked for 51555 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3709 active+clean
             2    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   1.2 MiB/s rd, 7.3 MiB/s wr, 1.54k op/s rd, 533 op/s wr

查看状态:

[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+inconsistent, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops

修复pg:
[root@compute01 ~]# ceph pg repair 9.167
instructing pg 9.167 on osd.9 to repair
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51700 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   921 KiB/s rd, 8.3 MiB/s wr, 1.17k op/s rd, 545 op/s wr

等会就出现正常的告警了。
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   698 KiB/s rd, 8.0 MiB/s wr, 901 op/s rd, 556 op/s wr

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   601 KiB/s rd, 8.2 MiB/s wr, 787 op/s rd, 569 op/s wr

检查下时间同步:
[root@compute01 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* 119.28.183.184                2   6    27    26  +3312us[+7317us] +/-   86ms
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51780 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   968 KiB/s rd, 9.1 MiB/s wr, 1.21k op/s rd, 624 op/s wr

[root@compute01 ~]# ceph health detail
HEALTH_WARN 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
SLOW_OPS 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
重启下 ceph-mon.target 服务:
[root@compute01 ~]# systemctl restart ceph-mon
ceph-mon@                   ceph-mon@compute01.service  ceph-mon.target
[root@compute01 ~]# systemctl restart ceph-mon.target
查看状态:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51855 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   782 KiB/s rd, 7.5 MiB/s wr, 989 op/s rd, 463 op/s wr

等几十秒钟再查看:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 3s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   508 KiB/s rd, 9.1 MiB/s wr, 667 op/s rd, 621 op/s wr
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 5s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   680 KiB/s rd, 10 MiB/s wr, 869 op/s rd, 723 op/s wr

[root@compute01 ~]#

状态正常,问题解决。

|
|