|
|
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent
            1 slow ops, oldest one blocked for 51555 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3709 active+clean
             2    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   1.2 MiB/s rd, 7.3 MiB/s wr, 1.54k op/s rd, 533 op/s wr

查看状态:
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+inconsistent, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops

修复pg:
[root@compute01 ~]# ceph pg repair 9.167
instructing pg 9.167 on osd.9 to repair
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51700 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   921 KiB/s rd, 8.3 MiB/s wr, 1.17k op/s rd, 545 op/s wr

等会就出现正常的告警了。
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   698 KiB/s rd, 8.0 MiB/s wr, 901 op/s rd, 556 op/s wr
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   601 KiB/s rd, 8.2 MiB/s wr, 787 op/s rd, 569 op/s wr
检查下时间同步:
[root@compute01 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* 119.28.183.184                2   6    27    26  +3312us[+7317us] +/-   86ms
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51780 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   968 KiB/s rd, 9.1 MiB/s wr, 1.21k op/s rd, 624 op/s wr

[root@compute01 ~]# ceph health detail
HEALTH_WARN 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
SLOW_OPS 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
重启下 ceph-mon.target 服务(先用 Tab 补全查看可用的单元名):
[root@compute01 ~]# systemctl restart ceph-mon
ceph-mon@                   ceph-mon@compute01.service  ceph-mon.target
[root@compute01 ~]# systemctl restart ceph-mon.target
查看状态:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51855 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   782 KiB/s rd, 7.5 MiB/s wr, 989 op/s rd, 463 op/s wr

等几十秒钟再查看:

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 3s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   508 KiB/s rd, 9.1 MiB/s wr, 667 op/s rd, 621 op/s wr

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 5s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds: 1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   680 KiB/s rd, 10 MiB/s wr, 869 op/s rd, 723 op/s wr

[root@compute01 ~]#
状态正常,问题解决。

|
|