找回密码
 注册
查看: 559|回复: 0

HEALTH_ERR 1 scrub errors Possible data damage: 1 pg inconsistent 处理过程并恢复

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-9-16 15:21:58 | 显示全部楼层 |阅读模式
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent
            1 slow ops, oldest one blocked for 51555 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3709 active+clean
             2    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   1.2 MiB/s rd, 7.3 MiB/s wr, 1.54k op/s rd, 533 op/s wr

查看状态:
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+inconsistent, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops

修复 pg:
[root@compute01 ~]# ceph pg repair 9.167
instructing pg 9.167 on osd.9 to repair
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51700 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   921 KiB/s rd, 8.3 MiB/s wr, 1.17k op/s rd, 545 op/s wr

稍等片刻,pg 修复完成,scrub 错误消失,只剩下 slow ops 告警(HEALTH_WARN):
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   698 KiB/s rd, 8.0 MiB/s wr, 901 op/s rd, 556 op/s wr
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   601 KiB/s rd, 8.2 MiB/s wr, 787 op/s rd, 569 op/s wr

检查下时间同步:
[root@compute01 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* 119.28.183.184                2   6    27    26  +3312us[+7317us] +/-   86ms
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51780 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   968 KiB/s rd, 9.1 MiB/s wr, 1.21k op/s rd, 624 op/s wr
[root@compute01 ~]# ceph health detail
HEALTH_WARN 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
SLOW_OPS 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
重启 ceph-mon.target 服务,以清除 mon.compute01 上的 slow ops 告警:
[root@compute01 ~]# systemctl restart ceph-mon
ceph-mon@                   ceph-mon@compute01.service  ceph-mon.target
[root@compute01 ~]# systemctl restart ceph-mon.target
查看状态:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51855 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   782 KiB/s rd, 7.5 MiB/s wr, 989 op/s rd, 463 op/s wr

等几十秒钟再查看:

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 3s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   508 KiB/s rd, 9.1 MiB/s wr, 667 op/s rd, 621 op/s wr

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 5s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   680 KiB/s rd, 10 MiB/s wr, 869 op/s rd, 723 op/s wr

[root@compute01 ~]#

状态正常,问题解决。
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-11 23:59 , Processed in 0.016046 second(s), 23 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表