找回密码
 注册
查看: 557|回复: 0

HEALTH_ERR 1 scrub errors Possible data damage: 1 pg inconsistent 处理过程并恢复

[复制链接]

0

主题

0

回帖

9

积分

管理员

积分
9
QQ
发表于 2022-9-16 15:21:58 | 显示全部楼层 |阅读模式
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent
            1 slow ops, oldest one blocked for 51555 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3709 active+clean
             2    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   1.2 MiB/s rd, 7.3 MiB/s wr, 1.54k op/s rd, 533 op/s wr

查看状态:

[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+inconsistent, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops

修复pg:
[root@compute01 ~]# ceph pg repair 9.167
instructing pg 9.167 on osd.9 to repair
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51700 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   921 KiB/s rd, 8.3 MiB/s wr, 1.17k op/s rd, 545 op/s wr

稍等片刻,scrub 错误消失,集群只剩下 slow ops 这一条普通告警(HEALTH_WARN):
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   698 KiB/s rd, 8.0 MiB/s wr, 901 op/s rd, 556 op/s wr

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   601 KiB/s rd, 8.2 MiB/s wr, 787 op/s rd, 569 op/s wr

检查下时间同步:
[root@compute01 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* 119.28.183.184                2   6    27    26  +3312us[+7317us] +/-   86ms
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51780 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   968 KiB/s rd, 9.1 MiB/s wr, 1.21k op/s rd, 624 op/s wr

[root@compute01 ~]# ceph health detail
HEALTH_WARN 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
SLOW_OPS 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
重启下 ceph-mon.target 服务(先输入 ceph-mon 后按 Tab,可列出可补全的单元名):
[root@compute01 ~]# systemctl restart ceph-mon
ceph-mon@                   ceph-mon@compute01.service  ceph-mon.target
[root@compute01 ~]# systemctl restart ceph-mon.target
查看状态:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51855 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   782 KiB/s rd, 7.5 MiB/s wr, 989 op/s rd, 463 op/s wr

等几十秒钟再查看:

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 3s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   508 KiB/s rd, 9.1 MiB/s wr, 667 op/s rd, 621 op/s wr

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 5s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   680 KiB/s rd, 10 MiB/s wr, 869 op/s rd, 723 op/s wr

[root@compute01 ~]#

状态正常,问题解决。

您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-11 22:57 , Processed in 0.024024 second(s), 25 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表