将设为首页浏览此站
开启辅助访问 天气与日历 收藏本站联系我们切换到窄版

易陆发现论坛

 找回密码
 开始注册
查看: 36|回复: 0
收起左侧

HEALTH_ERR 1 scrub errors Possible data damage: 1 pg inconsistent 处理过程并恢复

[复制链接]
发表于 2022-9-16 15:21:58 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。

您需要 登录 才可以下载或查看,没有帐号?开始注册

x
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent
            1 slow ops, oldest one blocked for 51555 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3709 active+clean
             2    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   1.2 MiB/s rd, 7.3 MiB/s wr, 1.54k op/s rd, 533 op/s wr

查看状态:

[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+inconsistent, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51565 sec, mon.compute01 has slow ops

修复pg:
[root@compute01 ~]# ceph pg repair 9.167
instructing pg 9.167 on osd.9 to repair
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51610 sec, mon.compute01 has slow ops
[root@compute01 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent; 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.167 is active+clean+scrubbing+deep+inconsistent+repair, acting [9,11]
SLOW_OPS 1 slow ops, oldest one blocked for 51615 sec, mon.compute01 has slow ops

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51700 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   921 KiB/s rd, 8.3 MiB/s wr, 1.17k op/s rd, 545 op/s wr

等会就出现正常的告警了。
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   698 KiB/s rd, 8.0 MiB/s wr, 901 op/s rd, 556 op/s wr

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51705 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   601 KiB/s rd, 8.2 MiB/s wr, 787 op/s rd, 569 op/s wr

检查下时间同步:
[root@compute01 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample
===============================================================================
^* 119.28.183.184                2   6    27    26  +3312us[+7317us] +/-   86ms
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51780 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3710 active+clean
             2    active+clean+scrubbing+deep

  io:
    client:   968 KiB/s rd, 9.1 MiB/s wr, 1.21k op/s rd, 624 op/s wr

[root@compute01 ~]# ceph health detail
HEALTH_WARN 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
SLOW_OPS 1 slow ops, oldest one blocked for 51795 sec, mon.compute01 has slow ops
重启下mon.target服务:
[root@compute01 ~]# systemctl restart ceph-mon
ceph-mon@                   ceph-mon@compute01.service  ceph-mon.target
[root@compute01 ~]# systemctl restart ceph-mon.target
查看状态:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_WARN
            1 slow ops, oldest one blocked for 51855 sec, mon.compute01 has slow ops

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 14h)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   782 KiB/s rd, 7.5 MiB/s wr, 989 op/s rd, 463 op/s wr

等几十秒钟再查看:

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 3s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   508 KiB/s rd, 9.1 MiB/s wr, 667 op/s rd, 621 op/s wr

[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 5s)
    mgr: compute03(active, since 4M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 14h), 32 in (since 4M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.88M objects, 7.2 TiB
    usage:   14 TiB used, 129 TiB / 144 TiB avail
    pgs:     3708 active+clean
             4    active+clean+scrubbing+deep

  io:
    client:   680 KiB/s rd, 10 MiB/s wr, 869 op/s rd, 723 op/s wr

[root@compute01 ~]#

状态正常,问题解决。

6 d2 J; I9 W! I0 }! ^
您需要登录后才可以回帖 登录 | 开始注册

本版积分规则

关闭

站长推荐上一条 /4 下一条

如有购买积分卡请联系497906712

QQ|返回首页|Archiver|手机版|小黑屋|易陆发现 点击这里给我发消息

GMT+8, 2022-10-4 06:11 , Processed in 0.175316 second(s), 23 queries .

Powered by LR.LINUX.cloud bbs168x X3.2 Licensed

© 2012-2022 Comsenz Inc.

快速回复 返回顶部 返回列表