找回密码
 注册
查看: 596|回复: 0

HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent 解决过程

[复制链接]

0

主题

0

回帖

9

积分

管理员

积分
9
QQ
发表于 2022-8-8 09:19:38 | 显示全部楼层 |阅读模式
[root@controller1 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 13d)
    mgr: compute03(active, since 3M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 4w), 32 in (since 3M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.71M objects, 6.5 TiB
    usage:   13 TiB used, 131 TiB / 144 TiB avail
    pgs:     3707 active+clean
             4    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   290 KiB/s rd, 9.7 MiB/s wr, 400 op/s rd, 577 op/s wr

[root@controller1 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]
查找osd在哪里?
[root@controller1 ~]# ceph osd find  11
{
    "osd": 11,
    "addrs": {
        "addrvec": [
            {
                "type": "v2",
                "addr": "192.168.0.79:6800",
                "nonce": 2194
            },
            {
                "type": "v1",
                "addr": "192.168.0.79:6801",
                "nonce": 2194
            }
        ]
    },
    "osd_fsid": "5bf2cf92-b7bc-45ee-bfc9-6a1cf553a1d3",
    "host": "compute03",
    "crush_location": {
        "host": "hdd-0.6T-compute03",
        "root": "hdd-0.6T"
    }
}
[root@controller1 ~]# ceph osd find  9
{
    "osd": 9,
    "addrs": {
        "addrvec": [
            {
                "type": "v2",
                "addr": "192.168.0.75:6800",
                "nonce": 2046
            },
            {
                "type": "v1",
                "addr": "192.168.0.75:6802",
                "nonce": 2046
            }
        ]
    },
    "osd_fsid": "40efbcad-2acc-4e43-82d0-da8e473054a1",
    "host": "compute01",
    "crush_location": {
        "host": "hdd-0.6T-compute01",
        "root": "hdd-0.6T"
    }
}


登录对应节点进行操作:
[root@controller1 ~]# ssh compute01
Last login: Fri Aug  5 12:35:04 2022 from 192.168.3.28
[root@compute01 ~]# systemctl stop ceph-osd@9.service


[root@compute01 ~]# ceph-osd -i 9 --flush-journal
2022-08-08 09:23:54.750 7f714bfc2a80 -1 flushed journal /var/lib/ceph/osd/ceph-9/journal for object store /var/lib/ceph/osd/ceph-9
[root@compute01 ~]# systemctl start ceph-osd@9.service


[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]

[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]

使用repair方式修复:

[root@compute03 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]

[root@compute03 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

[root@compute03 ~]# exit
logout
Connection to compute03 closed.

[root@compute01 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

等待一小会同步后,即可:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 13d)
    mgr: compute03(active, since 3M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 112s), 32 in (since 3M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.71M objects, 6.5 TiB
    usage:   13 TiB used, 131 TiB / 144 TiB avail
    pgs:     3707 active+clean
             4    active+clean+scrubbing+deep
             1    active+clean+scrubbing+deep+repair

  io:
    client:   442 KiB/s rd, 9.1 MiB/s wr, 573 op/s rd, 562 op/s wr

[root@compute01 ~]# ceph health detail
HEALTH_OK

您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-11 22:58 , Processed in 0.045446 second(s), 25 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表