找回密码
 注册
查看: 599|回复: 0

HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent 解决过程

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-8-8 09:19:38 | 显示全部楼层 |阅读模式
[root@controller1 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 13d)
    mgr: compute03(active, since 3M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 4w), 32 in (since 3M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.71M objects, 6.5 TiB
    usage:   13 TiB used, 131 TiB / 144 TiB avail
    pgs:     3707 active+clean
             4    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   290 KiB/s rd, 9.7 MiB/s wr, 400 op/s rd, 577 op/s wr
[root@controller1 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]
查找osd在哪里?
[root@controller1 ~]# ceph osd find  11
{
    "osd": 11,
    "addrs": {
        "addrvec": [
            {
                "type": "v2",
                "addr": "192.168.0.79:6800",
                "nonce": 2194
            },
            {
                "type": "v1",
                "addr": "192.168.0.79:6801",
                "nonce": 2194
            }
        ]
    },
    "osd_fsid": "5bf2cf92-b7bc-45ee-bfc9-6a1cf553a1d3",
    "host": "compute03",
    "crush_location": {
        "host": "hdd-0.6T-compute03",
        "root": "hdd-0.6T"
    }
}
[root@controller1 ~]# ceph osd find  9
{
    "osd": 9,
    "addrs": {
        "addrvec": [
            {
                "type": "v2",
                "addr": "192.168.0.75:6800",
                "nonce": 2046
            },
            {
                "type": "v1",
                "addr": "192.168.0.75:6802",
                "nonce": 2046
            }
        ]
    },
    "osd_fsid": "40efbcad-2acc-4e43-82d0-da8e473054a1",
    "host": "compute01",
    "crush_location": {
        "host": "hdd-0.6T-compute01",
        "root": "hdd-0.6T"
    }
}
登录对应节点进行操作:
[root@controller1 ~]# ssh compute01
Last login: Fri Aug  5 12:35:04 2022 from 192.168.3.28
[root@compute01 ~]# systemctl stop ceph-osd@9.service

[root@compute01 ~]# ceph-osd -i 9 --flush-journal
2022-08-08 09:23:54.750 7f714bfc2a80 -1 flushed journal /var/lib/ceph/osd/ceph-9/journal for object store /var/lib/ceph/osd/ceph-9
[root@compute01 ~]# systemctl start ceph-osd@9.service
[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]

[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]
使用repair方式修复:

[root@compute03 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]

[root@compute03 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

[root@compute03 ~]# exit
logout
Connection to compute03 closed.

[root@compute01 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

等待一小会同步后,即可:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 13d)
    mgr: compute03(active, since 3M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 112s), 32 in (since 3M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.71M objects, 6.5 TiB
    usage:   13 TiB used, 131 TiB / 144 TiB avail
    pgs:     3707 active+clean
             4    active+clean+scrubbing+deep
             1    active+clean+scrubbing+deep+repair

  io:
    client:   442 KiB/s rd, 9.1 MiB/s wr, 573 op/s rd, 562 op/s wr

[root@compute01 ~]# ceph health detail
HEALTH_OK
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 00:12 , Processed in 0.050141 second(s), 23 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表