找回密码
 注册
查看: 597|回复: 0

HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent 解决过程

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-8-8 09:19:38 | 显示全部楼层 |阅读模式
[root@controller1 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_ERR
            1 scrub errors
            Possible data damage: 1 pg inconsistent

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 13d)
    mgr: compute03(active, since 3M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 4w), 32 in (since 3M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.71M objects, 6.5 TiB
    usage:   13 TiB used, 131 TiB / 144 TiB avail
    pgs:     3707 active+clean
             4    active+clean+scrubbing+deep
             1    active+clean+inconsistent

  io:
    client:   290 KiB/s rd, 9.7 MiB/s wr, 400 op/s rd, 577 op/s wr

[root@controller1 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]
查找osd在哪里?
[root@controller1 ~]# ceph osd find  11
{
    "osd": 11,
    "addrs": {
        "addrvec": [
            {
                "type": "v2",
                "addr": "192.168.0.79:6800",
                "nonce": 2194
            },
            {
                "type": "v1",
                "addr": "192.168.0.79:6801",
                "nonce": 2194
            }
        ]
    },
    "osd_fsid": "5bf2cf92-b7bc-45ee-bfc9-6a1cf553a1d3",
    "host": "compute03",
    "crush_location": {
        "host": "hdd-0.6T-compute03",
        "root": "hdd-0.6T"
    }
}
[root@controller1 ~]# ceph osd find  9
{
    "osd": 9,
    "addrs": {
        "addrvec": [
            {
                "type": "v2",
                "addr": "192.168.0.75:6800",
                "nonce": 2046
            },
            {
                "type": "v1",
                "addr": "192.168.0.75:6802",
                "nonce": 2046
            }
        ]
    },
    "osd_fsid": "40efbcad-2acc-4e43-82d0-da8e473054a1",
    "host": "compute01",
    "crush_location": {
        "host": "hdd-0.6T-compute01",
        "root": "hdd-0.6T"
    }
}


登录对应节点进行操作:
[root@controller1 ~]# ssh compute01
Last login: Fri Aug  5 12:35:04 2022 from 192.168.3.28
[root@compute01 ~]# systemctl stop ceph-osd@9.service


[root@compute01 ~]# ceph-osd -i 9 --flush-journal
2022-08-08 09:23:54.750 7f714bfc2a80 -1 flushed journal /var/lib/ceph/osd/ceph-9/journal for object store /var/lib/ceph/osd/ceph-9
[root@compute01 ~]# systemctl start ceph-osd@9.service


[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]

[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]

使用repair方式修复:

[root@compute03 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

[root@compute03 ~]# ceph health detail
HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 1 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
    pg 9.f1 is active+clean+inconsistent, acting [11,9]

[root@compute03 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

[root@compute03 ~]# exit
logout
Connection to compute03 closed.

[root@compute01 ~]# ceph pg repair 9.f1
instructing pg 9.f1 on osd.11 to repair

等待一小会同步后,即可:
[root@compute01 ~]# ceph -s
  cluster:
    id:     2af51d38-db90-4a57-a43d-ea9f6ebd7482
    health: HEALTH_OK

  services:
    mon: 5 daemons, quorum compute01,compute02,compute03,compute05,compute08 (age 13d)
    mgr: compute03(active, since 3M), standbys: compute02, compute01, compute08, compute05
    mds:  1 up:standby
    osd: 32 osds: 32 up (since 112s), 32 in (since 3M)

  data:
    pools:   7 pools, 3712 pgs
    objects: 1.71M objects, 6.5 TiB
    usage:   13 TiB used, 131 TiB / 144 TiB avail
    pgs:     3707 active+clean
             4    active+clean+scrubbing+deep
             1    active+clean+scrubbing+deep+repair

  io:
    client:   442 KiB/s rd, 9.1 MiB/s wr, 573 op/s rd, 562 op/s wr

[root@compute01 ~]# ceph health detail
HEALTH_OK

您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-11 23:02 , Processed in 0.013535 second(s), 23 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表