|
|
模拟删除 MDS 元数据后的恢复过程。
测试环境的 Ceph 版本(L 版,即 Luminous):
[root@ceph-3 ~]# ceph -v
ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)

检查 OSD 的状态:
[root@ceph-3 ~]# ceph osd tree
ID CLASS WEIGHT  TYPE NAME       STATUS REWEIGHT PRI-AFF
-9             0 host root
-1       6.00000 root default
-2       2.00000     host ceph-1
 0   hdd 1.00000         osd.0       up  1.00000 1.00000
 1   hdd 1.00000         osd.1       up  1.00000 1.00000
-3       2.00000     host ceph-2
 2   hdd 1.00000         osd.2       up  1.00000 1.00000
 3   hdd 1.00000         osd.3       up  1.00000 1.00000
-4       2.00000     host ceph-3
 4   hdd 1.00000         osd.4       up  1.00000 1.00000
 5   hdd 1.00000         osd.5       up  1.00000 1.00000

Ceph 容量使用情况:
[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB      50.2GiB          8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6      8.03MiB         0        167GiB          23
    fsdata       7      16.5GiB      8.96        167GiB        5016
    recovery     8      2.19KiB         0        251GiB          21

[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB      50.2GiB          8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6      8.03MiB         0        167GiB          23
    fsdata       7      16.5GiB      8.96        167GiB        5016
    recovery     8      2.19KiB         0        251GiB          21

查看 metadata 池中的对象:
[root@ceph-3 ~]# rados -p metadata ls
601.00000000
602.00000000
600.00000000
603.00000000
1.00000000.inode
200.00000000
200.00000001
606.00000000
607.00000000
608.00000000
604.00000000
500.00000000
mds_snaptable
605.00000000
mds0_inotable
100.00000000
mds0_sessionmap
200.00000003
200.00000002
609.00000000
400.00000000
100.00000000.inode
1.00000000

模拟删除 metadata 池中的元数据对象:
[root@ceph-3 ~]# rados -p metadata ls |xargs -i rados -p metadata rm {}
[root@ceph-3 ~]#
检查对象是否还在:
[root@ceph-3 ~]# rados -p metadata ls
[root@ceph-3 ~]#

检查集群状态:
Every 2.0s: ceph -s                                      Fri Aug  5 09:37:17 2022
  cluster:
    id:     57bf4711-2218-46af-99d6-9c68ae230ce1
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum ceph-2,ceph-1,ceph-3
    mgr: ceph-2(active), standbys: ceph-1, ceph-3
    mds: filefs-1/1/1 up {0=ceph-2=up:active}, 2 up:standby
    osd: 6 osds: 6 up, 6 in

  data:
    pools:   3 pools, 1280 pgs
    objects: 5.04k objects, 16.5GiB
    usage:   50.1GiB used, 535GiB / 585GiB avail
    pgs:     1280 active+clean

可以看到 metadata 池里已经没有对象了,但集群状态仍然显示正常。这是因为 MDS 在内存中缓存了元数据信息,所以需要重启 MDS 才能看到实际效果。重启之前先确认一下容量:
[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB      50.1GiB          8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6           0B         0        167GiB           0
    fsdata       7      16.5GiB      8.96        167GiB        5016
    recovery     8      2.19KiB         0        251GiB          21
检查 CephFS 文件系统状态:
[root@ceph-3 ~]# ceph fs status
filefs - 0 clients
======
+------+--------+--------+---------------+-------+-------+
| Rank | State  |  MDS   |    Activity   |  dns  |  inos |
+------+--------+--------+---------------+-------+-------+
|  0   | active | ceph-2 | Reqs:    0 /s |  1001 |  1003 |
+------+--------+--------+---------------+-------+-------+
+----------+----------+-------+-------+
|   Pool   |   type   |  used | avail |
+----------+----------+-------+-------+
| metadata | metadata |    0  |  167G |
|  fsdata  |   data   | 16.4G |  167G |
+----------+----------+-------+-------+

+-------------+
| Standby MDS |
+-------------+
|    ceph-1   |
|    ceph-3   |
+-------------+
MDS version: ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)
[root@ceph-3 ~]#
重启 MDS 服务:
[root@ceph-2 ~]# systemctl restart ceph-mds@ceph-2.service

查看 Ceph 集群的状态:

[root@ceph-1 ~]# ceph -s
  cluster:
    id:     57bf4711-2218-46af-99d6-9c68ae230ce1
    health: HEALTH_ERR
            1 filesystem is degraded
            1 mds daemon damaged

  services:
    mon: 3 daemons, quorum ceph-2,ceph-1,ceph-3
    mgr: ceph-2(active), standbys: ceph-1, ceph-3
    mds: filefs-0/1/1 up , 3 up:standby, 1 damaged
    osd: 6 osds: 6 up, 6 in

  data:
    pools:   3 pools, 1280 pgs
    objects: 5.04k objects, 16.5GiB
    usage:   50.1GiB used, 535GiB / 585GiB avail
    pgs:     1280 active+clean

[root@ceph-1 ~]# systemctl restart ceph-mds@ceph-1.service
[root@ceph-1 ~]#

[root@ceph-3 ~]# systemctl restart ceph-mds@ceph-3.service
[root@ceph-3 ~]# ceph fs status
filefs - 0 clients
======
+------+--------+-----+----------+-----+------+
| Rank | State  | MDS | Activity | dns | inos |
+------+--------+-----+----------+-----+------+
|  0   | failed |     |          |     |      |
+------+--------+-----+----------+-----+------+
+----------+----------+-------+-------+
|   Pool   |   type   |  used | avail |
+----------+----------+-------+-------+
| metadata | metadata |  112  |  167G |
|  fsdata  |   data   | 16.4G |  167G |
+----------+----------+-------+-------+

+-------------+
| Standby MDS |
+-------------+
|    ceph-2   |
|    ceph-1   |
|    ceph-3   |
+-------------+
MDS version: ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)
[root@ceph-3 ~]#
可以看到集群现在已经不正常了:访问 kc 里面的数据时会卡住,说明数据已经无法正常读取了。

[root@ceph-3 ~]# mount -t ceph 192.168.120.31:6789:/ /mnt/mycephfs/ -o name=admin,secret=AQBH+tRiATMVCRAAdTQnt4IFUWD45zGEZQa7A==
secret is not valid base64: Invalid argument.
adding ceph secret key to kernel failed: Invalid argument.
failed to parse ceph_options
(注意:这里的报错来自密钥本身——上面粘贴的 secret 只有 39 个字符,长度不是 4 的倍数,不是合法的 base64 串,应是复制时少了字符;该报错与元数据损坏无关。)
[root@ceph-3 ~]# ceph df detail
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED     OBJECTS
    585GiB     535GiB      50.2GiB          8.58       5.04k
POOLS:
    NAME         ID     QUOTA OBJECTS     QUOTA BYTES     USED        %USED     MAX AVAIL     OBJECTS     DIRTY     READ        WRITE       RAW USED
    metadata     6      N/A               N/A                112B         0        167GiB           2         2        176B     1.25KiB         336B
    fsdata       7      N/A               N/A             16.5GiB      8.96        167GiB        5016     5.02k     3.19KiB     4.91KiB      49.4GiB
    recovery     8      N/A               N/A             2.19KiB         0        251GiB          21        21         32B         44B      4.39KiB
[root@ceph-3 ~]#

开始恢复:使用一位开发者编写的 Python 脚本(源码见文末)进行恢复,把脚本放到集群中任意一台节点上执行:
[root@ceph-3 ~]# python recovery_cephfs.py -p fsdata

在日志里看到的问题:
2022-08-05 09:59:03 : INFO exec_cmd():: cmd: for obj in $(rados -p fsdata ls);do rados -p fsdata get ${obj} /root/recoveryobjs/${obj};done

[root@ceph-3 ~]# tail -f recovery.log
            7
        ],
        "data_pools": [
            "fsdata"
        ]
    }
]
, code: 0
2022-08-05 09:59:03 : INFO === main():: recovery start
2022-08-05 09:59:03 : INFO exec_cmd():: cmd: for obj in $(rados -p fsdata ls);do rados -p fsdata get ${obj} /root/recoveryobjs/${obj};done

2022-08-05 10:35:36 : INFO exec_cmd():: cmd exec out: , code: 0
2022-08-05 10:35:36 : INFO exec_cmd():: cmd: ls /root/recoveryobjs

|
|