|
|
模拟删除 mds 元数据后的恢复过程。
测试环境 ceph 版本号(ceph L 版本,即 luminous):
[root@ceph-3 ~]# ceph -v
ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)

检查 osd 的状态:
[root@ceph-3 ~]# ceph osd tree
ID CLASS WEIGHT  TYPE NAME       STATUS REWEIGHT PRI-AFF
-9             0 host root
-1       6.00000 root default
-2       2.00000     host ceph-1
 0   hdd 1.00000         osd.0       up  1.00000 1.00000
 1   hdd 1.00000         osd.1       up  1.00000 1.00000
-3       2.00000     host ceph-2
 2   hdd 1.00000         osd.2       up  1.00000 1.00000
 3   hdd 1.00000         osd.3       up  1.00000 1.00000
-4       2.00000     host ceph-3
 4   hdd 1.00000         osd.4       up  1.00000 1.00000
 5   hdd 1.00000         osd.5       up  1.00000 1.00000

ceph 容量使用情况:
[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB      50.2GiB          8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6      8.03MiB         0        167GiB          23
    fsdata       7      16.5GiB      8.96        167GiB        5016
    recovery     8      2.19KiB         0        251GiB          21

[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB      50.2GiB          8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6      8.03MiB         0        167GiB          23
    fsdata       7      16.5GiB      8.96        167GiB        5016
    recovery     8      2.19KiB         0        251GiB          21

查看 metadata 池中的对象:
[root@ceph-3 ~]# rados -p metadata ls
601.00000000
602.00000000
600.00000000
603.00000000
1.00000000.inode
200.00000000
200.00000001
606.00000000
607.00000000
608.00000000
604.00000000
500.00000000
mds_snaptable
605.00000000
mds0_inotable
100.00000000
mds0_sessionmap
200.00000003
200.00000002
609.00000000
400.00000000
100.00000000.inode
1.00000000

模拟删除 metadata 池中的元数据对象:
[root@ceph-3 ~]# rados -p metadata ls |xargs -i rados -p metadata rm {}
[root@ceph-3 ~]#
检查对象还在不在:
[root@ceph-3 ~]# rados -p metadata ls
[root@ceph-3 ~]#

检查状态:
Every 2.0s: ceph -s                                    Fri Aug  5 09:37:17 2022

  cluster:
    id:     57bf4711-2218-46af-99d6-9c68ae230ce1
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum ceph-2,ceph-1,ceph-3
    mgr: ceph-2(active), standbys: ceph-1, ceph-3
    mds: filefs-1/1/1 up {0=ceph-2=up:active}, 2 up:standby
    osd: 6 osds: 6 up, 6 in

  data:
    pools:   3 pools, 1280 pgs
    objects: 5.04k objects, 16.5GiB
    usage:   50.1GiB used, 535GiB / 585GiB avail
    pgs:     1280 active+clean

可以看到 metadata 池里面已经没有对象了。因为 mds 会缓存元数据信息,所以需要重启 mds 才能看到效果:

[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB      50.1GiB          8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6           0B         0        167GiB           0
    fsdata       7      16.5GiB      8.96        167GiB        5016
    recovery     8      2.19KiB         0        251GiB          21
检查 ceph fs 文件系统:
[root@ceph-3 ~]# ceph fs status
filefs - 0 clients
======
+------+--------+--------+---------------+-------+-------+
| Rank | State  |  MDS   |    Activity   |  dns  |  inos |
+------+--------+--------+---------------+-------+-------+
|  0   | active | ceph-2 | Reqs:    0 /s |  1001 |  1003 |
+------+--------+--------+---------------+-------+-------+
+----------+----------+-------+-------+
|   Pool   |   type   |  used | avail |
+----------+----------+-------+-------+
| metadata | metadata |    0  |  167G |
|  fsdata  |   data   | 16.4G |  167G |
+----------+----------+-------+-------+

+-------------+
| Standby MDS |
+-------------+
|    ceph-1   |
|    ceph-3   |
+-------------+
MDS version: ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)
[root@ceph-3 ~]#

重启 mds 服务:
[root@ceph-2 ~]# systemctl restart ceph-mds@ceph-2.service

查看 ceph 的状态:

[root@ceph-1 ~]# ceph -s
  cluster:
    id:     57bf4711-2218-46af-99d6-9c68ae230ce1
    health: HEALTH_ERR
            1 filesystem is degraded
            1 mds daemon damaged

  services:
    mon: 3 daemons, quorum ceph-2,ceph-1,ceph-3
    mgr: ceph-2(active), standbys: ceph-1, ceph-3
    mds: filefs-0/1/1 up , 3 up:standby, 1 damaged
    osd: 6 osds: 6 up, 6 in

  data:
    pools:   3 pools, 1280 pgs
    objects: 5.04k objects, 16.5GiB
    usage:   50.1GiB used, 535GiB / 585GiB avail
    pgs:     1280 active+clean

[root@ceph-1 ~]# systemctl restart ceph-mds@ceph-1.service
[root@ceph-1 ~]#

[root@ceph-3 ~]# systemctl restart ceph-mds@ceph-3.service

[root@ceph-3 ~]# ceph fs status
filefs - 0 clients
======
+------+--------+-----+----------+-----+------+
| Rank | State  | MDS | Activity | dns | inos |
+------+--------+-----+----------+-----+------+
|  0   | failed |     |          |     |      |
+------+--------+-----+----------+-----+------+
+----------+----------+-------+-------+
|   Pool   |   type   |  used | avail |
+----------+----------+-------+-------+
| metadata | metadata |  112  |  167G |
|  fsdata  |   data   | 16.4G |  167G |
+----------+----------+-------+-------+

+-------------+
| Standby MDS |
+-------------+
|    ceph-2   |
|    ceph-1   |
|    ceph-3   |
+-------------+
MDS version: ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)
[root@ceph-3 ~]#
可以看到集群现在已经不正常了,访问 kc 里面的数据会卡住,说明数据已经无法正常读取了。

[root@ceph-3 ~]# mount -t ceph 192.168.120.31:6789:/ /mnt/mycephfs/ -o name=admin,secret=AQBH+tRiATMVCRAAdTQnt4IFUWD45zGEZQa7A==
secret is not valid base64: Invalid argument.
adding ceph secret key to kernel failed: Invalid argument.
failed to parse ceph_options

[root@ceph-3 ~]# ceph df detail
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED     OBJECTS
    585GiB     535GiB      50.2GiB          8.58       5.04k
POOLS:
    NAME         ID     QUOTA OBJECTS     QUOTA BYTES     USED        %USED     MAX AVAIL     OBJECTS     DIRTY     READ        WRITE       RAW USED
    metadata     6      N/A               N/A                112B         0        167GiB           2         2        176B     1.25KiB         336B
    fsdata       7      N/A               N/A             16.5GiB      8.96        167GiB        5016     5.02k     3.19KiB     4.91KiB      49.4GiB
    recovery     8      N/A               N/A             2.19KiB         0        251GiB          21        21         32B         44B      4.39KiB
[root@ceph-3 ~]#

开始恢复:使用一位开发者编写的 py 脚本(文末给出了源码)进行恢复,把脚本放到集群任意一台节点上执行:
[root@ceph-3 ~]# python recovery_cephfs.py -p fsdata

在日志里看到的问题:
2022-08-05 09:59:03 : INFO exec_cmd():: cmd: for obj in $(rados -p fsdata ls);do rados -p fsdata get ${obj} /root/recoveryobjs/${obj};done

[root@ceph-3 ~]# tail -f recovery.log
            7
        ],
        "data_pools": [
            "fsdata"
        ]
    }
]
, code: 0
2022-08-05 09:59:03 : INFO === main():: recovery start
2022-08-05 09:59:03 : INFO exec_cmd():: cmd: for obj in $(rados -p fsdata ls);do rados -p fsdata get ${obj} /root/recoveryobjs/${obj};done

2022-08-05 10:35:36 : INFO exec_cmd():: cmd exec out: , code: 0
2022-08-05 10:35:36 : INFO exec_cmd():: cmd: ls /root/recoveryobjs

|
|