|
|
模拟删除mds 元数据恢复过程,
测试环境ceph版本号: ceph L版本:
[root@ceph-3 ~]# ceph -v
ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)

检查osd的状态:
[root@ceph-3 ~]# ceph osd tree
ID CLASS WEIGHT  TYPE NAME       STATUS REWEIGHT PRI-AFF
-9             0 host root
-1       6.00000 root default
-2       2.00000     host ceph-1
 0   hdd 1.00000         osd.0       up  1.00000 1.00000
 1   hdd 1.00000         osd.1       up  1.00000 1.00000
-3       2.00000     host ceph-2
 2   hdd 1.00000         osd.2       up  1.00000 1.00000
 3   hdd 1.00000         osd.3       up  1.00000 1.00000
-4       2.00000     host ceph-3
 4   hdd 1.00000         osd.4       up  1.00000 1.00000
 5   hdd 1.00000         osd.5       up  1.00000 1.00000

ceph 容量使用情况:
[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB     50.2GiB      8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6      8.03MiB     0         167GiB        23
    fsdata       7      16.5GiB     8.96      167GiB        5016
    recovery     8      2.19KiB     0         251GiB        21

[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB     50.2GiB      8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6      8.03MiB     0         167GiB        23
    fsdata       7      16.5GiB     8.96      167GiB        5016
    recovery     8      2.19KiB     0         251GiB        21

查看metadata的数据记录:
[root@ceph-3 ~]# rados -p metadata ls
601.00000000
602.00000000
600.00000000
603.00000000
1.00000000.inode
200.00000000
200.00000001
606.00000000
607.00000000
608.00000000
604.00000000
500.00000000
mds_snaptable
605.00000000
mds0_inotable
100.00000000
mds0_sessionmap
200.00000003
200.00000002
609.00000000
400.00000000
100.00000000.inode
1.00000000

模拟删除metadata pool池的源数据:
[root@ceph-3 ~]# rados -p metadata ls |xargs -i rados -p metadata rm {}
[root@ceph-3 ~]#
检查还在不在:
[root@ceph-3 ~]# rados -p metadata ls
[root@ceph-3 ~]#

检查状态:
Every 2.0s: ceph -s                                Fri Aug  5 09:37:17 2022
  cluster:
    id:     57bf4711-2218-46af-99d6-9c68ae230ce1
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum ceph-2,ceph-1,ceph-3
    mgr: ceph-2(active), standbys: ceph-1, ceph-3
    mds: filefs-1/1/1 up {0=ceph-2=up:active}, 2 up:standby
    osd: 6 osds: 6 up, 6 in

  data:
    pools:   3 pools, 1280 pgs
    objects: 5.04k objects, 16.5GiB
    usage:   50.1GiB used, 535GiB / 585GiB avail
    pgs:     1280 active+clean

看到metadata池里面已经没有对象了,重启下mds看效果,因为mds里面会缓存元数据信息,所以要重启下mds:
[root@ceph-3 ~]# ceph df
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED
    585GiB     535GiB     50.1GiB      8.58
POOLS:
    NAME         ID     USED        %USED     MAX AVAIL     OBJECTS
    metadata     6      0B          0         167GiB        0
    fsdata       7      16.5GiB     8.96      167GiB        5016
    recovery     8      2.19KiB     0         251GiB        21
检查ceph fs文件系统:
[root@ceph-3 ~]# ceph fs status
filefs - 0 clients
======
+------+--------+--------+---------------+-------+-------+
| Rank | State  |  MDS   |    Activity   |  dns  |  inos |
+------+--------+--------+---------------+-------+-------+
|  0   | active | ceph-2 | Reqs:    0 /s |  1001 |  1003 |
+------+--------+--------+---------------+-------+-------+
+----------+----------+-------+-------+
|   Pool   |   type   |  used | avail |
+----------+----------+-------+-------+
| metadata | metadata |    0  |  167G |
|  fsdata  |   data   | 16.4G |  167G |
+----------+----------+-------+-------+

+-------------+
| Standby MDS |
+-------------+
|    ceph-1   |
|    ceph-3   |
+-------------+
MDS version: ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)
[root@ceph-3 ~]#

重启mds服务:
[root@ceph-2 ~]# systemctl restart ceph-mds@ceph-2.service

查看ceph 的状态:

[root@ceph-1 ~]# ceph -s
  cluster:
    id:     57bf4711-2218-46af-99d6-9c68ae230ce1
    health: HEALTH_ERR
            1 filesystem is degraded
            1 mds daemon damaged

  services:
    mon: 3 daemons, quorum ceph-2,ceph-1,ceph-3
    mgr: ceph-2(active), standbys: ceph-1, ceph-3
    mds: filefs-0/1/1 up , 3 up:standby, 1 damaged
    osd: 6 osds: 6 up, 6 in

  data:
    pools:   3 pools, 1280 pgs
    objects: 5.04k objects, 16.5GiB
    usage:   50.1GiB used, 535GiB / 585GiB avail
    pgs:     1280 active+clean

[root@ceph-1 ~]# systemctl restart ceph-mds@ceph-1.service
[root@ceph-1 ~]#

[root@ceph-3 ~]# systemctl restart ceph-mds@ceph-3.service

[root@ceph-3 ~]# ceph fs status
filefs - 0 clients
======
+------+--------+-----+----------+-----+------+
| Rank | State  | MDS | Activity | dns | inos |
+------+--------+-----+----------+-----+------+
|  0   | failed |     |          |     |      |
+------+--------+-----+----------+-----+------+
+----------+----------+-------+-------+
|   Pool   |   type   |  used | avail |
+----------+----------+-------+-------+
| metadata | metadata |  112  |  167G |
|  fsdata  |   data   | 16.4G |  167G |
+----------+----------+-------+-------+

+-------------+
| Standby MDS |
+-------------+
|    ceph-2   |
|    ceph-1   |
|    ceph-3   |
+-------------+
MDS version: ceph version 12.2.8-52.el7 (3af3ca15b68572a357593c261f95038d02f46201) luminous (stable)
[root@ceph-3 ~]#
看到集群现在不正常了,访问kc里面的数据卡住,说明数据已经无法正常读取了。

[root@ceph-3 ~]# mount -t ceph 192.168.120.31:6789:/ /mnt/mycephfs/ -o name=admin,secret=AQBH+tRiATMVCRAAdTQnt4IFUWD45zGEZQa7A==
secret is not valid base64: Invalid argument.
adding ceph secret key to kernel failed: Invalid argument.
failed to parse ceph_options

[root@ceph-3 ~]# ceph df detail
GLOBAL:
    SIZE       AVAIL      RAW USED     %RAW USED     OBJECTS
    585GiB     535GiB     50.2GiB      8.58          5.04k
POOLS:
    NAME         ID     QUOTA OBJECTS     QUOTA BYTES     USED        %USED     MAX AVAIL     OBJECTS     DIRTY     READ        WRITE       RAW USED
    metadata     6      N/A               N/A             112B        0         167GiB        2           2         176B        1.25KiB     336B
    fsdata       7      N/A               N/A             16.5GiB     8.96      167GiB        5016        5.02k     3.19KiB     4.91KiB     49.4GiB
    recovery     8      N/A               N/A             2.19KiB     0         251GiB        21          21        32B         44B         4.39KiB
[root@ceph-3 ~]#
开始恢复:使用一位开发者编写的py脚本(文末给出了源码)进行恢复,把脚本放到集群任意一台节点上执行:
[root@ceph-3 ~]# python recovery_cephfs.py -p fsdata
在日志里看到的问题:
2022-08-05 09:59:03 : INFO exec_cmd():: cmd: for obj in $(rados -p fsdata ls);do rados -p fsdata get ${obj} /root/recoveryobjs/${obj};done

[root@ceph-3 ~]# tail -f recovery.log
            7
        ],
        "data_pools": [
            "fsdata"
        ]
    }
]
, code: 0
2022-08-05 09:59:03 : INFO === main():: recovery start
2022-08-05 09:59:03 : INFO exec_cmd():: cmd: for obj in $(rados -p fsdata ls);do rados -p fsdata get ${obj} /root/recoveryobjs/${obj};done

2022-08-05 10:35:36 : INFO exec_cmd():: cmd exec out: , code: 0
2022-08-05 10:35:36 : INFO exec_cmd():: cmd: ls /root/recoveryobjs

|
|