|
|
尝试二、修复down掉的osd

该方法主要应用于某个osd物理损坏,导致激活不了

1、查看osd树

复制代码
root@ceph01:~# ceph osd tree
ID WEIGHT  TYPE NAME       UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.29279 root default
-2 0.14639     host ceph01
 0 0.14639         osd.0        up  1.00000          1.00000
-3 0.14639     host ceph02
 1 0.14639         osd.1      down        0          1.00000
复制代码
发现osd.1是down掉的。

2、将osd.1的状态设置为out

root@ceph02:~# ceph osd out osd.1
osd.1 is already out.
3、从集群中删除

root@ceph02:~# ceph osd rm osd.1
removed osd.1
4、从CRUSH中删除

root@ceph02:~# ceph osd crush rm osd.1
removed item id 1 name 'osd.1' from crush map
5、删除osd.1的认证信息

root@ceph02:~# ceph auth del osd.1
updated
6、umount

umount /dev/sdb1


7、再次查看osd的集群状态

复制代码
root@ceph02:~# ceph osd tree
ID WEIGHT  TYPE NAME       UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.14639 root default
-2 0.14639     host ceph01
 0 0.14639         osd.0        up  1.00000          1.00000
-3       0     host ceph02
复制代码
8、登录ceph-deploy节点

root@ceph01:~# cd /root/my-cluster/
root@ceph01:~/my-cluster#
9、初始化磁盘


ceph-deploy --overwrite-conf osd prepare ceph02:/dev/sdb1
在后面的版本中需要重新执行添加osd的命令:ceph-deploy osd create node1 --data /dev/sdb

10、再次激活所有的osd(记住是所有的,不只是down掉这一个)

ceph-deploy osd activate ceph01:/dev/sdb1 ceph02:/dev/sdb1
11、查看osd树和健康状态

复制代码
root@ceph01:~/my-cluster# ceph osd tree
ID WEIGHT  TYPE NAME       UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.29279 root default
-2 0.14639     host ceph01
 0 0.14639         osd.0        up  1.00000          1.00000
-3 0.14639     host ceph02
 1 0.14639         osd.1        up  1.00000          1.00000
root@ceph01:~/my-cluster#
复制代码
复制代码
root@ceph01:~/my-cluster# ceph -s
    cluster ecacda71-af9f-46f9-a2a3-a35c9e51db9e
     health HEALTH_OK
     monmap e1: 1 mons at {ceph01=10.111.131.125:6789/0}
            election epoch 14, quorum 0 ceph01
     osdmap e150: 2 osds: 2 up, 2 in
            flags sortbitwise,require_jewel_osds
      pgmap v9284: 64 pgs, 1 pools, 17 bytes data, 3 objects
            10310 MB used, 289 GB / 299 GB avail
                  64 active+clean
复制代码
只有为 HEALTH_OK 才算是正常的。

|
|