|
|
cluster:
[root@compute01 src]# ceph -s
  cluster:
    id:     31403b11-8a1e-432f-876e-5a2c852f9dcc
    health: HEALTH_WARN
            Reduced data availability: 640 pgs inactive

  services:
    mon: 3 daemons, quorum compute01,compute02,compute03 (age 42m)
    mgr: compute01(active, since 42m), standbys: compute02, compute03
    osd: 3 osds: 3 up (since 26m), 3 in (since 26m)

  data:
    pools:   6 pools, 640 pgs
    objects: 0 objects, 0 B
    usage:   3.1 GiB used, 3.3 TiB / 3.3 TiB avail
    pgs:     100.000% pgs unknown
             640 unknown

遇到问题,一直处于这种状态:
导出文件:

[root@compute01 ~]# ceph osd crush tree
ID  CLASS  WEIGHT  TYPE NAME
-1              0  root default

发现什么都没有,缺少东西。

[root@compute01 ~]# ceph osd getcrushmap -o /tmp/mycrushmap
12
导出的数据只有12行,少了很多。
(注:该命令输出的数字应为 crushmap 的版本号 epoch,并非行数,待确认。)

转换成可以读的文件:

[root@compute01 tmp]# crushtool -d /tmp/mycrushmap > /tmp/mycrushmap.txt

[root@compute01 tmp]# crushtool -c mycrushmap.txt -o mycrushmap2
item 'compute01' in bucket 'default' is not defined
[root@compute01 tmp]# vim mycrushmap.txt
[root@compute01 tmp]# crushtool -c mycrushmap.txt -o mycrushmap2
转换的时候发现缺少东西;
再次编辑:
[root@compute01 tmp]# vim mycrushmap.txt

# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root

# buckets
root default {
	id -1		# do not change unnecessarily
	id -2 class hdd		# do not change unnecessarily
	# weight 0.000
	alg straw2
	hash 0	# rjenkins1

}
# rules
rule replicated_rule {
	id 0
	type replicated
	min_size 1
	max_size 10
	step take default
	step chooseleaf firstn 0 type host
	step emit
}
# end crush map

发现少了很多东西,添加上吧:

# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root
host compute02 {
	id -3		# do not change unnecessarily
	id -4 class hdd		# do not change unnecessarily
	# weight 1.000
	alg straw2
	hash 0	# rjenkins1
	item osd.0 weight 1.000
}
host compute01 {
	id -5		# do not change unnecessarily
	id -6 class hdd		# do not change unnecessarily
	# weight 1.000
	alg straw2
	hash 0	# rjenkins1
	item osd.1 weight 1.000
}
host compute03 {
	id -7		# do not change unnecessarily
	id -8 class hdd		# do not change unnecessarily
	# weight 1.000
	alg straw2
	hash 0	# rjenkins1
	item osd.2 weight 1.000
}
# buckets
root default {
	id -1		# do not change unnecessarily
	id -2 class hdd		# do not change unnecessarily
	# weight 0.000
	alg straw2
	hash 0	# rjenkins1
	item compute02 weight 1.000
	item compute01 weight 1.000
	item compute03 weight 1.000
}
# rules
rule replicated_rule {
	id 0
	type replicated
	min_size 1
	max_size 10
	step take default
	step chooseleaf firstn 0 type host
	step emit
}
# end crush map

添加好之后,检查一下对应关系:因为 ceph 节点和 osd 添加顺序的问题,导致 1 节点和 2 节点颠倒了(osd.0 在 compute02 上,osd.1 在 compute01 上),要注意这个地方,其他忽略。
转换成 ceph 认识的文件:
[root@compute01 tmp]# crushtool -c mycrushmap.txt -o mycrushmap2
[root@compute01 tmp]# ceph osd setcrushmap -i /tmp/mycrushmap2
13
[root@compute01 tmp]# ceph -s
  cluster:
    id:     31403b11-8a1e-432f-876e-5a2c852f9dcc
    health: HEALTH_WARN
            Reduced data availability: 212 pgs inactive

  services:
    mon: 3 daemons, quorum compute01,compute02,compute03 (age 56m)
    mgr: compute01(active, since 56m), standbys: compute02, compute03
    osd: 3 osds: 3 up (since 40m), 3 in (since 40m)

  data:
    pools:   6 pools, 640 pgs
    objects: 0 objects, 0 B
    usage:   3.1 GiB used, 3.3 TiB / 3.3 TiB avail
    pgs:     33.125% pgs unknown
             428 active+clean
             212 unknown

[root@compute01 tmp]# ceph -s
  cluster:
    id:     31403b11-8a1e-432f-876e-5a2c852f9dcc
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum compute01,compute02,compute03 (age 56m)
    mgr: compute01(active, since 56m), standbys: compute02, compute03
    osd: 3 osds: 3 up (since 40m), 3 in (since 40m)

  data:
    pools:   6 pools, 640 pgs
    objects: 0 objects, 0 B
    usage:   3.1 GiB used, 3.3 TiB / 3.3 TiB avail
    pgs:     640 active+clean
[root@compute01 tmp]# ceph -s
  cluster:
    id:     31403b11-8a1e-432f-876e-5a2c852f9dcc
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum compute01,compute02,compute03 (age 56m)
    mgr: compute01(active, since 56m), standbys: compute02, compute03
    osd: 3 osds: 3 up (since 40m), 3 in (since 40m)

  data:
    pools:   6 pools, 640 pgs
    objects: 0 objects, 0 B
    usage:   3.1 GiB used, 3.3 TiB / 3.3 TiB avail
    pgs:     640 active+clean
恢复正常了,问题解决。

总结下:遇到这种问题,即使重做,问题依然存在,很头疼。只能检查到底是什么原因导致的问题。
|
|