|
|
cluster:& S- c7 w" v0 T
[root@compute01 src]# ceph -s
  cluster:
    id:     31403b11-8a1e-432f-876e-5a2c852f9dcc
    health: HEALTH_WARN
            Reduced data availability: 640 pgs inactive

  services:
    mon: 3 daemons, quorum compute01,compute02,compute03 (age 42m)
    mgr: compute01(active, since 42m), standbys: compute02, compute03
    osd: 3 osds: 3 up (since 26m), 3 in (since 26m)

  data:
    pools:   6 pools, 640 pgs
    objects: 0 objects, 0 B
    usage:   3.1 GiB used, 3.3 TiB / 3.3 TiB avail
    pgs:     100.000% pgs unknown
             640 unknown

遇到问题:集群一直处于上面这种状态(所有 PG 都是 unknown)。
查看并导出 CRUSH map:

[root@compute01 ~]# ceph osd crush tree
ID CLASS WEIGHT TYPE NAME
-1            0 root default

发现 root default 下面什么都没有,缺少主机和 OSD 的条目。

[root@compute01 ~]# ceph osd getcrushmap -o /tmp/mycrushmap
12
导出的数据只有12行,少了很多。
(注:这里输出的 12 更可能是 CRUSH map 的版本号(epoch),而不是行数;后面执行 setcrushmap 时输出的 13 与之对应。)

转换成可以读的文件:

[root@compute01 tmp]# crushtool -d /tmp/mycrushmap > /tmp/mycrushmap.txt
[root@compute01 tmp]# crushtool -c mycrushmap.txt -o mycrushmap2
item 'compute01' in bucket 'default' is not defined
[root@compute01 tmp]# vim mycrushmap.txt
[root@compute01 tmp]# crushtool -c mycrushmap.txt -o mycrushmap2
转换的时候发现缺少东西;
再次编辑:
[root@compute01 tmp]# vim mycrushmap.txt

# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root
# buckets
root default {
    id -1           # do not change unnecessarily
    id -2 class hdd # do not change unnecessarily
    # weight 0.000
    alg straw2
    hash 0          # rjenkins1
}
# rules
rule replicated_rule {
    id 0
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}
# end crush map

发现少了很多东西,添加上吧:

# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root
host compute02 {
    id -3           # do not change unnecessarily
    id -4 class hdd # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0          # rjenkins1
    item osd.0 weight 1.000
}
host compute01 {
    id -5           # do not change unnecessarily
    id -6 class hdd # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0          # rjenkins1
    item osd.1 weight 1.000
}
host compute03 {
    id -7           # do not change unnecessarily
    id -8 class hdd # do not change unnecessarily
    # weight 1.000
    alg straw2
    hash 0          # rjenkins1
    item osd.2 weight 1.000
}
# buckets
root default {
    id -1           # do not change unnecessarily
    id -2 class hdd # do not change unnecessarily
    # weight 0.000
    alg straw2
    hash 0          # rjenkins1
    item compute02 weight 1.000
    item compute01 weight 1.000
    item compute03 weight 1.000
}
# rules
rule replicated_rule {
    id 0
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}
# end crush map

添加好之后,检查一下主机与 OSD 的对应关系:由于 ceph 节点和 osd 添加顺序的问题,1 节点和 2 节点是颠倒的(osd.0 在 compute02 上,osd.1 在 compute01 上),要注意这个地方,其他可以忽略。
转换成 ceph 认识的文件:
[root@compute01 tmp]# crushtool -c mycrushmap.txt -o mycrushmap2

[root@compute01 tmp]# ceph osd setcrushmap -i /tmp/mycrushmap2
13
[root@compute01 tmp]# ceph -s
  cluster:
    id:     31403b11-8a1e-432f-876e-5a2c852f9dcc
    health: HEALTH_WARN
            Reduced data availability: 212 pgs inactive

  services:
    mon: 3 daemons, quorum compute01,compute02,compute03 (age 56m)
    mgr: compute01(active, since 56m), standbys: compute02, compute03
    osd: 3 osds: 3 up (since 40m), 3 in (since 40m)

  data:
    pools:   6 pools, 640 pgs
    objects: 0 objects, 0 B
    usage:   3.1 GiB used, 3.3 TiB / 3.3 TiB avail
    pgs:     33.125% pgs unknown
             428 active+clean
             212 unknown

[root@compute01 tmp]# ceph -s
  cluster:
    id:     31403b11-8a1e-432f-876e-5a2c852f9dcc
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum compute01,compute02,compute03 (age 56m)
    mgr: compute01(active, since 56m), standbys: compute02, compute03
    osd: 3 osds: 3 up (since 40m), 3 in (since 40m)

  data:
    pools:   6 pools, 640 pgs
    objects: 0 objects, 0 B
    usage:   3.1 GiB used, 3.3 TiB / 3.3 TiB avail
    pgs:     640 active+clean

[root@compute01 tmp]# ceph -s
  cluster:
    id:     31403b11-8a1e-432f-876e-5a2c852f9dcc
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum compute01,compute02,compute03 (age 56m)
    mgr: compute01(active, since 56m), standbys: compute02, compute03
    osd: 3 osds: 3 up (since 40m), 3 in (since 40m)

  data:
    pools:   6 pools, 640 pgs
    objects: 0 objects, 0 B
    usage:   3.1 GiB used, 3.3 TiB / 3.3 TiB avail
    pgs:     640 active+clean

恢复正常了,问题解决。

总结下:遇到这种问题,即使重做集群,问题依然存在,很头疼,只能仔细排查到底是什么原因导致的。
|
|