将设为首页浏览此站
开启辅助访问 天气与日历 收藏本站联系我们切换到窄版

易陆发现论坛

 找回密码
 开始注册
查看: 250|回复: 2
收起左侧

1 Large omap objects ceph health detail

[复制链接]
发表于 2022-8-19 17:00:37 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。

您需要 登录 才可以下载或查看,没有帐号?开始注册

x
Large omap objects
# ceph health detail
HEALTH_WARN 1 large omap objects
LARGE_OMAP_OBJECTS 1 large omap objects
    1 large objects found in pool 'is_recovery' #出现large omap的pool
    Search the cluster log for 'Large omap object found' for more details.
5 o" q- r; z; r0 o1 h; t! y4 S
7 O- _: C6 f6 u  t$ p, v

: W5 X- S: }5 `1 m' F5 b" h/ r; F" {8 R( f8 U" r+ Q4 o
ceph pg ls-by-pool  is_recovery|awk '{print "ceph pg "$1 " query|grep num_large_omap_objects"}'|sh -x
ceph pg 11.0 query|grep num_large_omap_objects
ceph pg 11.1 query|grep num_large_omap_objects
ceph pg 11.2 query|grep num_large_omap_objects
2 r3 T, r$ V4 O( i! n. d9 w
. r, e  \& }& Y; {+ E0 }
  [: t* F! x  v' O3 N* d  Y: v! E- M$ G

[root@ceph-1 ~]# ceph daemon mds.ceph-1 flush journal
{
    "message": "",
    "return_code": 0
}
[root@ceph-1 ~]#
[root@ceph-2 ~]# ceph daemon mds.ceph-2 flush journal
"mds_not_active"
[root@ceph-2 ~]# ceph daemon mds.ceph-2 flush journal
"mds_not_active"
7 n" i2 b1 b9 m, t4 i* j0 u
( r& k/ K( N+ d( a' r; C  @. x' O* q* x8 e4 ?. T

+ \, l4 ^& o# L
2 T, z. R1 h* W  C1 w( a; z  m8 e1 ]
 楼主| 发表于 2022-8-23 09:53:54 | 显示全部楼层
index pool的 large omap 处理
向单个bucket压测2000W个object,默认设置shard数为16,压测到1800W出现large omap,介绍一下错误定位和如何处理。

异常定位
集群状态如下
+ A, S- g3 _! O$ y* Q
[root@demo123 cephuser]# ceph health detail
# W. P/ ?- H% u* }# F0 ?: LHEALTH_WARN 16 large omap objects
$ l, s' O0 H. C$ C; \( B+ vLARGE_OMAP_OBJECTS 16 large omap objects" A& C5 f  B7 [4 N/ {$ m7 s
    16 large objects found in pool 'cn-bj-test2.rgw.buckets.index'# x. C8 R- u# B9 c- p5 F; s
    Search the cluster log for 'Large omap object found' for more details.
  y* Y& u) h$ P2 z7 n' O% G* ]复制! a* }$ D, M  p# x4 Q
通过脚本找到对应的pg信息,脚本请查看之前一篇omap large处理的文章。
) X- O9 K0 X% f! h- M6 g- d# `
4 M1 G/ e# u/ J+ h[root@demo123 cephuser]# python large_omap.py
4 y. K! u! ^4 I9 PLarge omap objects poolname = cn-bj-test2.rgw.buckets.index! T9 V; a4 ~6 x: d
pgid=13.1f OSDs=[78, 9, 59] num_large_omap_objects=1
3 T+ d! B- t4 l* D" b# Ypgid=13.33 OSDs=[59, 79, 19] num_large_omap_objects=1& r0 F6 \. D7 S! V4 U; |. q  U+ D5 ~
pgid=13.3c OSDs=[49, 29, 78] num_large_omap_objects=1
+ S% D6 b9 Q+ {pgid=13.3d OSDs=[48, 69, 9] num_large_omap_objects=1/ E* z) O# o, `
pgid=13.45 OSDs=[88, 39, 28] num_large_omap_objects=1- d8 @' Y! {' ?  c' A
pgid=13.4d OSDs=[38, 29, 89] num_large_omap_objects=1
6 s! `, z$ g; k, k+ Lpgid=13.50 OSDs=[68, 19, 59] num_large_omap_objects=1
3 C" a+ W1 ^( j# n) w9 S* xpgid=13.6b OSDs=[39, 79, 8] num_large_omap_objects=14 f5 Q# {8 }9 r1 K3 g" i
pgid=13.8e OSDs=[38, 9, 78] num_large_omap_objects=1
; B2 {/ y( b+ J7 y, \4 A1 Hpgid=13.d1 OSDs=[9, 88, 38] num_large_omap_objects=1
) A8 v: }3 y! U7 m7 g6 c; L" lpgid=13.d2 OSDs=[59, 88, 28] num_large_omap_objects=1: E/ R7 j% B% i8 H2 w# C3 ^
pgid=13.e1 OSDs=[19, 88, 49] num_large_omap_objects=1
! X5 F& H' t( }6 d' U4 |% @pgid=13.e4 OSDs=[38, 19, 89] num_large_omap_objects=1! g% r, e* N5 N9 U  V+ g
pgid=13.e7 OSDs=[19, 89, 38] num_large_omap_objects=1; I- R8 W# X( `. `7 W5 c( C' r
pgid=13.ec OSDs=[89, 28, 48] num_large_omap_objects=1& O6 c" w' S) C5 A, n: S
pgid=13.f5 OSDs=[38, 88, 19] num_large_omap_objects=1+ j4 P7 ]) _# l" ^; w$ V) ~& c
复制
查找OSD日志,确定object名称(".dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11"),发现omap条目数达到了2378492,超过默认告警值
  s/ u( \6 W6 e- R
, ^! i2 H7 K" W( F; S% T[root@demo123 cephuser]# zcat /var/log/ceph/ceph-osd.19.log-20181231.gz |grep "omap"4 @; U; a+ d$ y' ]
2018-12-30 23:00:42.334766 7f6583f44700  0 log_channel(cluster) log [WRN] : Large omap object found. Object: 13:87443b2d:::.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11:head Key count: 2378492 Size (bytes): 491722758
( z- G/ v' q& i. V: N复制
默认告警值为2000000,2378492>2000000,不建议去修改这个默认值,因为改得过大会加大集群出现异常的风险,属于掩耳盗铃。
* P6 e" x/ a4 I' @  Z% I* E
+ w  F9 O" G+ Q3 j) L  w% I  L% e, O[root@demo123 cephuser]# ceph daemon /var/run/ceph/ceph-osd.19.asok config show |grep large. C! {* f1 A# d6 Q
    "osd_bench_large_size_max_throughput": "104857600",) |" N. {/ X, u% F! f+ i9 N5 ~- N
    "osd_deep_scrub_large_omap_object_key_threshold": "2000000",
0 Q% C  n4 Z" ]) g' h0 v! O$ [1 f    "osd_deep_scrub_large_omap_object_value_sum_threshold": "1073741824",, h4 I  k2 [' R; n& p
复制/ E! Q. |" O+ d/ Y$ h
查看一下发生omap过大的bucket,确定相关信息2 ]- @, C, x  G
% s2 B; T. ^* @% Z" g) {3 |9 B+ ~
[root@demo123 cephuser]# radosgw-admin bucket stats --bucket=demo1
0 W" t1 |& G# ]4 ^; r2 k{" _* n  q4 X7 f
    "bucket": "demo1",
0 k( Q8 C* W  S) a- ]$ V, A7 z- a    "zonegroup": "68f1dcf5-0470-4a48-8cd2-51c837a2cafb",; z3 w6 l9 A# Y4 |: r% R5 \6 x9 q  K% L
    "placement_rule": "default-placement",5 Q; C5 B' O( A6 y8 r
    "explicit_placement": {
3 u$ J1 d  ]# R        "data_pool": "",- t- Z/ ]8 A$ {2 k6 {9 p
        "data_extra_pool": "",
& w' z# x7 I' ^  r: u        "index_pool": ""
" h6 a  J: _) r( {/ v/ b3 M    },
0 W  J$ I2 r) Z- ]' p    "id": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1", #当前bucket instance ID,
; A% ]0 q- |. P' K    "marker": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1",6 _- |6 t' ]6 Q) a0 Y
    "index_type": "Normal",
- }7 c7 O0 l. R/ z5 ]* b    "owner": "s3test",! m( R% z+ _' g; d
    "ver": "0#2638037,1#2637965,2#2632835,3#2632869,4#2632799,5#2632597,6#2633289,7#2633175,8#2637227,9#2637609,10#2637997,11#2632455,12#2631337,13#2631624,14#2631983,15#2632359",: o# s8 T3 _. ]9 T$ K
    "master_ver": "0#0,1#0,2#0,3#0,4#0,5#0,6#0,7#0,8#0,9#0,10#0,11#0,12#0,13#0,14#0,15#0", #16个shard" S' g1 B3 C1 e/ c
    "mtime": "2018-11-28 16:47:45.560039",
- C7 T+ ~( Q* {2 M3 K    "max_marker": "0#00002638036.2638608.5,1#00002637964.2638536.5,2#00002632834.2649479.5,3#00002632868.2633634.5,4#00002632798.2633370.5,5#00002632596.2633168.5,6#00002633288.2633860.5,7#00002633174.2633747.5,8#00002637226.2637798.5,9#00002637608.2638181.5,10#00002637996.2638569.5,11#00002632454.2633026.5,12#00002631336.2631914.5,13#00002631623.2632195.5,14#00002631982.2632554.5,15#00002632358.2632930.5",& b9 a. f! i+ J1 A' h  h7 C
    "usage": {5 I9 M. s. O8 A, N$ @3 D7 l
        "rgw.main": {) `4 Q" Y& Q. r6 n( |- B
            "size": 1975757355553,
2 Q. T, v3 ?  ~6 C7 v5 O            "size_actual": 2047893610496,2 W, a2 K- n/ W  ~2 Q& b( c
            "size_utilized": 1975757355553,0 Y, ^- X* k  B. ]8 w" r, P
            "size_kb": 1929450543,7 j( S8 f9 I; _. o; N3 q" w
            "size_kb_actual": 1999896104,
2 J+ W2 d: ], W: a2 B            "size_kb_utilized": 1929450543,
! c# J1 X( Q+ A' ?+ o  W. C            "num_objects": 19998962 #近2000Wobject' m0 p# O1 Q) L. @) z( z4 ~# _$ h
        }$ [3 K  U3 ^( L2 G! Z( l6 T
    },
/ d6 g# Z2 p/ ~' c8 r3 G    "bucket_quota": {* I( n1 B. _2 C/ H
        "enabled": false,3 U+ C2 e( @# `( z
        "check_on_raw": false,
# V, w2 p0 f& s. P" m$ H        "max_size": -1,
6 H( [% t/ m! H9 a1 P        "max_size_kb": 0,
0 K4 w; e# S# l! F, E1 u        "max_objects": -15 y8 @# l: n6 W# N. R6 t0 J( o0 x
    }
  d. p1 }0 N. ?$ w- K1 N5 q}
* P4 V! k* b% d! W+ c复制
异常处理
通过bucket reshard操作,将原来的bucket 重新划分shard,shard数量从16->64。注意reshard有风险,最好停掉客户端的读写操作以后再进行,同时如果你使用了multisite,请根据官方说明立即关闭Dynamic resharding特性。

Dynamic resharding 说明: http://docs.ceph.com/docs/mimic/radosgw/dynamicresharding/

做完reshard需要手工删除之前的索引数据,工具也提示了下面的内容。
7 r# Q6 ?* M5 t' \
  x  m( G. i3 }) u[root@demo123 cephuser]# radosgw-admin bucket reshard --bucket demo1 --num-shards 64
- ^0 e$ _; _5 ~: m" @# D*** NOTICE: operation will not remove old bucket index objects ***
, E" b% U! N! V+ K7 {0 z6 h***         these will need to be removed manually             ***
1 h; R! A6 ~+ E% b+ d. _+ ctenant:) t8 z  M) K( U; Y* |' B' t( K
bucket name: demo1# R  V. C3 Z; e" b0 B* u
old bucket instance id: afd874cd-f976-4007-a77c-be6fca298b71.34209.1- ^! [! t: M) ]
new bucket instance id: afd874cd-f976-4007-a77c-be6fca298b71.45786.1
1 g0 z7 c& e: n" q3 ~& Btotal entries: 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 19998962
* z/ W% P( a5 {! k9 B% J* i) n& K2019-01-03 11:42:33.741314 7f74d15c6dc0  0 WARNING: RGWReshard::add failed to drop lock on demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 ret=-2
7 l2 M# H! `4 g/ g, j复制: ]% M& ^5 b: [+ D* Z8 L/ S, o
检查reshard结果
0 B  ?6 j3 s- b9 O& F" z
% r" e% [' j) C# N2 p+ t5 [3 g[root@demo123 cephuser]# radosgw-admin bucket stats --bucket=demo1
6 r: d! g3 L. A3 q{
  }7 \$ F' @9 Y+ E+ n' H# r4 S    "bucket": "demo1",
$ b: N" r/ T5 E  A# q    "zonegroup": "68f1dcf5-0470-4a48-8cd2-51c837a2cafb",
1 d( [% a8 f5 B: _3 U* x6 U& L    "placement_rule": "default-placement",. j0 y7 d' A2 i  Y3 C$ i
    "explicit_placement": {9 U, {" w7 c: u/ b# Q2 P
        "data_pool": "",
3 g5 z- Q: E$ X) T' ?        "data_extra_pool": "",
# w+ i: w5 ]. b0 V8 B        "index_pool": ""$ T" }% k' ~. x& D/ P. V
    },
9 Y0 y3 {5 \- k& ?0 s9 ?% I    "id": "afd874cd-f976-4007-a77c-be6fca298b71.45786.1", #bucket instance ID发生变化& n% v+ A! F6 J3 c, M$ l, e
    "marker": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1",' z& I4 h* M3 ]& g
    "index_type": "Normal",
- L( }- t. U& X) w3 G    "owner": "s3test",
2 w6 g4 k  W, K/ I$ y+ i% c    "ver": "0#4920,1#4920,2#4883,3#4877,4#4882,5#4883,6#4885,7#4880,8#4882,9#4880,10#4878,11#4883,12#4923,13#4883,14#4882,15#4874,16#4878,17#4880,18#4884,19#4881,20#4882,21#4881,22#4876,23#4922,24#4883,25#4887,26#4881,27#4879,28#4879,29#4879,30#4882,31#4884,32#4880,33#4879,34#4917,35#4876,36#4883,37#4885,38#4884,39#4879,40#4883,41#4880,42#4880,43#4882,44#4884,45#4877,46#4879,47#4877,48#4881,49#4880,50#4881,51#4881,52#4883,53#4876,54#4880,55#4884,56#4881,57#4885,58#4882,59#4881,60#4881,61#4881,62#4883,63#4882",#shard 数量变为了649 n0 h7 F; v9 Q% \
    "master_ver": "0#0,1#0,2#0,3#0,4#0,5#0,6#0,7#0,8#0,9#0,10#0,11#0,12#0,13#0,14#0,15#0,16#0,17#0,18#0,19#0,20#0,21#0,22#0,23#0,24#0,25#0,26#0,27#0,28#0,29#0,30#0,31#0,32#0,33#0,34#0,35#0,36#0,37#0,38#0,39#0,40#0,41#0,42#0,43#0,44#0,45#0,46#0,47#0,48#0,49#0,50#0,51#0,52#0,53#0,54#0,55#0,56#0,57#0,58#0,59#0,60#0,61#0,62#0,63#0",
6 z1 d6 I+ S/ G6 }    "mtime": "2019-01-03 11:32:50.349905",- R8 H1 }9 X+ ?4 U: m4 V& P! A
    "max_marker": "0#,1#,2#,3#,4#,5#,6#,7#,8#,9#,10#,11#,12#,13#,14#,15#,16#,17#,18#,19#,20#,21#,22#,23#,24#,25#,26#,27#,28#,29#,30#,31#,32#,33#,34#,35#,36#,37#,38#,39#,40#,41#,42#,43#,44#,45#,46#,47#,48#,49#,50#,51#,52#,53#,54#,55#,56#,57#,58#,59#,60#,61#,62#,63#",& U- j" |, N# w/ Y" S9 ~3 Z6 Q
    "usage": {# {+ k$ t1 ?5 s3 o
        "rgw.main": {
6 X  T" B6 e2 t0 j2 Q! h4 ^+ t* ~            "size": 1975757355553,
. G: L# c+ d  q            "size_actual": 2047893610496,
" `3 a; x% \" k, ^" D3 l& x6 v' Y            "size_utilized": 1975757355553,: J) z, S& H. _, V7 r( t0 v
            "size_kb": 1929450543,# a; z" J0 p# l! Y3 o+ Y2 {$ @0 M( }
            "size_kb_actual": 1999896104,
9 b& i* G* K+ p9 p3 e            "size_kb_utilized": 1929450543,
& I# o/ ]% m) l$ y9 v  E: a0 T            "num_objects": 19998962
" h2 S, i' q. `& l. c, Z        }7 h2 G0 e6 I# ^" Z" l
    },
0 S+ N# |) D3 R; K  L2 J    "bucket_quota": {: _7 w, c7 t/ H+ k4 w3 ^) E: i  R
        "enabled": false,
' C) L" S2 s6 g* L4 K        "check_on_raw": false,: s9 v$ U+ }  Q& m  @/ \  c0 i
        "max_size": -1,
$ Z3 j  G+ m& h2 |3 y+ e. |        "max_size_kb": 0,* C2 Q) D5 w$ }) @  `7 X! |
        "max_objects": -1
' H) ^8 Q. }4 e' m: t6 L    }! ?2 t9 U% G1 }. l7 {/ _. ?( @* m+ W
}
- o# L. H9 k8 ]- H- ]复制
回收旧数据
根据之前工具的提示需要回收index和meta两个pool里面的残留数据

回收index pool数据
5 `2 d# Z1 ]6 a$ H! q' o/ _: f# V! Y, ?7 H7 R, L0 K! D
[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.buckets.index|grep "afd874cd-f976-4007-a77c-be6fca298b71.34209.1"
2 ~9 ~+ @" N5 n# j& o& F, E$ r.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.5  S4 r; _4 A0 r: b7 }
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.15
, E! h% c/ \- x.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.2- h  B0 @/ j  R# x* [
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.1, U. T  G! P: @$ w# |6 a% a
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.05 u6 V0 x5 Q4 j% r/ f- _
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.48 o" x4 C  q. R" M3 F! G( H
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11  p# D: o: o' ]7 |
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.13
$ w# Z* l; L3 v1 f.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.6
# Y7 x1 S$ n. \6 R$ ?9 j( _/ @2 ?.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.3
! w9 B! i1 b5 b" f  W/ T.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.7
% v4 L# F+ x9 |. `6 W4 ^1 s# o/ \.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.9- p& D, W0 \; j7 A8 ~% J: r. s. `* w
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.14
1 k+ J9 V2 K/ n. z% W.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.103 ^6 l3 h1 l! ]
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.12- e3 U( S1 y0 M+ d' H5 n, e
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.8
2 I/ C: ?7 J, v( v2 v- ?9 C# r复制' \3 S4 V; g6 D' J; R& {) g
使用rados rm命令删除数据/ ]# R( U' c# x3 q! ~2 B
- o2 X. X) O, q3 @: Y
[root@demo123 supdev]# rados ls -p cn-bj-test2.rgw.buckets.index|grep "afd874cd-f976-4007-a77c-be6fca298b71.34209.1"|awk '{print "rados rm -p cn-bj-test2.rgw.buckets.index "$1}'|sh -x
9 k4 S: `; h- l+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.5
, M7 [" T9 ?* h+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.15
. d3 ~& V( U3 b5 [6 h+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.2
7 a6 q1 c8 L: g1 R1 c! H+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.1
3 w8 P. Y* r: @" o9 |2 p+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.06 }4 w3 ~+ V8 C0 F1 J* p
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.44 ?: }; q  ~1 K; p' [
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11# N' ~! p  n& ]  t# v* p9 m1 Q& ^
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.13
5 c! t9 v. m! r* T4 Z+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.6
4 r0 ~$ {! n1 {: d, J/ ^. R4 y: |+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.3
6 ~5 N) S6 ]6 C# y! S$ l+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.7
# t4 e! c4 a3 u& a) A+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.9' x9 r( C9 b+ v9 T& ^% T' n, x. `7 G
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.14
, W7 P7 c" b' I5 N6 r- U) q9 O1 v, f+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.10/ T5 o5 y9 ^& A9 }, p6 D/ H
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.122 d# P! D# \4 J' P+ c6 f( ]
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.8$ ~8 ^" k' v& i; i* w- n% ]  c
复制  x; @& u7 z9 b: M- O
回收meta pool的数据
8 o4 H( n  Q: v5 K7 a. k
  p  K% R/ p8 u. B/ p[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.meta --all
+ P7 K6 {$ H. w# G( Hroot    demo1
( d7 R* u6 G% T9 H: i$ Hroot    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.45786.1& Z5 Y( ~, T$ T: x' p( d
root    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 #残留. K' y8 [0 x+ @  o: Y, B4 N
root    my-new-container_segments
$ f) L( X# ~3 qroot    .bucket.meta.demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1
. n( O" ]8 E2 Y7 i) Eroot    .bucket.meta.my-new-container:afd874cd-f976-4007-a77c-be6fca298b71.7991.1
9 O5 `, v2 S: i1 M- Q1 k8 q' u( uusers.uid    s3test.buckets
1 E4 G  p% S: z, j& Y* qusers.uid    swiftuser7 ^6 w5 A% L) o
users.swift    swiftuser:swiftuser1  N5 v  A3 f7 Q* a* O7 Y" ?% d
users.keys    SNACA4LX9DS21NGMSRX4* P' r* d% @1 ^9 o
root    .bucket.meta.my-new-container_segments:afd874cd-f976-4007-a77c-be6fca298b71.7991.4, L' {( H! k! h: O+ M4 J, f/ @. W
users.uid    s3test
, d# j- t4 H1 m" Zroot    demo22 Z4 {/ P. q' V9 i( c2 r6 C
users.keys    XP8E2452AB6EBU3RPD0C  L( W" C0 F1 Q) x9 v
root    my-new-container
3 {* K6 Q* l5 n- s* c9 E" Ausers.uid    swiftuser.buckets
8 J) `0 ]3 f5 L$ N- @4 ^" yusers.uid    synchronization-user' ~* C. x/ @- h. S! O3 x2 g
复制
注意这里用的ceph L版本,使用了namespace,所以要指定namespace才能删除
7 C; D7 L/ s8 t  k2 ^
# N# n( _0 s' _& p6 l( S# ~6 r5 b- e[root@demo123 cephuser]# rados rm  -p cn-bj-test2.rgw.meta .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 --namespace=root: L: f5 N  B* G4 j& G! D
[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.meta --all* T" a( D( s, K% \9 Y2 k
root    demo1
! b* H0 ]$ a2 qroot    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.45786.1
5 j7 V' V: K; T3 ^root    my-new-container_segments- K3 M# n) i" y' z, R4 H8 x3 V7 @
root    .bucket.meta.demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1
3 w4 T1 s2 K) Q* W) _root    .bucket.meta.my-new-container:afd874cd-f976-4007-a77c-be6fca298b71.7991.1
( F$ P+ Q" q+ ~) g! Q2 xusers.uid    s3test.buckets$ c7 g0 i: M5 C4 b" D( k: A
users.uid    swiftuser
: Q" m+ }7 N; S, O- d$ ^users.swift    swiftuser:swiftuser1, \0 ^# B; I' S, N" L! V
users.keys    SNACA4LX9DS21NGMSRX4- ?& w  \" U  s5 u; b, Z
root    .bucket.meta.my-new-container_segments:afd874cd-f976-4007-a77c-be6fca298b71.7991.4
' R# K0 |& L! a- N' lusers.uid    s3test+ F0 k6 d; Z  o6 w( H/ f1 M
root    demo2
$ x6 @9 @; ~) S# z7 N/ Dusers.keys    XP8E2452AB6EBU3RPD0C
7 t/ H" X. y$ i4 @  ^# oroot    my-new-container
, N  y- o9 D4 T6 O$ @9 Q. X8 H8 ]users.uid    swiftuser.buckets8 L9 F! n. u9 x3 e3 i
users.uid    synchronization-user# Y% h& T5 O: t( r' a
复制; y+ B: G1 R. r1 C/ v
清除large omap告警
删完了object,告警并不会自动恢复,需要手工对相应的pg进行deep-scrub操作,具体如下
3 u6 M8 [' u' c' B  g: z
[root@demo123 cephuser]# python large_omap.py
( g8 z% P7 H4 U1 @  KLarge omap objects poolname = cn-bj-test2.rgw.buckets.index
& s1 K' v# P( X- o* N! ppgid=13.33 OSDs=[59, 79, 19] num_large_omap_objects=1
% a3 c# B* V1 b! ^& fpgid=13.3c OSDs=[49, 29, 78] num_large_omap_objects=1
6 s6 I/ _1 F0 i1 Z& rpgid=13.3d OSDs=[48, 69, 9] num_large_omap_objects=1: {! Q" C- b( @3 H  ^8 |9 w2 w
pgid=13.45 OSDs=[88, 39, 28] num_large_omap_objects=1- b% d" K9 N+ O1 o& N0 g
pgid=13.4d OSDs=[38, 29, 89] num_large_omap_objects=1
4 k3 @- k3 ^: ipgid=13.50 OSDs=[68, 19, 59] num_large_omap_objects=1  H/ J; y" p8 o. v* ~4 _# A
pgid=13.6b OSDs=[39, 79, 8] num_large_omap_objects=14 l' r0 F' k& T0 G/ B
pgid=13.8e OSDs=[38, 9, 78] num_large_omap_objects=1
* F* f% ~+ E6 t, [6 Ypgid=13.d1 OSDs=[9, 88, 38] num_large_omap_objects=1
; g% P- `; n8 b0 j9 _7 B: hpgid=13.d2 OSDs=[59, 88, 28] num_large_omap_objects=1  }# d! T2 Q9 _% k8 K
pgid=13.e1 OSDs=[19, 88, 49] num_large_omap_objects=1
# N! y. ^2 L2 X4 h; Y2 B" f$ h$ G, d4 kpgid=13.e4 OSDs=[38, 19, 89] num_large_omap_objects=1; L! m% [+ \; V# |4 D6 K/ u
pgid=13.e7 OSDs=[19, 89, 38] num_large_omap_objects=1
2 m/ u2 w! _" Z9 M2 gpgid=13.ec OSDs=[89, 28, 48] num_large_omap_objects=1) v' s) u5 o5 w) _% a* o& M
pgid=13.f5 OSDs=[38, 88, 19] num_large_omap_objects=1' s4 a( v- L, @  Z
[root@demo123 cephuser]# ceph pg deep-scrub 13.33
0 t/ e. T9 Q. l5 [% S2 s% w7 x' y) Jinstructing pg 13.33 on osd.59 to deep-scrub
3 O3 \0 V$ K3 k1 R! s1 _  R! y6 Y复制- @$ q2 U. f( I; e4 _4 F' c  M
操作完可以看到有pg进行deep-scrub,之后状态恢复
  q, N, M  x9 L% `
[root@demo123 cephuser]# ceph -s4 k0 U' ?- Q! C5 R2 C: L
  cluster:
6 s& T3 ^4 n3 ^/ O- ~0 o2 A& {    id:     21cc0dcd-06f3-4d5d-82c2-dbd411ef0ed9
- U) }/ [5 }/ n6 P    health: HEALTH_WARN, O7 D! o2 E4 A: X( c: o6 y: F
            16 large omap objects
9 T8 |6 D; q0 c" n/ H2 k7 B  L2 Y! h5 u' i( H
  services:1 {9 r, {. s$ D- Y; J% S
    mon: 3 daemons, quorum demo122,demo131,demo141% s- t9 D1 Q( m: N2 E
    mgr: demo141(active)$ l# p9 \+ W: a& r5 q" m
    osd: 90 osds: 90 up, 90 in" e/ I1 i* B0 q( l, s  i
    rgw: 1 daemon active5 P5 k7 ?  u0 q6 F* ?2 b
0 o2 N% n4 K" o4 g
  data:  c+ ^+ o. n$ A. ~8 u
    pools:   7 pools, 3712 pgs
3 C0 k  D4 Y5 e9 l7 w+ B    objects: 20.13M objects, 1.80TiB
4 \' D8 {9 H6 q" Z    usage:   7.28TiB used, 408TiB / 415TiB avail; j5 d7 [" a9 w3 g6 b
    pgs:     3711 active+clean
; Y& r/ B# M) t" i9 f+ t             1    active+clean+scrubbing+deep #开始deep scrub
" a& g# s: [2 {; i+ }) Z% F( [& ?$ _3 x9 x
  io:
2 O2 ^, C" ]$ v' G* q7 _, y    client:   5.29MiB/s rd, 935B/s wr, 69op/s rd, 28op/s wr; w+ q( ]. a. d4 G3 ?) @8 Y, R

/ g: r% w" u* _, M9 ?& r! h7 L[root@demo123 cephuser]# ceph -s1 ]0 l& _  ]- G0 L0 X
  cluster:) f! E, |5 A0 w, r) u6 S
    id:     21cc0dcd-06f3-4d5d-82c2-dbd411ef0ed9. @4 Y8 X- b+ A, H- |: v$ Q3 @& d
    health: HEALTH_WARN8 k5 @0 J4 n% s5 z" T  e. P! b
            15 large omap objects #减少了1个
" h* l& K2 {9 |4 F% n% l
0 }1 i3 B  Z: b8 M4 _9 @  services:, U* _7 B* A: l  S: h
    mon: 3 daemons, quorum demo122,demo131,demo141' k' N0 X& A: |
    mgr: demo141(active)/ V& d' a  [3 W1 V% I. {4 [* T
    osd: 90 osds: 90 up, 90 in0 ~. u& a$ |6 o: [1 h; x
    rgw: 1 daemon active" |- P3 n# ^+ \, S2 @8 d/ v
7 P. Z/ D, T6 [& ?. r
  data:
* n/ o5 K: V- r/ O    pools:   7 pools, 3712 pgs
8 Y' R8 Q: ?# s8 T* V6 u    objects: 20.13M objects, 1.80TiB
1 ?4 e% N! i- U: l    usage:   7.28TiB used, 408TiB / 415TiB avail
/ y* ?! s8 |2 Q& d8 ~    pgs:     3712 active+clean
/ g6 F1 W3 G: \9 n* t2 |
/ t0 p3 h5 j8 U; i2 f) \' L& ~  io:
: {3 c) F3 p  g9 y3 ~8 p    client:   5.33MiB/s rd, 680B/s wr, 36op/s rd, 6op/s wr; c* h3 P/ |+ F: c  L- T1 O. B
复制
总结
index pool的omap告警一般就分为两类:

一类是object条目数过多,导致对应的index 元数据条目数过多,可以用上面的方法处理。
另外一类是bilog过多,这里的方法就不适用了,需要手工进行bilog清理,关于bilog后续会有详细章节介绍。
' ]( q+ `" A2 l: A5 v
 楼主| 发表于 2022-8-23 09:54:43 | 显示全部楼层
线上multisite环境出现HEALTH_WARN 32 large omap objects,已经设置bucket auto reshard=false,所以排除是bucket index所在的shard omap过大引发的问题。官方给出的告警信息无法定位到具体的object,于是有了下面的排错过程。
9 }1 k* a& Q4 H0 m
# Q% U2 F; ~1 ^3 _/ A& ?排查过程
& B' g5 i+ n' V, Z4 P0 k% k) E- c[root@demo supdev]# ceph health detail
$ l% m' W$ _7 M/ h7 ?! k3 O9 rHEALTH_WARN 32 large omap objects
  q+ e$ f  z/ mLARGE_OMAP_OBJECTS 32 large omap objects1 b& t( C$ a; ~3 P$ k; _
    32 large objects found in pool 'cn-bj-test1.rgw.log' #出现large omap的pool1 c# ^  @! }! Y7 g! E, V
    Search the cluster log for 'Large omap object found' for more details.4 F! R$ M  M, s$ S0 a7 w: j9 `
+ k* Q  p( h; U7 j7 N$ Z$ @

; T2 w" P8 [. n5 I# r  f[root@demo supdev]# ceph pg ls-by-pool cn-bj-test1.rgw.log |awk '{print "ceph pg "$1 " query|grep num_large_omap_objects"}'|sh -x
) m  u% T  B1 P8 W: Oceph pg 11.0 query|grep num_large_omap_objects+ v' M, [" Q- u/ ?! `' H* x: X
ceph pg 11.1 query|grep num_large_omap_objects+ e  H+ Y5 v$ \0 n
ceph pg 11.2 query|grep num_large_omap_objects, j: m# {5 b9 ~* s$ h1 [
......
+ ceph pg 11.1e6 query
+ grep num_large_omap_objects
                "num_large_omap_objects": 1 #有large omap的object数量
                    "num_large_omap_objects": 0
                    "num_large_omap_objects": 0
8 @4 g" G5 z5 a9 Z; M, o4 t$ ~
# R6 ~! |( ]$ O' \6 q  J% H7 T1 n6 _# F
[root@demo supdev]# ceph pg 11.1e6 query #查询pg详细信息$ ?8 f3 k8 b, [7 @+ v3 k7 k
{
" Z$ d" W# w  x" T  }    "state": "active+clean",
% e7 R- k- \5 D1 e5 k! u6 @.....' b; @5 m7 [4 r& y( n7 T# u4 y1 ?
    "info": {
% n; j" @" {2 O        "pgid": "11.1e6",
, ~4 J( }  [2 W; j! Q& T* _) {) B# ^        "last_update": "10075'3051746",) |/ z9 X3 o2 g. b
        "last_complete": "10075'3051746",
$ y4 T4 M# w: X, i' b5 I! O3 `        "log_tail": "10075'3050200",
$ `+ ^( Z9 w/ V! v        "last_user_version": 3051746,
0 ?! Y+ b3 W; w        "last_backfill": "MAX",
5 x$ {5 ?/ ~6 T        "last_backfill_bitwise": 0,
5 g  y' f: W( l8 F+ L+ G        "purged_snaps": [],
& c! ^( I5 j  d$ C.....
& A/ {/ z5 y3 j! K# H( O
3 F+ ^7 e. s; L5 q/ o+ e              "acting": [
/ L, @- S) k4 O                    46, #主OSD id=46" q' U$ z  \* B% g% |
                    63, #从OSD0 n" Q" |5 E, k" d7 A: U
                    23  #从OSD8 c8 ~7 Q# i1 s$ {8 r
                ],
5 ]4 `7 h% E. \& t: p: _            "stat_sum": {& ]& g; X. w% ]0 k4 }2 D
                "num_bytes": 40,
4 f# U+ B" D6 Y6 C                "num_objects": 2,
( u8 v9 A* V7 Q" [8 B/ k/ u                "num_object_clones": 0,
' s% C) b1 ?/ P. D( D: g                "num_object_copies": 6,, L5 d: m) c8 V6 Q4 D2 j
                "num_objects_missing_on_primary": 0,
/ c5 V3 I% d, j8 W/ m, m1 U                "num_objects_missing": 0,6 J5 T3 ^! E9 o6 f8 v  R! l  ^+ p
                "num_objects_degraded": 0,
% k  m- a1 V+ }6 G: \0 c2 g9 m                "num_objects_misplaced": 0,
1 |1 D$ `4 t. A* _" g  Z                "num_objects_unfound": 0,) O9 t: B% R/ Q1 Z% R- C
                "num_objects_dirty": 2,# e) e& {# s! K# a& y- `5 {
                "num_whiteouts": 0,; V1 N) }8 I1 K. `6 [$ j" n
                "num_read": 3055759,
% o% p) m, Y7 r$ G) [4 c, S6 V# }                "num_read_kb": 3056162,
! M5 d! d$ L4 ~) C$ N) Q0 K                "num_write": 5986011,
  X9 {: r3 c" _6 F; D- t4 b                "num_write_kb": 53,
- j- c: G- }- a* }6 y. X                "num_scrub_errors": 0,9 [# f: S7 q! J% f0 n0 V, Q
                "num_shallow_scrub_errors": 0,
- ~3 t/ o3 z. s, o7 E- d                "num_deep_scrub_errors": 0,
2 Y! |* L+ Y6 @/ C/ g7 [                "num_objects_recovered": 0,
  q# Q. h0 j& H! O- a9 l0 \                "num_bytes_recovered": 0,7 m1 o  p7 \8 {
                "num_keys_recovered": 0,& p2 h1 ^1 R4 U' q5 X$ s- J1 E
                "num_objects_omap": 1,, Y- K' B% p9 B$ |" k  x( w5 d
                "num_objects_hit_set_archive": 0,6 m4 H$ h( F0 v( o9 B4 X
                "num_bytes_hit_set_archive": 0,
' L) d2 H# T$ d1 n6 D2 j$ o- O                "num_flush": 0,) z& k% k( z9 {) [; Y
                "num_flush_kb": 0,' g+ `! }3 }9 n
                "num_evict": 0,
' S& R/ S. H& W  C% [. F                "num_evict_kb": 0,$ R5 ]8 [# `6 y( r7 i, @
                "num_promote": 0,
# U! R! q8 U% D6 e; |                "num_flush_mode_high": 0,0 s/ J/ {% g  U2 w9 b; p: y& v- e
                "num_flush_mode_low": 0,1 U" k& \7 X/ p5 @# ~6 t
                "num_evict_mode_some": 0,0 p2 v. Z8 M/ C& O* j
                "num_evict_mode_full": 0,- P' }1 x" A3 M4 g: |* G- ?# F( }
                "num_objects_pinned": 0,
1 F% S1 N6 K: B) k                "num_legacy_snapsets": 0,
3 F% R8 e: O/ R& _. u& d                "num_large_omap_objects": 1 #large omap的object数量
! s: e9 X; ?' G8 p( d: H            },: W/ _; h, n' R. L  d, v. u
            ...
! Q9 ^/ M' t+ x4 M' A: E                "agent_state": {}* y- ?% o' m: o2 r7 d- _
}
" b4 d( B  n- A+ K+ i2 I9 W
+ e# `6 _, m1 A" o4 m+ J) C& b5 e; z' O
[root@demo supdev]# ceph osd find 46 #根据OSD id找到对应的主机信息0 w' v2 f0 i% V9 ^" B- E7 |
{0 ^: e( i; U. D1 l5 r* ?& K! d, ^$ F9 {
    "osd": 46,$ W. R$ ^7 a6 G5 i
    "ip": "100.1.1.40:6812/3691515",; p. B' G' l7 `
    "crush_location": {
4 {, h% v& |% K        "host": "TX-100-1-40-sata",
' v9 j* R( p# a% {2 i4 q4 |, ^        "media": "site1-rack2-sata",* z, W0 j0 b. K
        "mediagroup": "site1-sata",3 g1 O* o" a* D, C
        "root": "default"
: Q. m$ N; c* m& X4 {+ Q    }
8 _6 R) C2 g' k0 o! h" @- l, U3 m}) m0 ]& C! g3 l
3 x+ p# y( g7 N

4 e. Q5 L. S4 j! n[root@demo supdev]# zcat /var/log/ceph/ceph-osd.46.log-20181210.gz |grep omap #根据OSD日志找到具体的object名称
9 C( J, f- I, m8 H: ^, e  [6 |. Z2018-12-09 23:03:18.803799 7f90e9b46700  0 log_channel(cluster) log [WRN] : Large omap object found. Object: 11:67885262:::sync.error-log.3:head Key count: 2934286 Size (bytes): 657040594 . y  r/ a! G# R3 h6 J4 J' V3 P
#OSD 46上的object名称为sync.error-log.3的omap超出标准
4 ]( I5 R: S1 {: K/ E4 z  }) m0 b7 |( D
4 ?6 Y/ j- a; l2 G1 n- N) G! E2 g* g
* W1 V% l. }- e4 w, j4 \
[root@demo supdev]# rados ls -p cn-bj-test1.rgw.log|grep "sync.error-log.3$" #确定objects存在
" Y& V" }# K, x7 L; `sync.error-log.3" m2 K+ A% |# @. H2 @# E! A5 U

#注意整个multisite的同步过程中的错误日志信息以omap形式存储在sync.error-log.*
#吐槽一下,错误日志分32个shard存储,代码写死了,而且错误日志目前还只能通过手工清理,无法像其他日志一样自动trim,随着错误日志不断堆积,才引发了今天的问题。
- ^$ f$ k' s8 b6 F  G
[root@demo supdev]# radosgw-admin sync error list|more #查看错误日志
[
/ W1 O# n3 r; P    {. k3 v6 |3 N+ n
        "shard_id": 0,+ d# g5 ~- i" v% ?# }
        "entries": [# q4 l* x! r3 [& J6 K9 G* ^
            {
5 n$ j; r" E5 Q% D                "id": "1_1540890427.972991_36.1",* P7 i3 O, t, l3 U% t  _7 j
                "section": "data",* U9 c5 n. L* O3 Y# m2 m
                "name": "demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1:3",2 v# R7 Y/ l9 J$ q4 {% M& U5 Z$ m
                "timestamp": "2018-10-30 09:07:07.972991Z",
; a# [& A3 d6 Q3 E$ L) K                "info": {9 `9 F# z; m1 o
                    "source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",8 ~: }7 o# m6 A# _; ^; o. r
                    "error_code": 5,0 Q! k$ g1 ~6 W2 W
                    "message": "failed to sync bucket instance: (5) Input/output error"& Q: Y8 x  G; A, h
                }
5 ~" A. L, p; G/ x            },
. A. [- I" T/ W( W......7 [" u/ C3 M& D! o
            {
  }- Z; c9 [& {  l4 z                "id": "1_1543395420.626552_32014.1",( X" V& J/ C# R6 |
                "section": "data",! `3 D$ x9 ~8 p1 L7 ?
                "name": "demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1:0/file1205085",
4 _0 F4 g8 T, X  B9 j  D                "timestamp": "2018-11-28 08:57:00.626552Z",
9 H" r& j+ S1 F, S$ z+ f8 ]) z8 Q- `                "info": {
! E$ j. w8 D5 ^$ K                    "source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",
& J- y1 t6 w, {  P6 I                    "error_code": 5,4 d- {: }( I/ l( y+ ]* }! k
                    "message": "failed to sync object(5) Input/output error"7 i# c, L8 g6 {" ]' t
                }
) Q$ \; _8 K: g8 p7 }5 x$ ^( w            }: m4 }1 H. `- k$ }& G
5 f" O" g9 o  {7 @
1 n+ z5 u8 n9 {: ?2 t' k
[root@TX-97-140-6 supdev]# radosgw-admin sync error trim --start-date=2018-11-14 --end-date=2018-11-28 #按日期清理错误日志记录* W: J0 ^/ G8 ]) s9 _' X
复制. W) Y- x3 s' u' D% W
优化定位效率
简单写了个脚本,先根据warn信息找pool,之后再根据pool找出有large omap objects的pg,凑合用,不保证没bug,在12.2.10下面测试通过。

: g2 }+ r5 q9 V8 y$ m[root@demo cephuser]# cat large_obj.py
& r0 L: v" |9 H3 Mimport json+ W( {/ h8 D; m. \! }) i( I/ q3 w, L- d
import rados6 ^; n8 e( a2 m
import rbd
% o2 Q$ w8 H7 E; J) p% Q+ U% a) H
ceph_conf_path = '/etc/ceph/ceph.conf'
7 v* e* g1 T5 x( l! krados_connect_timeout = 5
0 N! E' p: q+ v+ @) f* A6 C$ f! a/ t+ L6 S6 a
class RADOSClient(object):
    """Context manager that keeps a cluster connection open for a ``with`` block.

    The connection is obtained from ``driver._connect_to_rados(pool)`` when the
    object is constructed and released through
    ``driver._disconnect_from_rados(client, ioctx)`` when the block exits.

    Attributes:
        driver: the object that performed the connect and will do the disconnect.
        client: first element returned by ``driver._connect_to_rados``.
        ioctx: second element returned by ``driver._connect_to_rados``.
    """

    def __init__(self, driver, pool=None):
        self.driver = driver
        self.client, self.ioctx = driver._connect_to_rados(pool)

    def __enter__(self):
        return self

    def __exit__(self, type_, value, traceback):
        # Always release the connection; returning None lets any exception
        # raised inside the ``with`` body propagate.
        self.driver._disconnect_from_rados(self.client, self.ioctx)

class RBDDriver(object):
    """Opens and closes cluster connections through the ``rados`` bindings.

    Args:
        ceph_conf_path: configuration file path passed to ``rados.Rados``.
        rados_connect_timeout: value passed as ``timeout`` to ``connect()``;
            when negative, ``connect()`` is called without a timeout.
        pool: optional default pool on which an I/O context is opened.
    """

    def __init__(self, ceph_conf_path, rados_connect_timeout, pool=None):
        self.ceph_conf_path = ceph_conf_path
        self.rados_connect_timeout = rados_connect_timeout
        self.pool = pool

    def _connect_to_rados(self, pool=None):
        """Connect to the cluster and optionally open an I/O context.

        Args:
            pool: pool to open; falls back to ``self.pool`` when ``None``.

        Returns:
            ``(client, ioctx)``; ``ioctx`` is ``None`` when no pool is selected.

        Raises:
            rados.Error: if connecting or opening the pool fails. The client is
                shut down before the error is raised.
        """
        # The original ignored this argument and always used self.pool, so a
        # pool handed in by RADOSClient was silently dropped.
        if pool is None:
            pool = self.pool

        client = rados.Rados(conffile=self.ceph_conf_path)
        try:
            if self.rados_connect_timeout >= 0:
                client.connect(timeout=self.rados_connect_timeout)
            else:
                client.connect()

            ioctx = None if pool is None else client.open_ioctx(pool)
            return client, ioctx
        except rados.Error:
            client.shutdown()
            # Raising a bare string is itself a TypeError; raise a real exception.
            raise rados.Error("Error connecting to ceph cluster.")

    def _disconnect_from_rados(self, client, ioctx=None):
        """Close ``ioctx`` (when given) and then shut ``client`` down."""
        if ioctx is not None:
            ioctx.close()
        client.shutdown()

class cmd_manager(object):
    """Runs the monitor commands needed to locate large omap objects."""

    def _mon_json(self, cmd):
        """Send ``cmd`` (a dict) to the monitors and return the parsed JSON reply.

        Returns ``None`` when the command's return code is non-zero.
        """
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            # json.dumps escapes the values, so a quote in a pool name cannot
            # break the command; b'' is an empty input buffer on Python 2 and 3.
            result = dr.client.mon_command(json.dumps(cmd), b'')
            if result[0] != 0:
                return None
            return json.loads(result[1])

    def get_large_omap_obj_poolname(self):
        """Return the pool named in the LARGE_OMAP_OBJECTS health check.

        The name is taken from between the first pair of single quotes in the
        first detail message of the check.

        Returns:
            The pool name, or ``False`` when the health command fails, the
            check is not present, or its message carries no quoted name.
        """
        health = self._mon_json(
            {"prefix": "health", "detail": "detail", "format": "json"})
        if not health:
            return False

        # The check is absent on a cluster without this warning; the original
        # raised KeyError in that case.
        check = health.get("checks", {}).get("LARGE_OMAP_OBJECTS")
        if not check:
            return False

        detail = check.get("detail") or []
        if not detail:
            return False

        parts = detail[0].get("message", "").split("'")
        if len(parts) < 2:
            return False
        return parts[1]

    def get_pg_list_by_pool(self, poolname):
        """Return the parsed ``pg ls-by-pool`` reply for ``poolname``.

        Returns:
            The decoded JSON reply, or ``False`` when ``poolname`` is empty or
            the command fails.
        """
        if not poolname:
            return False

        pgs = self._mon_json(
            {"prefix": "pg ls-by-pool", "poolstr": poolname, "format": "json"})
        if pgs is None:
            return False
        return pgs

# Script body: find the pool named in the large-omap warning, then print every
# PG of that pool whose stats report at least one large omap object.
cmd_ = cmd_manager()
poolname = cmd_.get_large_omap_obj_poolname()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print("Large omap objects poolname = {0}".format(poolname))

# Without a pool name there is nothing to list; the original went on and crashed.
res = cmd_.get_pg_list_by_pool(poolname) if poolname else False

# NOTE(review): the script was tested on 12.2.10, where the reply is a plain
# list. Later releases appear to wrap the list in {"pg_stats": [...]} - confirm.
if isinstance(res, dict):
    res = res.get("pg_stats", [])

for i in res or []:
    if i["stat_sum"]["num_large_omap_objects"] != 0:
        print("pgid={0} OSDs={1} num_large_omap_objects={2}".format(i["pgid"],i["acting"],i["stat_sum"]["num_large_omap_objects"]))
复制
再爆一个雷
如果你认为通过上面方式清除omap集群就能立马恢复状态,那就太天真,告警信息“HEALTH_WARN 32 large omap objects”依然挂在那里不尴不尬,虽然omap清理了,但是因为对应PG状态没更新,所以告警信息依然存在,只能通过手工或者其他方式去触发PG的状态更新,我这边是通过ceph pg deep-scrub {pg}去触发pg信息更新,注意如果你用scrub是没用的,必须deep-scrub,这里又要吐槽官方的逻辑设计,真是WFK!当然你也可以放那里不管,等后台自动deep-scrub也能恢复。


您需要登录后才可以回帖 登录 | 开始注册

本版积分规则

关闭

站长推荐上一条 /4 下一条

如有购买积分卡请联系497906712

QQ|返回首页|Archiver|手机版|小黑屋|易陆发现 点击这里给我发消息

GMT+8, 2023-6-2 07:25 , Processed in 0.054124 second(s), 22 queries .

Powered by LR.LINUX.cloud bbs168x X3.2 Licensed

© 2012-2022 Comsenz Inc.

快速回复 返回顶部 返回列表