将设为首页浏览此站
开启辅助访问 天气与日历 收藏本站联系我们切换到窄版

易陆发现论坛

 找回密码
 开始注册
查看: 82|回复: 2
收起左侧

1 Large omap objects ceph health detail

[复制链接]
发表于 2022-8-19 17:00:37 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。

您需要 登录 才可以下载或查看,没有帐号?开始注册

x
Large omap objects
# ceph health detail
HEALTH_WARN 1 large omap objects
LARGE_OMAP_OBJECTS 1 large omap objects
    1 large objects found in pool 'is_recovery' #出现large omap的pool
    Search the cluster log for 'Large omap object found' for more details.
6 s" i0 Z3 _2 b9 `
: B. H- b* y8 }* ~/ j. T' ~& q. b) ?1 M: q

. K/ v4 A, g' u3 ~7 M& [6 [1 h) D: K  ~3 e1 R9 d# c) O
ceph pg ls-by-pool  is_recovery|awk '{print "ceph pg "$1 " query|grep num_large_omap_objects"}'|sh -x
ceph pg 11.0 query|grep num_large_omap_objects
ceph pg 11.1 query|grep num_large_omap_objects
ceph pg 11.2 query|grep num_large_omap_objects

* t. ~5 y, m, P; z& m! d$ C7 G  i9 g- B1 B. x

; C# Z* ^/ o3 Z8 b" A# e% @
[root@ceph-1 ~]# ceph daemon mds.ceph-1 flush journal
{
    "message": "",
    "return_code": 0
}
[root@ceph-1 ~]#
[root@ceph-2 ~]# ceph daemon mds.ceph-2 flush journal
"mds_not_active"
[root@ceph-2 ~]# ceph daemon mds.ceph-2 flush journal
"mds_not_active"

& p8 b  }# A  D) ~+ D8 Z: ?# _% O# I  b! q6 [7 V

8 H8 v. L+ M  M; z( }0 Y) i0 G/ i- Q4 X4 u0 W8 |

0 G  M0 A1 z/ l2 V, e+ K: a/ S+ B
 楼主| 发表于 2022-8-23 09:53:54 | 显示全部楼层
index pool的 large omap 处理
向单个bucket压测2000W个object,默认设置shard数为16,压测到1800W出现large omap,介绍一下错误定位和如何处理。

异常定位
集群状态如下
  [* [& j+ g/ ?- M& U8 ]; V7 t' W  X% B
0 P. ?& c+ N4 _2 c, A1 X[root@demo123 cephuser]# ceph health detail+ y0 r2 f8 m- X& Y0 i  b# K
HEALTH_WARN 16 large omap objects: x% ]9 W  V! ]! c
LARGE_OMAP_OBJECTS 16 large omap objects+ N* |* [9 l) Q
    16 large objects found in pool 'cn-bj-test2.rgw.buckets.index'
" [/ x0 v$ n* l- `    Search the cluster log for 'Large omap object found' for more details.
6 i2 g$ S3 P4 F7 S; i复制! P4 c5 P# i, J6 {! z# B# S
通过脚本找到对应的pg信息,脚本请查看之前一篇omap large处理的文章。
- H; L* k4 U7 ]2 B* W1 v; ]  h* C% }9 h3 R  e" s' g
[root@demo123 cephuser]# python large_omap.py% j) g9 O) R: z2 D2 A( f$ d6 t9 T
Large omap objects poolname = cn-bj-test2.rgw.buckets.index# Z: j  g+ ~) Q5 Q/ z0 {; Z* g
pgid=13.1f OSDs=[78, 9, 59] num_large_omap_objects=1
* ?6 l2 M9 m# w( d1 Q# E1 U( Npgid=13.33 OSDs=[59, 79, 19] num_large_omap_objects=1" I" e2 G5 ], r; ]- f+ t  @2 b
pgid=13.3c OSDs=[49, 29, 78] num_large_omap_objects=1. c8 D6 F' R. @: w$ u0 L& A
pgid=13.3d OSDs=[48, 69, 9] num_large_omap_objects=1
8 B2 w& T4 g9 I1 ~# U- {0 jpgid=13.45 OSDs=[88, 39, 28] num_large_omap_objects=1
$ ~+ h1 _2 @6 ]' q/ z+ Q8 Z2 opgid=13.4d OSDs=[38, 29, 89] num_large_omap_objects=1
# n  s; E4 h1 B' Y; p6 }5 {pgid=13.50 OSDs=[68, 19, 59] num_large_omap_objects=1! G9 J! N6 `8 D7 w! y
pgid=13.6b OSDs=[39, 79, 8] num_large_omap_objects=1; _. n) @) d; {; E
pgid=13.8e OSDs=[38, 9, 78] num_large_omap_objects=1; C# @! N% p$ l  g7 d
pgid=13.d1 OSDs=[9, 88, 38] num_large_omap_objects=1( c. e9 @8 w) N5 w; [2 `' ?" R/ i
pgid=13.d2 OSDs=[59, 88, 28] num_large_omap_objects=1; C& P0 H2 P: i" ^8 Z
pgid=13.e1 OSDs=[19, 88, 49] num_large_omap_objects=1
' D( K8 l. |9 L' f) f4 {pgid=13.e4 OSDs=[38, 19, 89] num_large_omap_objects=1
$ x3 N4 V) M; m9 k/ Wpgid=13.e7 OSDs=[19, 89, 38] num_large_omap_objects=1  p4 Q: p1 P' H6 U% c; @
pgid=13.ec OSDs=[89, 28, 48] num_large_omap_objects=1
& p$ I# @* k( X8 W- O# a" g9 `- _, Ipgid=13.f5 OSDs=[38, 88, 19] num_large_omap_objects=18 f6 F0 y6 H- v/ F1 L' H2 G
复制1 V! h# i9 @( s+ ^. x7 G8 e
查找OSD日志,确定object名称(".dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11"),发现omap条目数达到了2378492,超过默认告警值
- K/ M$ \# F* ^" g* ]# H0 f
' a# [, E( `" y- T0 u[root@demo123 cephuser]# zcat /var/log/ceph/ceph-osd.19.log-20181231.gz |grep "omap"$ W, ^* q; Y' x( ^- _: O* U' t
2018-12-30 23:00:42.334766 7f6583f44700  0 log_channel(cluster) log [WRN] : Large omap object found. Object: 13:87443b2d:::.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11:head Key count: 2378492 Size (bytes): 491722758! N% k& w" w% M3 m0 F0 k
复制7 R1 T7 S+ u2 {$ |) [* j2 y
默认告警值为2000000,2378492>2000000,不建议去修改这个默认值,因为改得过大会加大集群出现异常的风险,属于掩耳盗铃。
% ]' d$ W) T9 _  I. l6 ?9 w' ]  V0 m1 d7 ~9 w- b
[root@demo123 cephuser]# ceph daemon /var/run/ceph/ceph-osd.19.asok config show |grep large
: J  @9 T$ J1 c- V/ Q    "osd_bench_large_size_max_throughput": "104857600",% I1 ]! s; y* {3 ]' h6 `6 i# C3 p
    "osd_deep_scrub_large_omap_object_key_threshold": "2000000",
, m9 _7 r$ H" y5 S. ^    "osd_deep_scrub_large_omap_object_value_sum_threshold": "1073741824",
% Q2 d; R' U! e3 o复制( h) n0 O0 G% o2 i
查看一下发生omap过大的bucket,确定相关信息% |/ O7 G9 Q0 Z" S1 u4 E+ [

; X( P+ G% q* f' t[root@demo123 cephuser]# radosgw-admin bucket stats --bucket=demo13 }1 Z# i$ B* m$ m1 K
{
4 \! b& t3 [; l5 b" t    "bucket": "demo1",
, s) p- I! N* l3 z: |- I/ X8 j# T    "zonegroup": "68f1dcf5-0470-4a48-8cd2-51c837a2cafb",: K3 p6 r4 c0 ]4 t
    "placement_rule": "default-placement",' S3 C$ w+ u# ]
    "explicit_placement": {
6 z, y+ w# \/ f4 o9 x$ `, I9 f! j        "data_pool": "",$ s+ s( ?2 _: b! E0 t
        "data_extra_pool": "",8 [! w6 P1 p; z! ^
        "index_pool": ""
7 Q6 K. |* W  {# S- L    },
* F9 l% v& c% u6 L    "id": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1", #当前bucket instance ID,' b/ E! _& `" S+ t
    "marker": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1",# I7 z; H7 h9 o7 i
    "index_type": "Normal",0 d) F( j4 @- R) I
    "owner": "s3test",5 K) p8 j) ?9 P5 {
    "ver": "0#2638037,1#2637965,2#2632835,3#2632869,4#2632799,5#2632597,6#2633289,7#2633175,8#2637227,9#2637609,10#2637997,11#2632455,12#2631337,13#2631624,14#2631983,15#2632359",5 j% }9 u$ S( d3 J5 M! j' ^" N* _
    "master_ver": "0#0,1#0,2#0,3#0,4#0,5#0,6#0,7#0,8#0,9#0,10#0,11#0,12#0,13#0,14#0,15#0", #16个shard0 m) I/ G) e& P  C0 b
    "mtime": "2018-11-28 16:47:45.560039",# o3 g" K* Q1 r; s  e7 V' M
    "max_marker": "0#00002638036.2638608.5,1#00002637964.2638536.5,2#00002632834.2649479.5,3#00002632868.2633634.5,4#00002632798.2633370.5,5#00002632596.2633168.5,6#00002633288.2633860.5,7#00002633174.2633747.5,8#00002637226.2637798.5,9#00002637608.2638181.5,10#00002637996.2638569.5,11#00002632454.2633026.5,12#00002631336.2631914.5,13#00002631623.2632195.5,14#00002631982.2632554.5,15#00002632358.2632930.5",
, R* O/ v) R2 p+ l) B    "usage": {2 g6 T1 n3 n" ?% u
        "rgw.main": {
# o, v. s& C4 N0 L            "size": 1975757355553,
& H" k3 X# Z; J' q            "size_actual": 2047893610496,) K' l! q# D: ~1 ?4 J, r' T
            "size_utilized": 1975757355553,
& D- I0 ~0 K5 _7 ]9 Y            "size_kb": 1929450543,
# y  n1 V: b+ w            "size_kb_actual": 1999896104,
5 h, g+ t/ [& C9 n- t  ^" {            "size_kb_utilized": 1929450543,0 s4 h$ P7 }3 F/ d: c4 T! M
            "num_objects": 19998962 #近2000Wobject( }( V0 F- i& H, X( [* q# S+ Z
        }6 F5 l+ I3 h7 F9 o$ F* _
    },) D6 S4 J) _; r3 c- f$ j
    "bucket_quota": {
" O  [# |2 D- ?8 P        "enabled": false,/ C+ t9 O) T6 j
        "check_on_raw": false,. a9 y2 b( Z  P0 Q  O3 Z. i
        "max_size": -1,5 M  ~( c4 l  P* b
        "max_size_kb": 0,
6 Y1 M" y- X" G' c9 `# |        "max_objects": -18 C) C7 |9 Q/ |$ K7 t0 X# O! f
    }+ h+ K7 J  i0 z0 i- I2 U
}0 y4 M* l7 ~& W: M
复制
异常处理
通过bucket reshard操作,将原来的bucket 重新划分shard,shard数量从16->64。注意reshard有风险,最好停掉客户端的读写操作以后再进行,同时如果你使用了multisite,请根据官方说明立即关闭Dynamic resharding特性。

Dynamic resharding 说明: http://docs.ceph.com/docs/mimic/radosgw/dynamicresharding/

做完reshard需要手工删除之前的索引数据,工具也提示了下面的内容。

+ V; a0 V5 \* m8 f$ l5 ^$ F' ]$ w[root@demo123 cephuser]# radosgw-admin bucket reshard --bucket demo1 --num-shards 64- [5 g* Z( r6 v+ c, \# W2 A" ]
*** NOTICE: operation will not remove old bucket index objects ***& \9 _( d$ {  u% v1 E
***         these will need to be removed manually             ***5 D9 n: F& Y0 u* O0 Q
tenant:9 P4 A: i" z* v1 [
bucket name: demo1
* n+ I4 N& W0 M9 @$ Q! S8 B4 F5 sold bucket instance id: afd874cd-f976-4007-a77c-be6fca298b71.34209.1
0 z' c4 Q  {) `5 ]3 N* h5 b! Knew bucket instance id: afd874cd-f976-4007-a77c-be6fca298b71.45786.15 z! S. \8 F6 Q  {0 ]
total entries: 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 19998962
, ]3 ^2 i* K( ]; U2019-01-03 11:42:33.741314 7f74d15c6dc0  0 WARNING: RGWReshard::add failed to drop lock on demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 ret=-2
9 X/ @- F# s1 j  M" n0 A复制
; E5 j  U$ [# D. @检查reshard结果8 V* _; k1 D% K4 q. p
9 l* b  r" U0 l
[root@demo123 cephuser]# radosgw-admin bucket stats --bucket=demo1
5 `3 k- ^3 t; C. X% Z( }{! g* Z8 n$ m, o, w3 K
    "bucket": "demo1",
" B2 V! N" u. ?( g& n1 ?  A    "zonegroup": "68f1dcf5-0470-4a48-8cd2-51c837a2cafb",
: v# ?1 l5 N6 U9 P2 z    "placement_rule": "default-placement",
, T; M& D, i5 ?4 @7 J    "explicit_placement": {" N, a" o) ]7 h) f2 i6 W; Y
        "data_pool": "",3 M8 l1 b( z! y% s; f- t
        "data_extra_pool": "",
# u% k( |6 ]3 _) V2 y        "index_pool": ""
8 U: K5 n% I: K    },
/ u+ f; c. T5 Q2 ^' n' a4 t$ R9 k    "id": "afd874cd-f976-4007-a77c-be6fca298b71.45786.1", #bucket instance ID发生变化7 {5 L: y1 J  n# E; h$ J4 G
    "marker": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1",+ t! J& D. Y, q0 D* r7 H
    "index_type": "Normal",
& B3 V: {- B$ V& N    "owner": "s3test",1 v7 G4 F, E, U0 _/ C& [% j
    "ver": "0#4920,1#4920,2#4883,3#4877,4#4882,5#4883,6#4885,7#4880,8#4882,9#4880,10#4878,11#4883,12#4923,13#4883,14#4882,15#4874,16#4878,17#4880,18#4884,19#4881,20#4882,21#4881,22#4876,23#4922,24#4883,25#4887,26#4881,27#4879,28#4879,29#4879,30#4882,31#4884,32#4880,33#4879,34#4917,35#4876,36#4883,37#4885,38#4884,39#4879,40#4883,41#4880,42#4880,43#4882,44#4884,45#4877,46#4879,47#4877,48#4881,49#4880,50#4881,51#4881,52#4883,53#4876,54#4880,55#4884,56#4881,57#4885,58#4882,59#4881,60#4881,61#4881,62#4883,63#4882",#shard 数量变为了64
! B. H' L7 g1 ?6 a' g    "master_ver": "0#0,1#0,2#0,3#0,4#0,5#0,6#0,7#0,8#0,9#0,10#0,11#0,12#0,13#0,14#0,15#0,16#0,17#0,18#0,19#0,20#0,21#0,22#0,23#0,24#0,25#0,26#0,27#0,28#0,29#0,30#0,31#0,32#0,33#0,34#0,35#0,36#0,37#0,38#0,39#0,40#0,41#0,42#0,43#0,44#0,45#0,46#0,47#0,48#0,49#0,50#0,51#0,52#0,53#0,54#0,55#0,56#0,57#0,58#0,59#0,60#0,61#0,62#0,63#0",
( F" j2 H0 \$ Q! C7 D# Q9 [/ e) @    "mtime": "2019-01-03 11:32:50.349905",
7 H  t6 f% t( Y, v  u8 d    "max_marker": "0#,1#,2#,3#,4#,5#,6#,7#,8#,9#,10#,11#,12#,13#,14#,15#,16#,17#,18#,19#,20#,21#,22#,23#,24#,25#,26#,27#,28#,29#,30#,31#,32#,33#,34#,35#,36#,37#,38#,39#,40#,41#,42#,43#,44#,45#,46#,47#,48#,49#,50#,51#,52#,53#,54#,55#,56#,57#,58#,59#,60#,61#,62#,63#",# P8 @- U( ]4 V# J8 B3 ?
    "usage": {
1 L$ K6 n# i: d5 _+ b6 p2 [# ~2 Q/ c! z        "rgw.main": {- _1 `+ @. ?5 ?) {9 Z9 F! I
            "size": 1975757355553,1 Q! _: z: O; O* s% F( p$ _& A
            "size_actual": 2047893610496,* ^0 H$ W# z2 N9 w/ j% s) @
            "size_utilized": 1975757355553,/ V2 w- X: D, G, D6 J" ~
            "size_kb": 1929450543,
; Q: Z) a0 q* Y  @/ V0 @: A            "size_kb_actual": 1999896104,, R% {: N) C4 D4 X* C. g
            "size_kb_utilized": 1929450543,
, s0 D, r2 I5 G- a* l* @            "num_objects": 19998962
# R. M; q! X3 h6 q' m        }% p6 R0 G; ?& N; h5 Y; q: n1 l6 R
    },
3 G- z, e; Y$ y8 Q6 w3 M: L    "bucket_quota": {3 i7 |8 O# O1 j+ N$ [7 ]
        "enabled": false,$ a4 w1 v6 x2 t9 j; j  n# e2 o$ ]
        "check_on_raw": false,7 O4 \9 F7 t  O. i% c. j: U
        "max_size": -1,
+ s2 {) j; C& m7 C7 u        "max_size_kb": 0,
; [5 [& s9 M! j9 y, _        "max_objects": -18 W* t! B( V* D8 L
    }
1 v# M3 n4 e# _  x: Y0 l}
; C" C) f# P, U2 w复制
回收旧数据
根据之前工具的提示需要回收index和meta两个pool里面的残留数据

回收index pool数据

# M1 f; e  T1 a[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.buckets.index|grep "afd874cd-f976-4007-a77c-be6fca298b71.34209.1"
( o' w' y( a; M& {% ~# j.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.5/ s8 A% G% Q$ u/ @5 w4 h
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.15+ e' @2 Q4 R  d( O) f
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.2
/ v6 H  k$ @" l% I8 g! A3 O.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.1" i. w6 U( R8 }8 I( R9 u
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.0
; A* E/ u+ ~" B4 r6 I2 N6 `.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.4' W3 }* Q* }6 w8 J3 ^4 r) |
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11
* n; f  u0 O" N0 M' B.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.13* U0 T& z& G  A/ ]/ F  a: u  }
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.6
% u$ {! C% k8 L" [.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.3
) Z4 R2 s2 F. W( ~! R5 h1 E7 y.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.7
& ~! a+ Y" V3 u6 F1 k8 g.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.9: Q& P& G6 T6 e" r/ ~6 t( W
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.14
. T' y5 f6 x6 z8 {) v.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.10( X; X% |' W0 i/ t- n3 W
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.122 ]9 R2 p+ ]1 {6 u& l+ W: k) D
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.8* @, }2 }" m0 G" e5 d
复制
$ G- y4 l% g2 a* Y) q4 B使用rados rm命令删除数据$ S) o+ _1 v& n- W! q3 g

) k* Z3 V, U3 M* }, f4 ]; B[root@demo123 supdev]# rados ls -p cn-bj-test2.rgw.buckets.index|grep "afd874cd-f976-4007-a77c-be6fca298b71.34209.1"|awk '{print "rados rm -p cn-bj-test2.rgw.buckets.index "$1}'|sh -x
+ z$ [+ t/ ]( B( E+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.53 F( p. z; x# m. C# C* [" r
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.15- I" k8 l7 q  }- `% G3 {& J3 Q
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.2
! D3 C8 n0 X7 Y6 M, X+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.1' h- g/ Q, Y: U
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.0
( W; s  s4 d1 H% ?' m+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.4
$ z) q) Q1 v. B* k, H6 j+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11
0 m) u9 W* b1 E+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.13' g: V( h% x+ X* B* C
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.64 H# t- ^" g' @& W
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.3/ O% f1 C" ?2 x, ?/ X, m5 r( k
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.7
/ M5 Q4 e4 L, j. T+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.98 f- ^# O0 _/ y
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.14! W0 v% o% M+ q3 ?, E  f
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.10% u5 f3 a9 m7 `, F( G/ ~$ Q3 J
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.12
- x* X$ A, c1 W  u+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.8- m5 H8 G6 ^2 D: P% |7 V9 a' H- X$ [
复制
1 W( U/ k5 F  X8 ^回收meta pool的数据6 ^# ^( }; G; f2 Q' i( E0 ]  f
' i  G; l6 |4 `
[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.meta --all* C& X/ s* q4 a1 l9 N0 u8 r
root    demo1
$ a8 U5 K3 H2 a6 D( O. Mroot    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.45786.1
$ [" s. [  \: Q4 k& {root    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 #残留
0 Z& A8 z; W( Z# \root    my-new-container_segments
" w/ h( U9 c+ O: ^' \: w  }7 Jroot    .bucket.meta.demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1
- d, w1 |, y, G# p; e8 eroot    .bucket.meta.my-new-container:afd874cd-f976-4007-a77c-be6fca298b71.7991.1' s6 G2 F8 g( ]4 Y1 X" c
users.uid    s3test.buckets, K% h# E: g+ `5 U9 a/ f5 _/ Q
users.uid    swiftuser% ^6 R6 t* n# x* C& ^- ^( a
users.swift    swiftuser:swiftuser1+ Z( L( T# Q+ b  r
users.keys    SNACA4LX9DS21NGMSRX4
# l# R. d6 B+ d4 Z7 Aroot    .bucket.meta.my-new-container_segments:afd874cd-f976-4007-a77c-be6fca298b71.7991.4
7 q1 ?, L1 S2 c+ {: k, ]2 K- g( Zusers.uid    s3test8 [: ~. z( k" h$ r, d
root    demo21 c2 j# B( @. G  L0 V
users.keys    XP8E2452AB6EBU3RPD0C
* s/ m; `% p2 j8 Jroot    my-new-container
+ ^( m9 O6 r) a' B5 b  ?: {users.uid    swiftuser.buckets- V0 a2 ~/ w! v& t# z* G" d
users.uid    synchronization-user* [# `: W: |5 b- w
复制
: @# x* ]- p& z1 c7 x! {注意这里用的ceph L版本,使用了namespace,所以要指定namespace才能删除
/ R8 J  l0 ?. l1 l3 |. i+ T6 U8 Z, U" a% f% O4 W7 ~5 X/ V
[root@demo123 cephuser]# rados rm  -p cn-bj-test2.rgw.meta .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 --namespace=root
& v8 o( Z# `, g. b; O[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.meta --all. S' K6 g$ I, H$ a1 I$ w$ j
root    demo1/ {, Q  }" u7 F! J3 |+ Q
root    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.45786.1, k* z+ F- k9 g% \  e
root    my-new-container_segments/ l, E; J! r  p, C* y
root    .bucket.meta.demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1
& ]" g9 [7 L2 t7 J4 i5 x' y  w8 G* o7 @root    .bucket.meta.my-new-container:afd874cd-f976-4007-a77c-be6fca298b71.7991.1
  [1 O7 e6 o' ?users.uid    s3test.buckets
8 p2 u# L1 `% \; R4 J$ W& jusers.uid    swiftuser
1 D. U5 n% h) I# gusers.swift    swiftuser:swiftuser1
& n( [4 [  a# y) z3 q: Cusers.keys    SNACA4LX9DS21NGMSRX4" y5 l- X) U2 j3 S  n
root    .bucket.meta.my-new-container_segments:afd874cd-f976-4007-a77c-be6fca298b71.7991.4! \1 U& p- Q. L9 g  U: ~) t% ?  e9 {
users.uid    s3test
* Q! p1 q- Q3 broot    demo2) V0 k8 S/ d/ S, V- D
users.keys    XP8E2452AB6EBU3RPD0C3 L- t; y* x  g# T3 o6 y
root    my-new-container
0 u7 O5 y% d% v+ M" p9 r7 eusers.uid    swiftuser.buckets
6 o% }6 V8 ~# d5 ?users.uid    synchronization-user# u( \" I6 Z( V) }8 J
复制
清除large omap告警
删完了object后告警并不会自动消除,需要手工对相应的pg进行deep-scrub操作,具体如下
5 I- k3 A- D) }  k
5 {: p8 Y4 h5 K[root@demo123 cephuser]# python large_omap.py- r6 c, q7 N3 j+ s( f
Large omap objects poolname = cn-bj-test2.rgw.buckets.index
" b# F. o9 x# r( q9 s/ ypgid=13.33 OSDs=[59, 79, 19] num_large_omap_objects=1& s' A9 W. K% A3 {  g3 M. A% D
pgid=13.3c OSDs=[49, 29, 78] num_large_omap_objects=17 E5 m% X5 B. W; C) U. V
pgid=13.3d OSDs=[48, 69, 9] num_large_omap_objects=1# E  `2 I5 ]% b
pgid=13.45 OSDs=[88, 39, 28] num_large_omap_objects=19 ?' H) w2 j& @9 k+ A" D
pgid=13.4d OSDs=[38, 29, 89] num_large_omap_objects=1
+ `6 r! X3 ~* X  U" @* v8 I$ Epgid=13.50 OSDs=[68, 19, 59] num_large_omap_objects=1" m9 H3 S: M( N. E4 m
pgid=13.6b OSDs=[39, 79, 8] num_large_omap_objects=1  r/ Y+ W, D4 O
pgid=13.8e OSDs=[38, 9, 78] num_large_omap_objects=1
; ^$ h6 u+ O5 m- E& R% w* u/ A$ Rpgid=13.d1 OSDs=[9, 88, 38] num_large_omap_objects=1
2 r; q- l: G% G3 {( \pgid=13.d2 OSDs=[59, 88, 28] num_large_omap_objects=1% O6 n. k: V& a% u$ I+ Q1 ]
pgid=13.e1 OSDs=[19, 88, 49] num_large_omap_objects=1
' `  I" n4 P) ppgid=13.e4 OSDs=[38, 19, 89] num_large_omap_objects=1
+ u$ _& w7 u  J( Apgid=13.e7 OSDs=[19, 89, 38] num_large_omap_objects=19 w  _. c% v6 H
pgid=13.ec OSDs=[89, 28, 48] num_large_omap_objects=1
/ r/ J/ ?) d2 b  n3 p! P* ~" Rpgid=13.f5 OSDs=[38, 88, 19] num_large_omap_objects=1
' _9 B$ E" d" V[root@demo123 cephuser]# ceph pg deep-scrub 13.33! I1 o' Z! {& y/ g$ R1 O
instructing pg 13.33 on osd.59 to deep-scrub
  ]+ ^1 O. q- L2 {6 T5 b; k复制
操作完可以看到有pg进行deep-scrub,之后状态恢复
2 J. C* b' M( ]& M# d% z" a" S* A( w, K/ D/ o/ u& R8 ~' _, C3 |
[root@demo123 cephuser]# ceph -s: N( B9 g. z. Z
  cluster:4 C- u+ E6 s6 e  \1 K7 w( x0 Z
    id:     21cc0dcd-06f3-4d5d-82c2-dbd411ef0ed9
* Y; F1 d/ l8 L0 D    health: HEALTH_WARN
$ |& H5 Z3 h1 a+ B7 O0 b            16 large omap objects' i5 `& z( {+ l; f& X. e3 M
/ C( H& ~; a% |% ^! E7 L
  services:) g0 r) K) W+ k+ Q" @! j! O& P
    mon: 3 daemons, quorum demo122,demo131,demo1410 ^, w7 C. f5 Z! i3 D* ?( Q" U
    mgr: demo141(active); s7 t8 Z$ {7 |+ |- R
    osd: 90 osds: 90 up, 90 in  @( N. l4 P! T7 ~
    rgw: 1 daemon active' v3 {4 k6 B0 P+ E) e8 S& `

: T: h4 F7 T7 J* a& Y1 v  data:
; J# k) |. q8 \- S- O0 y8 |    pools:   7 pools, 3712 pgs
2 y" p0 X# Y( X! v, x# h' O    objects: 20.13M objects, 1.80TiB, G" C1 J5 p4 X, F' e
    usage:   7.28TiB used, 408TiB / 415TiB avail4 y, Q# {2 w9 O5 D9 Z1 v
    pgs:     3711 active+clean+ E# X1 Z7 q+ O* i1 f
             1    active+clean+scrubbing+deep #开始deep scrub, m2 Z+ s) n: O) J

; h3 A. u2 m5 a9 O' a5 j& L) X. p  io:
, K1 x( k4 O: e4 b7 q    client:   5.29MiB/s rd, 935B/s wr, 69op/s rd, 28op/s wr* W4 U6 i* \3 ~8 o( m  n

9 _: p" t3 `1 C# H2 E[root@demo123 cephuser]# ceph -s
2 V* I  k9 J6 F  X9 y( Y1 _* ^9 o# f  J  cluster:
7 c6 h4 m. R8 D. m3 B2 F( n! ]  A% G    id:     21cc0dcd-06f3-4d5d-82c2-dbd411ef0ed9. u; u; v) ^# D4 k( r% Y; R5 i
    health: HEALTH_WARN8 [. C9 D+ K6 O. X% S( k
            15 large omap objects #减少了1个
3 ?  G! I) N; g- x  u5 S
0 c/ [7 E" A  D  services:0 H# |; W; q. [( C5 R% g2 H9 N
    mon: 3 daemons, quorum demo122,demo131,demo141' o5 R' y( u: [6 a. @
    mgr: demo141(active): _. X: ?7 L" |6 V2 U; Z
    osd: 90 osds: 90 up, 90 in
6 X. c% I4 d" i$ b5 w    rgw: 1 daemon active
* z, L4 i/ I/ S. C4 M. V' u! [+ W8 S6 U* Q* D/ B6 W
  data:: R( m$ S( m8 i7 V
    pools:   7 pools, 3712 pgs
# u4 s. [8 m6 t, g, s& A; y    objects: 20.13M objects, 1.80TiB
0 k: k# Z, Y8 h, d( K. S3 \- h0 j    usage:   7.28TiB used, 408TiB / 415TiB avail
% e1 w9 Z2 Z* U2 ~$ v: `  R    pgs:     3712 active+clean
& W6 Q3 j; B& ^2 Z6 F
9 x8 C$ K# s$ n  io:
8 G/ B$ U( T- |5 n; p" k' |    client:   5.33MiB/s rd, 680B/s wr, 36op/s rd, 6op/s wr
% ^! C1 n5 p7 }7 N1 C' c' E复制, B2 Q2 \- u+ V- K( u, Y5 a
总结
index pool的omap告警一般就分为两类:

一类是object条目数过多,导致对应的index 元数据条目数过多,可以用上面的方法处理。
另外一类是bilog过多,这里的方法就不适用了,需要手工进行bilog清理,关于bilog后续会有详细章节介绍。
5 F, x5 b& b  g5 e
( o5 n) m5 o! P5 `& s4 K
 楼主| 发表于 2022-8-23 09:54:43 | 显示全部楼层
线上multisite环境出现HEALTH_WARN 32 large omap objects,已经设置bucket auto reshard=false,所以可以排除是bucket index所在的shard omap过大引发的问题。官方给出的告警信息无法定位到具体的object,于是有了下面的排错过程。

排查过程
[root@demo supdev]# ceph health detail/ L( O; D+ m7 i8 s) T$ m2 z& ^
HEALTH_WARN 32 large omap objects, U) }3 b; \, c  X5 e! `9 E
LARGE_OMAP_OBJECTS 32 large omap objects
" t' w9 V; f6 m3 y    32 large objects found in pool 'cn-bj-test1.rgw.log' #出现large omap的pool: ^0 S  T+ s6 }/ ^6 ~# m! ~3 x
    Search the cluster log for 'Large omap object found' for more details.
5 N7 F0 a' M7 u1 }+ k, Z% L# K. i# u9 |, K
* s5 B" ]& H% M( W- z
[root@demo supdev]# ceph pg ls-by-pool cn-bj-test1.rgw.log |awk '{print "ceph pg "$1 " query|grep num_large_omap_objects"}'|sh -x& W- N/ c0 r9 a% X$ L; s2 _
ceph pg 11.0 query|grep num_large_omap_objects2 J# t+ ^1 P8 W3 T
ceph pg 11.1 query|grep num_large_omap_objects, z7 L0 z6 N# O9 b5 U
ceph pg 11.2 query|grep num_large_omap_objects8 ?$ W- J; T3 a: O
......
+ w6 ?$ x6 j( E2 j5 \+ ceph pg 11.1e6 query
; H3 t; y4 ]* f- u* I: A, a+ grep num_large_omap_objects
                "num_large_omap_objects": 1 #有large omap的object数量
! ]/ c$ ], P' C1 z" W# V7 g                    "num_large_omap_objects": 0& d( C* a: s4 q* v( ^+ j, [9 ]
                    "num_large_omap_objects": 0
( S0 w+ R" W3 k) b- H3 F/ m% L5 L1 ], o( F  @

! x, ^: p% O3 U4 K( M, V) S! M[root@demo supdev]# ceph pg 11.1e6 query #查询pg详细信息9 b- z$ _- f5 c( a4 v* w2 E( s5 a
{4 G6 l; P5 }+ e; D% {' g
    "state": "active+clean",1 @* I5 }5 b4 I% x. o' o
.....
2 N4 @2 a2 @! p: Q    "info": {
. U5 N; H; [: x, z; e1 ~        "pgid": "11.1e6",% i9 {1 e( ]6 p6 h
        "last_update": "10075'3051746",
2 J* g) v# ^% M6 n" D3 O$ J$ z        "last_complete": "10075'3051746",/ V4 s; p! E; d; _, H2 B& f! B
        "log_tail": "10075'3050200",; }8 Z8 @. S1 ^* w! D1 A0 M
        "last_user_version": 3051746,
, F2 n, Z5 s% o        "last_backfill": "MAX",
) R: c. D" V- o! O+ k' s        "last_backfill_bitwise": 0,8 R5 M7 ~6 R1 D9 a$ }5 l6 w
        "purged_snaps": [],5 G. \* ]7 \& _# c" b6 ]
.....0 o5 {# w$ Y; @
: x, T" A3 t: y# M" `9 H
              "acting": [
- X- Y: v# g0 N1 L; b+ G" \1 x                    46, #主OSD id=46
6 e) u# u: {2 v3 q                    63, #从OSD
/ M! \" k: y' t+ X/ @+ v! C                    23  #从OSD% b2 _5 U- g/ h- A/ f! x% y
                ],
+ Q1 m) Z- i: I2 ~+ Z/ L9 p& [            "stat_sum": {
) w9 f3 W) s" F8 ]                "num_bytes": 40,
6 V" A/ A2 N. U/ R! y( u                "num_objects": 2,
7 Z" E% O2 ^# a2 R; P) G                "num_object_clones": 0,$ T/ U) Y( \  M+ I6 M
                "num_object_copies": 6,
8 F; W! J9 k! \5 N                "num_objects_missing_on_primary": 0,$ l2 p& Y6 c& ^0 E4 r9 T& l
                "num_objects_missing": 0," }& p+ t' t4 C
                "num_objects_degraded": 0,
/ f; w( }4 \* e( U% u8 a                "num_objects_misplaced": 0,5 T/ w1 M, D  m/ ]: e& V  M
                "num_objects_unfound": 0,
4 I  D( Q9 ^' S4 }! j3 M                "num_objects_dirty": 2,
/ F% D/ m9 D0 c* {  L/ v( ~1 A                "num_whiteouts": 0,
# _. `7 W, p- N  x& M                "num_read": 3055759,
; ~: z5 `) e6 i% F: W- Q7 c                "num_read_kb": 3056162,: \4 P2 e2 I# `, `/ Q. n3 @! w0 |
                "num_write": 5986011,
- z$ p, t7 K4 \6 M0 N                "num_write_kb": 53,
# i6 d8 L2 J/ Y% p( U) j& o                "num_scrub_errors": 0,
3 f' ]' m6 [3 X  Y2 w, I7 j                "num_shallow_scrub_errors": 0,) I* {3 t. L8 W# z: E" `3 o* u1 g
                "num_deep_scrub_errors": 0,
. Z5 }6 ?+ }* |4 v% h0 f                "num_objects_recovered": 0,
  D- c4 j! d7 D                "num_bytes_recovered": 0,
9 E1 K) M7 u% U/ H& I, A6 p5 [4 |                "num_keys_recovered": 0,3 l: A: \! U" @
                "num_objects_omap": 1,
; z3 c" A2 C7 E; Y0 |0 Y                "num_objects_hit_set_archive": 0,
" M0 d0 u3 L; w  r0 {7 q                "num_bytes_hit_set_archive": 0,# y7 Q$ _% |6 e+ h$ I
                "num_flush": 0,
. s* R- K% @* J' c% \+ F                "num_flush_kb": 0,
( |) E' E, Q3 h& ~1 N& O- w1 ?* b" f                "num_evict": 0,$ D/ s. V4 p6 T
                "num_evict_kb": 0,7 o5 O  @0 ]' R
                "num_promote": 0,
( ^& P% k2 E% f! ~/ J3 w7 O% \7 ?                "num_flush_mode_high": 0,
, I' P% ?! |/ r8 D" c4 Q                "num_flush_mode_low": 0,
. O1 e+ }$ s$ f8 f1 D, s                "num_evict_mode_some": 0,0 W. D3 P: ~3 i
                "num_evict_mode_full": 0,0 \- \# a" q8 u" }) Y
                "num_objects_pinned": 0,8 T6 E+ ~  ]3 Q
                "num_legacy_snapsets": 0,5 k& a6 H! P7 C# |
                "num_large_omap_objects": 1 #large omap的object数量
$ e  e+ V- u& @/ C  e5 o: C; W) I            },
6 i* C: {' D; J# \& X9 i7 d            ...3 h% y9 ?- p% ^% Q" V  p
                "agent_state": {}
! F$ j: K% m+ ?3 z}
! B) r9 m% @8 B3 M. R! w8 d+ k8 b2 j0 ?. O

. l0 U3 W) \4 f; `. W! W' R1 B: i" Y[root@demo supdev]# ceph osd find 46 #根据OSD id找到对应的主机信息' l. p+ b1 y& W$ @
{- P% y, ]+ D. h# ~& U7 ?
    "osd": 46,- j; \: G- \4 Y
    "ip": "100.1.1.40:6812/3691515",
' `; [0 C" V+ ^    "crush_location": {
8 o4 \$ X3 N( D: b& w        "host": "TX-100-1-40-sata",+ d  |, Q( F; F' v5 ^  Q9 ^& o
        "media": "site1-rack2-sata",# z2 I6 a) [* n' h
        "mediagroup": "site1-sata",; f) l" }! g" O8 O4 @
        "root": "default"
( L, }+ q8 z1 U- J" [    }3 T$ M1 R: h  k3 v+ Z. m" B
}
; R1 O) O- r% @( e/ H( }- k1 b- f$ r/ ?, l' u) V$ I' P3 B

% U% V0 o- N2 q" ]- s. ^% r[root@demo supdev]# zcat /var/log/ceph/ceph-osd.46.log-20181210.gz |grep omap #根据OSD日志找到具体的object名称. ?" S% Y2 f9 @* w9 `( Z
2018-12-09 23:03:18.803799 7f90e9b46700  0 log_channel(cluster) log [WRN] : Large omap object found. Object: 11:67885262:::sync.error-log.3:head Key count: 2934286 Size (bytes): 657040594 2 f" k5 }& ]; K
#OSD 46上的object名称为sync.error-log.3的omap超出标准" e/ [8 z" q! o1 c

: H. t" ^, p# ~3 b7 S" T# c6 f* N; L0 f* x& n/ M2 W4 R

2 }. I, E% u5 z) ]+ h5 W+ f9 u[root@demo supdev]# rados ls -p cn-bj-test1.rgw.log|grep "sync.error-log.3$" #确定objects存在1 R+ {0 a2 m. D( o# U; w6 a
sync.error-log.3
3 Y! X; m! s$ v) |
#注意整个multisite的同步过程中的错误日志信息以omap形式存储在sync.error-log.*
#吐槽一下,错误日志分32个shard存储,代码写死了,而且错误日志目前还只能通过手工清理,无法像其他日志一样自动trim,随着错误日志不断堆积,才引发了今天的问题。

: ], u! s8 L# {# V: C9 N2 Z4 B5 E[root@demo supdev]# radosgw-admin sync error list|more#查看错误日志$ T/ t* e% @/ _3 m8 D
[, w! p& c1 [$ O! o( y. f
    {. w; ?: B4 [  K2 Y
        "shard_id": 0,! n/ c3 G7 _: P, `; S. z
        "entries": [
4 d6 q& x$ H8 e            {2 b0 ]/ m+ K/ a3 Z* A8 i0 x
                "id": "1_1540890427.972991_36.1",
' S6 m8 W/ j7 D" v2 c                "section": "data",. f( r' x+ [9 q/ r7 x
                "name": "demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1:3",
: K$ }1 {2 L! r7 K                "timestamp": "2018-10-30 09:07:07.972991Z",' f( b4 a- F* U4 R
                "info": {
4 @" f5 `* u5 E8 }                    "source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",. [' O: H: M3 {# t3 v
                    "error_code": 5,9 m! \2 e8 C( H* J0 \
                    "message": "failed to sync bucket instance: (5) Input/output error"" q  G! d, o  f) ^8 _
                }
( b) ?. ?1 x( e: w1 K- u            },
3 V) y" s# m* C  J4 e, }......% E$ d9 u8 j; S7 V* H' h- h
            {" ^6 h' m5 D+ x) e9 Q. N
                "id": "1_1543395420.626552_32014.1",
, n0 N3 x" o+ [9 `$ W; N                "section": "data",
' [7 V# R; F- T& Y( U) J2 w" D                "name": "demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1:0/file1205085",+ Q+ ]( C& S8 N1 Y" C+ G. n; O
                "timestamp": "2018-11-28 08:57:00.626552Z",
# o7 ^8 {6 D9 `3 z                "info": {; v- T1 v: ]2 Z" g" s% i
                    "source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",
" r# G5 t' n3 w, k                    "error_code": 5,
2 s6 H3 S: b9 f: R                    "message": "failed to sync object(5) Input/output error". R5 o' C& K2 v& @. ?! N
                }- u& L9 ?3 z4 p1 y2 R# {" O* L$ M
            }
  o" ~& p' {4 o9 i4 [
6 t' O6 `. M5 V  j9 s5 L
9 E! V: @# v; t, |/ A[root@TX-97-140-6 supdev]# radosgw-admin sync error trim --start-date=2018-11-14 --end-date=2018-11-28 #按日期清理错误日志记录
$ _8 d4 n* b2 w6 W4 S- J复制3 P) ]: v* [( A# i3 h7 u. @
优化定位效率
简单写了个脚本,先根据warn信息找pool,之后再根据pool找出有large omap objects的pg,凑合用,不保证没bug,在12.2.10下面测试通过。
$ H2 I' A0 N; O! [% u
[root@demo cephuser]# cat large_obj.py
import json
import rados
import rbd

ceph_conf_path = '/etc/ceph/ceph.conf'
rados_connect_timeout = 5
class RADOSClient(object):
    """Context manager that holds a RADOS connection for the duration of a ``with`` block.

    On construction it asks ``driver`` to open a connection and stores the
    returned ``(client, ioctx)`` pair; on exit it hands both back to the
    driver so they are released.

    :param driver: object providing ``_connect_to_rados(pool)`` and
        ``_disconnect_from_rados(client, ioctx)``.
    :param pool: value forwarded unchanged to ``driver._connect_to_rados``.
    """

    def __init__(self, driver, pool=None):
        self.driver = driver
        self.client, self.ioctx = driver._connect_to_rados(pool)

    def __enter__(self):
        # The instance itself is the ``as`` target: callers reach the
        # connection through ``.client`` / ``.ioctx``.
        return self

    def __exit__(self, type_, value, traceback):
        # Returns None, so an exception raised inside the ``with`` body
        # still propagates after the connection has been released.
        self.driver._disconnect_from_rados(self.client, self.ioctx)

class RBDDriver(object):
    """Opens and closes librados connections using a fixed configuration.

    Args:
        ceph_conf_path: path of the ceph.conf file passed to ``rados.Rados``.
        rados_connect_timeout: value passed as ``connect(timeout=...)``; when
            negative, ``connect()`` is called without a timeout.
        pool: optional default pool name for which an I/O context is opened.
    """

    def __init__(self, ceph_conf_path, rados_connect_timeout, pool=None):
        self.ceph_conf_path = ceph_conf_path
        self.rados_connect_timeout = rados_connect_timeout
        self.pool = pool

    def _connect_to_rados(self, pool=None):
        """Connect to the cluster and optionally open an I/O context.

        Args:
            pool: pool name to open; falls back to ``self.pool`` when None.
                (Previously this argument was accepted but ignored.)

        Returns:
            ``(client, ioctx)``; ``ioctx`` is None when no pool is selected.

        Raises:
            RuntimeError: when librados reports an error while connecting or
                opening the pool. The client is shut down before raising.
        """
        if pool is None:
            pool = self.pool
        client = rados.Rados(conffile=self.ceph_conf_path)
        try:
            if self.rados_connect_timeout >= 0:
                client.connect(timeout=self.rados_connect_timeout)
            else:
                client.connect()
            if pool is None:
                ioctx = None
            else:
                ioctx = client.open_ioctx(pool)
            return client, ioctx
        except rados.Error:
            msg = "Error connecting to ceph cluster."
            client.shutdown()
            # Raising a bare string is itself a TypeError; raise a real
            # exception so the intended message reaches the caller.
            raise RuntimeError(msg)

    def _disconnect_from_rados(self, client, ioctx=None):
        """Close ``ioctx`` if one was opened, then shut the client down."""
        if ioctx is not None:
            ioctx.close()
        client.shutdown()

class cmd_manager(object):
    """Runs monitor commands needed to locate large-omap objects.

    Each method opens a short-lived connection through
    ``RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout))`` and sends
    one JSON monitor command via ``client.mon_command``.
    """

    def get_large_omap_obj_poolname(self):
        """Return the pool named in the LARGE_OMAP_OBJECTS health warning.

        Returns:
            The text between the first pair of single quotes in the first
            detail message that contains one, or False when the command fails
            or the health report carries no such warning.
        """
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            cmd = json.dumps({"prefix": "health", "detail": "detail", "format": "json"})
            # Empty input buffer; b'' is a str on Python 2 and bytes on Python 3.
            result = dr.client.mon_command(cmd, b'')
            if result[0] != 0:
                return False
            health = json.loads(result[1])
            # A healthy cluster has no LARGE_OMAP_OBJECTS entry; look it up
            # defensively instead of indexing (which raised KeyError).
            check = health.get("checks", {}).get('LARGE_OMAP_OBJECTS')
            if not check:
                return False
            for entry in check.get('detail', []):
                pieces = entry.get('message', '').split("'")
                if len(pieces) > 1:
                    return pieces[1]
            return False

    def get_pg_list_by_pool(self, poolname):
        """Return the decoded JSON of ``pg ls-by-pool`` for ``poolname``.

        Args:
            poolname: name of the pool to list.

        Returns:
            The parsed JSON reply, or False when ``poolname`` is empty/False
            or the monitor command returns a non-zero status.
        """
        if not poolname:
            return False
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            # json.dumps escapes the pool name; string concatenation did not.
            cmd = json.dumps({"prefix": "pg ls-by-pool", "poolstr": poolname, "format": "json"})
            result = dr.client.mon_command(cmd, b'')
            if result[0] == 0:
                return json.loads(result[1])
            return False

def large_omap_pg_lines(pg_stats):
    """Build one report line per PG whose num_large_omap_objects is non-zero.

    Args:
        pg_stats: decoded ``pg ls-by-pool`` reply: a list of PG stat dicts, or
            a falsy value (e.g. False) when the query failed.

    Returns:
        List of strings ``pgid=... OSDs=... num_large_omap_objects=...``.
    """
    # NOTE(review): newer Ceph releases appear to wrap the list as
    # {"pg_stats": [...]} -- accepted here as well; confirm against your version.
    if isinstance(pg_stats, dict):
        pg_stats = pg_stats.get("pg_stats", [])
    lines = []
    for pg in pg_stats or []:
        count = pg["stat_sum"]["num_large_omap_objects"]
        if count != 0:
            lines.append("pgid={0} OSDs={1} num_large_omap_objects={2}".format(
                pg["pgid"], pg["acting"], count))
    return lines


def main():
    """Print the pool with large omap objects and the PGs that hold them."""
    cmd_ = cmd_manager()
    poolname = cmd_.get_large_omap_obj_poolname()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Large omap objects poolname = {0}".format(poolname))
    if not poolname:
        # No warning (or the health query failed): nothing to list.
        return
    res = cmd_.get_pg_list_by_pool(poolname)
    for line in large_omap_pg_lines(res):
        print(line)


if __name__ == '__main__':
    main()
复制
再爆一个雷
如果你认为通过上面方式清除omap集群就能立马恢复状态,那就太天真了。告警信息“HEALTH_WARN 32 large omap objects”依然挂在那里不尴不尬:虽然omap清理了,但是因为对应PG状态没更新,所以告警信息依然存在,只能通过手工或者其他方式去触发PG的状态更新。我这边是通过ceph pg deep-scrub {pg}去触发pg信息更新,注意如果你用scrub是没用的,必须deep-scrub,这里又要吐槽官方的逻辑设计,真是WFK!当然你也可以放那里不管,等后台自动deep-scrub也能恢复。


您需要登录后才可以回帖 登录 | 开始注册

本版积分规则

关闭

站长推荐上一条 /4 下一条

如有购买积分卡请联系497906712

QQ|返回首页|Archiver|手机版|小黑屋|易陆发现 点击这里给我发消息

GMT+8, 2022-10-4 06:05 , Processed in 0.047430 second(s), 22 queries .

Powered by LR.LINUX.cloud bbs168x X3.2 Licensed

© 2012-2022 Comsenz Inc.

快速回复 返回顶部 返回列表