找回密码
 注册
查看: 596|回复: 2

1 Large omap objects ceph health detail

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-8-19 17:00:37 | 显示全部楼层 |阅读模式
Large omap objects
# ceph health detail
HEALTH_WARN 1 large omap objects
LARGE_OMAP_OBJECTS 1 large omap objects
    1 large objects found in pool 'is_recovery' #出现large omap的pool
    Search the cluster log for 'Large omap object found' for more details.
- K0 i+ b/ ^% M. s0 k" D/ x

: B1 q2 H2 r" `/ c4 g2 j
) g2 H% @7 p) p7 o1 ~" R6 K& h
ceph pg ls-by-pool  is_recovery|awk '{print "ceph pg "$1 " query|grep num_large_omap_objects"}'|sh -x
ceph pg 11.0 query|grep num_large_omap_objects
ceph pg 11.1 query|grep num_large_omap_objects
ceph pg 11.2 query|grep num_large_omap_objects
2 |6 M4 y: g6 L$ }# N" T% C- ]
9 o% Q! \' [6 o2 h' R) S3 c! k6 t2 `2 J1 a% k1 |; p
$ S  R. ^+ g! K' ^1 l' q
1 N! p+ V3 p- E  \* m8 x7 I" g* r
[root@ceph-1 ~]# ceph daemon mds.ceph-1 flush journal
{
    "message": "",
    "return_code": 0
}
[root@ceph-1 ~]#
[root@ceph-2 ~]# ceph daemon mds.ceph-2 flush journal
"mds_not_active"
[root@ceph-2 ~]# ceph daemon mds.ceph-2 flush journal
"mds_not_active"

' M5 q$ C: a5 v( ?% r( w0 F+ ^7 F6 K0 |- o- c9 w. e: @' n) a

( s6 f* E, n0 ?8 x3 k# a3 v7 r
: I6 w) b5 t& a: n0 I# \: a* L6 _/ d* I4 k" v6 B' `! d! }

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-8-23 09:53:54 | 显示全部楼层
index pool的 large omap 处理
向单个bucket压测2000W个object,默认设置shard数为16,压测到1800W时出现large omap告警,下面介绍一下错误定位和处理方法。

异常定位
集群状态如下
. k4 q( y( J, {( X, M) d1 T# O# @) G: S7 W
[root@demo123 cephuser]# ceph health detail
HEALTH_WARN 16 large omap objects
LARGE_OMAP_OBJECTS 16 large omap objects
    16 large objects found in pool 'cn-bj-test2.rgw.buckets.index'
    Search the cluster log for 'Large omap object found' for more details.
4 R. `3 g' G6 ]' ]! j复制
通过脚本找到对应的pg信息,脚本请查看之前一篇omap large处理的文章。
9 `# B! Y/ {- E# k1 Y/ a
[root@demo123 cephuser]# python large_omap.py( Z) T  ]! X; e& X6 S
Large omap objects poolname = cn-bj-test2.rgw.buckets.index2 X9 b0 ^) J) `* D" B
pgid=13.1f OSDs=[78, 9, 59] num_large_omap_objects=1$ o3 R  }. f8 E2 `1 b$ n0 S! ?" Y( f; V
pgid=13.33 OSDs=[59, 79, 19] num_large_omap_objects=1/ j* V/ w  O" d/ k7 Q5 f
pgid=13.3c OSDs=[49, 29, 78] num_large_omap_objects=1
# u0 q8 y5 k. L% C* ?pgid=13.3d OSDs=[48, 69, 9] num_large_omap_objects=1
" J0 Q4 O8 p6 o- E* Dpgid=13.45 OSDs=[88, 39, 28] num_large_omap_objects=1
" h4 Z2 a; \! T" }pgid=13.4d OSDs=[38, 29, 89] num_large_omap_objects=1
: |4 B. R. l2 P2 s/ {pgid=13.50 OSDs=[68, 19, 59] num_large_omap_objects=1
# W% I( `: s1 Xpgid=13.6b OSDs=[39, 79, 8] num_large_omap_objects=1
1 N. S3 Q; J9 X' o" Z9 Upgid=13.8e OSDs=[38, 9, 78] num_large_omap_objects=1
# L7 E2 b. {1 Q- P% ]1 ]* n: j4 A- Ppgid=13.d1 OSDs=[9, 88, 38] num_large_omap_objects=1
) x, [8 O- I8 t8 o- Spgid=13.d2 OSDs=[59, 88, 28] num_large_omap_objects=1. N& m; F3 U3 \/ {. T' Z- w
pgid=13.e1 OSDs=[19, 88, 49] num_large_omap_objects=1
( I% s. ^/ U/ n# `( tpgid=13.e4 OSDs=[38, 19, 89] num_large_omap_objects=12 I3 \* D& I; [3 a/ ~
pgid=13.e7 OSDs=[19, 89, 38] num_large_omap_objects=1
. x7 i: G8 M9 [" O; p$ @pgid=13.ec OSDs=[89, 28, 48] num_large_omap_objects=16 L; V! `* n& E
pgid=13.f5 OSDs=[38, 88, 19] num_large_omap_objects=1
( y5 {' \( }, g' {; ~复制
查找OSD日志,确定object名称(".dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11"),发现omap条目数达到了2378492,超过默认告警值。
$ x: z! K; T/ [1 h9 z
[root@demo123 cephuser]# zcat /var/log/ceph/ceph-osd.19.log-20181231.gz |grep "omap"
2018-12-30 23:00:42.334766 7f6583f44700  0 log_channel(cluster) log [WRN] : Large omap object found. Object: 13:87443b2d:::.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11:head Key count: 2378492 Size (bytes): 491722758
复制, {! n7 ?. P5 b3 ?+ N% r
默认告警值为2000000,2378492>2000000,不建议去修改这个默认值,因为改得过大会加大集群出现异常的风险,属于掩耳盗铃。
) i5 G# x) {6 K3 X* h8 o- F: X( M. b
" D3 a3 w. V" Z[root@demo123 cephuser]# ceph daemon /var/run/ceph/ceph-osd.19.asok config show |grep large
/ P1 T  g; r# |7 ~7 ]' k    "osd_bench_large_size_max_throughput": "104857600",* @2 n) `& {2 ^* s  {8 ~+ I$ |( V
    "osd_deep_scrub_large_omap_object_key_threshold": "2000000"," S9 n7 k4 W9 `* A  w
    "osd_deep_scrub_large_omap_object_value_sum_threshold": "1073741824",/ P, Q- v/ p$ N* g
复制
查看一下发生omap过大的bucket,确定相关信息
. u# j5 L' Y9 }4 Q$ M! @4 H* M5 [- s2 n9 M! M$ U
[root@demo123 cephuser]# radosgw-admin bucket stats --bucket=demo1( V; i8 C. b$ n0 _
{, k, S7 @( d! s5 a8 x& E
    "bucket": "demo1",
* i0 r3 F  U: M, l    "zonegroup": "68f1dcf5-0470-4a48-8cd2-51c837a2cafb",
: F1 C- }/ ^9 X& e, |/ R. ^    "placement_rule": "default-placement",( ]) ]$ V2 l: j5 z2 p# {
    "explicit_placement": {
2 m. N; Z! E5 T2 v/ s        "data_pool": "",
0 N* o3 L' S1 C% a% a) ~& t        "data_extra_pool": "",5 R' o8 n* G, S  V  r, V' ?8 s
        "index_pool": ""
8 I! N6 B) l7 j, F    },5 ^" W+ E6 F& c% d4 ]! [3 u0 l
    "id": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1", #当前bucket instance ID,
" v- j2 r7 P- R1 [: S5 M1 |0 b& g    "marker": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1",2 f  I: ]9 D) q2 \1 j
    "index_type": "Normal",% F  V  P0 y" V
    "owner": "s3test",7 Q# e4 m7 d0 n% f
    "ver": "0#2638037,1#2637965,2#2632835,3#2632869,4#2632799,5#2632597,6#2633289,7#2633175,8#2637227,9#2637609,10#2637997,11#2632455,12#2631337,13#2631624,14#2631983,15#2632359",
0 y4 u) Q* F3 ~# s4 b. j3 ^7 V    "master_ver": "0#0,1#0,2#0,3#0,4#0,5#0,6#0,7#0,8#0,9#0,10#0,11#0,12#0,13#0,14#0,15#0", #16个shard
% f( M; x) _/ {* T    "mtime": "2018-11-28 16:47:45.560039",0 b) l$ g7 j7 j
    "max_marker": "0#00002638036.2638608.5,1#00002637964.2638536.5,2#00002632834.2649479.5,3#00002632868.2633634.5,4#00002632798.2633370.5,5#00002632596.2633168.5,6#00002633288.2633860.5,7#00002633174.2633747.5,8#00002637226.2637798.5,9#00002637608.2638181.5,10#00002637996.2638569.5,11#00002632454.2633026.5,12#00002631336.2631914.5,13#00002631623.2632195.5,14#00002631982.2632554.5,15#00002632358.2632930.5",0 X* S& T8 U: x3 i, |
    "usage": {# L* M5 d) r7 p: M
        "rgw.main": {
5 A) t2 G0 X: k: R( C            "size": 1975757355553,
3 Q/ v$ Y/ ~# g  G: I* G8 \! J9 m            "size_actual": 2047893610496,* V& ]+ A' X6 }  L! a3 Y; f( p& A
            "size_utilized": 1975757355553,+ s: f0 A  Y0 [- J5 ^
            "size_kb": 1929450543,
, u9 B# r4 H: b8 \# n  L$ Y- ?7 f            "size_kb_actual": 1999896104,4 Z1 `  P, m0 \' y
            "size_kb_utilized": 1929450543,
  s" A+ R" Z% t: w, [            "num_objects": 19998962 #近2000Wobject2 T% I  E3 [, a. w( B0 w
        }
, a; [. \5 u3 l! Q# L    },9 d) H3 G4 L) B
    "bucket_quota": {
5 |% f1 `0 ], C. ]0 u        "enabled": false,
! @6 r  z) k1 G" j4 E' }! s        "check_on_raw": false,
$ k) c9 {0 ?8 d. U3 W0 a! H        "max_size": -1,
  o  a6 @. K# g2 O# ~; z        "max_size_kb": 0,
7 C) p/ f3 X1 l* T        "max_objects": -1
8 {5 Q9 Y; q2 n+ P3 g7 \; V    }* |2 R' t* P7 |1 D
}
4 l. q; p. B* m1 `5 W; I( M复制0 S* M, K4 y& t0 p! M
异常处理
通过bucket reshard操作,将原来的bucket重新划分shard,shard数量从16->64。注意reshard有风险,最好停掉客户端的读写操作以后再进行,同时如果你使用了multisite,请根据官方说明立即关闭Dynamic resharding特性。

Dynamic resharding 说明: http://docs.ceph.com/docs/mimic/radosgw/dynamicresharding/

做完reshard需要手工删除之前的索引数据,工具也提示了下面的内容。
$ A1 U5 r" [/ s, U1 d! A; }8 s6 R7 \7 b4 d* ~- s7 ?2 l/ _
[root@demo123 cephuser]# radosgw-admin bucket reshard --bucket demo1 --num-shards 64
*** NOTICE: operation will not remove old bucket index objects ***
***         these will need to be removed manually             ***
tenant:
bucket name: demo1
old bucket instance id: afd874cd-f976-4007-a77c-be6fca298b71.34209.1
new bucket instance id: afd874cd-f976-4007-a77c-be6fca298b71.45786.1
total entries: 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 19998962
2019-01-03 11:42:33.741314 7f74d15c6dc0  0 WARNING: RGWReshard::add failed to drop lock on demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 ret=-2
复制
检查reshard结果

2 f4 x5 _+ A1 b* c( q[root@demo123 cephuser]# radosgw-admin bucket stats --bucket=demo1( D9 n- g, B9 C( O% Z* z
{
- D0 J) r! ~$ R$ N+ {, O    "bucket": "demo1",$ ]% S5 f3 V2 j5 M0 p- m& J8 {
    "zonegroup": "68f1dcf5-0470-4a48-8cd2-51c837a2cafb",
4 |, w4 |& E! ~" v2 v1 L! ^    "placement_rule": "default-placement",- ^, G- T: y9 Z) t9 c
    "explicit_placement": {6 \  f+ Q9 x% D+ d0 b; A* p; F
        "data_pool": "",
- c* t" v$ ]1 b1 g4 `. b* X        "data_extra_pool": "",' P$ V1 c7 P2 @
        "index_pool": ""# d  ~# k" }+ H* h; i
    },
* L, F/ ^9 j  P* E/ F$ ~2 J    "id": "afd874cd-f976-4007-a77c-be6fca298b71.45786.1", #bucket instance ID发生变化
. C, h) G* w* k/ h    "marker": "afd874cd-f976-4007-a77c-be6fca298b71.34209.1",
- e# i4 X; _5 }8 ]    "index_type": "Normal",
1 G* g6 ?- i; N' f0 W6 n2 j    "owner": "s3test",0 K" T& m) N$ `/ C5 t
    "ver": "0#4920,1#4920,2#4883,3#4877,4#4882,5#4883,6#4885,7#4880,8#4882,9#4880,10#4878,11#4883,12#4923,13#4883,14#4882,15#4874,16#4878,17#4880,18#4884,19#4881,20#4882,21#4881,22#4876,23#4922,24#4883,25#4887,26#4881,27#4879,28#4879,29#4879,30#4882,31#4884,32#4880,33#4879,34#4917,35#4876,36#4883,37#4885,38#4884,39#4879,40#4883,41#4880,42#4880,43#4882,44#4884,45#4877,46#4879,47#4877,48#4881,49#4880,50#4881,51#4881,52#4883,53#4876,54#4880,55#4884,56#4881,57#4885,58#4882,59#4881,60#4881,61#4881,62#4883,63#4882",#shard 数量变为了64
! m; X5 a6 z, z: U3 ^4 m    "master_ver": "0#0,1#0,2#0,3#0,4#0,5#0,6#0,7#0,8#0,9#0,10#0,11#0,12#0,13#0,14#0,15#0,16#0,17#0,18#0,19#0,20#0,21#0,22#0,23#0,24#0,25#0,26#0,27#0,28#0,29#0,30#0,31#0,32#0,33#0,34#0,35#0,36#0,37#0,38#0,39#0,40#0,41#0,42#0,43#0,44#0,45#0,46#0,47#0,48#0,49#0,50#0,51#0,52#0,53#0,54#0,55#0,56#0,57#0,58#0,59#0,60#0,61#0,62#0,63#0",
3 `9 F0 X7 R: L  [- ]$ }5 G    "mtime": "2019-01-03 11:32:50.349905",
6 r5 h% T+ f8 Q1 z2 m    "max_marker": "0#,1#,2#,3#,4#,5#,6#,7#,8#,9#,10#,11#,12#,13#,14#,15#,16#,17#,18#,19#,20#,21#,22#,23#,24#,25#,26#,27#,28#,29#,30#,31#,32#,33#,34#,35#,36#,37#,38#,39#,40#,41#,42#,43#,44#,45#,46#,47#,48#,49#,50#,51#,52#,53#,54#,55#,56#,57#,58#,59#,60#,61#,62#,63#",5 m& X4 A& P' M2 y% M2 N
    "usage": {
0 m$ k2 p& U8 P        "rgw.main": {' _" N# a0 b6 n+ q! b
            "size": 1975757355553,1 P6 S9 V: w5 x; a8 W
            "size_actual": 2047893610496,
! q# Z8 X( Q6 d5 c9 `! Y, z            "size_utilized": 1975757355553,
9 b) U: z# x( |; l; U& b6 \1 a            "size_kb": 1929450543,6 h4 O. @0 R  N0 H0 i; r
            "size_kb_actual": 1999896104,
/ i& B; i7 ]) h6 M: |, ?            "size_kb_utilized": 1929450543,& @5 u5 d% a% ]- ]
            "num_objects": 199989622 {; V4 F' E4 S2 F3 m
        }1 n8 `3 {+ B! M9 f4 r
    },$ M) I5 H: P# m: S5 c+ j* P$ Z
    "bucket_quota": {  F' v) r9 t7 d8 w
        "enabled": false,
/ F0 ]! O6 |7 V1 k2 \        "check_on_raw": false,
2 |2 `1 h! e0 Z! S! w' @        "max_size": -1,
5 ]4 C2 K; o4 x0 ?3 c        "max_size_kb": 0,# x( G- }+ h! Q* b3 [3 o; U6 A4 Z
        "max_objects": -16 ?) g+ K" U5 d( c$ P2 ]! \
    }
: _  [( Y+ h: {9 w5 Z  N/ p}
5 v: J2 `1 U2 c) T5 i复制
回收旧数据
根据之前工具的提示,需要回收index和meta两个pool里面的残留数据。

回收index pool数据
1 N1 ~; \: d& f. X0 u. H0 q9 U$ ?7 F7 P& I4 `: `$ P; P3 v- X
[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.buckets.index|grep "afd874cd-f976-4007-a77c-be6fca298b71.34209.1"
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.5
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.15
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.2
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.1
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.0
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.4
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.13
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.6
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.3
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.7
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.9
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.14
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.10
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.12
.dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.8
复制
使用rados rm命令删除数据
2 x$ O  |/ G% r* n$ Y
[root@demo123 supdev]# rados ls -p cn-bj-test2.rgw.buckets.index|grep "afd874cd-f976-4007-a77c-be6fca298b71.34209.1"|awk '{print "rados rm -p cn-bj-test2.rgw.buckets.index "$1}'|sh -x
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.5
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.15
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.2
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.1
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.0
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.4
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.11
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.13
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.6
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.3
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.7
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.9
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.14
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.10
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.12
+ rados rm -p cn-bj-test2.rgw.buckets.index .dir.afd874cd-f976-4007-a77c-be6fca298b71.34209.1.8
复制+ J& \  G0 ^/ h3 t7 g& o- A# D
回收meta pool的数据
& l7 c0 i& m. Z9 E
3 b1 l; N* X& I4 F2 H6 s[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.meta --all/ U; k+ M1 v2 O# n9 k
root    demo14 I$ s9 I+ g5 f6 x
root    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.45786.19 U, i$ b% s7 t
root    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 #残留. g8 a: T3 Y5 p8 s+ A
root    my-new-container_segments
% v8 w6 ]; m* a8 G2 T* O9 Z) xroot    .bucket.meta.demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1
6 b/ C7 e; Z& H, y5 c, g0 l" s  froot    .bucket.meta.my-new-container:afd874cd-f976-4007-a77c-be6fca298b71.7991.1  S1 i5 L& b, G& c+ p: N$ l
users.uid    s3test.buckets
: v2 S2 j* u$ [# f0 Busers.uid    swiftuser
- Y4 Z1 c: b8 [; W: wusers.swift    swiftuser:swiftuser18 C+ L! Q5 ~" T, F* g" c# A
users.keys    SNACA4LX9DS21NGMSRX44 v2 c" G$ O1 s# u2 L  D* t
root    .bucket.meta.my-new-container_segments:afd874cd-f976-4007-a77c-be6fca298b71.7991.4
; H( r* R3 O4 {0 ^users.uid    s3test
+ d% f3 o  }. }& r1 Oroot    demo2
3 E  }, x" E- t5 Tusers.keys    XP8E2452AB6EBU3RPD0C4 g# ^! O  w. p8 f8 E9 W
root    my-new-container5 {5 @, p) q3 Q2 q: N
users.uid    swiftuser.buckets
* Y$ ^6 @8 R" Eusers.uid    synchronization-user! {* }8 i- a- o. V  `
复制2 _+ Z+ F7 [2 W' I$ i1 {
注意这里用的是ceph L版本,使用了namespace,所以要指定namespace才能删除

[root@demo123 cephuser]# rados rm  -p cn-bj-test2.rgw.meta .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1 --namespace=root
[root@demo123 cephuser]# rados ls -p cn-bj-test2.rgw.meta --all
) [: H! f) @. I5 F% froot    demo1
. C3 o* N3 q. h# h0 a2 o. aroot    .bucket.meta.demo1:afd874cd-f976-4007-a77c-be6fca298b71.45786.1# g( N. I- W0 O# u' N8 Q
root    my-new-container_segments
7 v+ }: M9 f* \5 n, ?: uroot    .bucket.meta.demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1
& h: W) H! E+ Uroot    .bucket.meta.my-new-container:afd874cd-f976-4007-a77c-be6fca298b71.7991.1+ o7 Z: Q! c6 \. [
users.uid    s3test.buckets
4 c% }, a0 L% Y/ Busers.uid    swiftuser+ G4 K$ k6 J% R- w1 u0 Y0 n
users.swift    swiftuser:swiftuser1# p5 x0 Z. a9 i9 H+ a
users.keys    SNACA4LX9DS21NGMSRX4: O! m2 u! {5 N: A$ l
root    .bucket.meta.my-new-container_segments:afd874cd-f976-4007-a77c-be6fca298b71.7991.4
1 u2 A; l, O7 z, F+ u- Fusers.uid    s3test* h0 w2 l/ X; h9 F3 e! A
root    demo2  O5 C% o2 x) {/ c5 a0 n/ }- _
users.keys    XP8E2452AB6EBU3RPD0C8 g1 t& s! P: Z3 D& H5 L" V7 X
root    my-new-container* B2 W9 J9 ?  ^
users.uid    swiftuser.buckets5 ]8 w1 e! [" G- M$ D
users.uid    synchronization-user
! p2 w+ O0 \' n3 D2 M% T2 _复制
清除large omap告警
删完了object并不会自动消除告警,需要手工对相应的pg进行deep-scrub操作,具体如下
3 g% p9 ?8 _- e1 z5 O$ y
[root@demo123 cephuser]# python large_omap.py
+ r" u5 b6 ?, n8 [- C' |Large omap objects poolname = cn-bj-test2.rgw.buckets.index/ p; x3 b( q0 o5 f
pgid=13.33 OSDs=[59, 79, 19] num_large_omap_objects=1: h: O. t' C/ L# n
pgid=13.3c OSDs=[49, 29, 78] num_large_omap_objects=12 b! b' {& @3 N) L" j3 h" P
pgid=13.3d OSDs=[48, 69, 9] num_large_omap_objects=1
2 ~$ a1 }( m$ l( tpgid=13.45 OSDs=[88, 39, 28] num_large_omap_objects=1. [0 H4 ?! _5 U6 L* {
pgid=13.4d OSDs=[38, 29, 89] num_large_omap_objects=1, I8 ], ^% K" y* \
pgid=13.50 OSDs=[68, 19, 59] num_large_omap_objects=1
# x+ G, i9 ~( l9 H' [' Y! wpgid=13.6b OSDs=[39, 79, 8] num_large_omap_objects=1
; J" S4 B4 ]- m( @) A8 p! o2 u- ^pgid=13.8e OSDs=[38, 9, 78] num_large_omap_objects=1
8 f8 S0 l4 R7 g0 n7 O" S' G  xpgid=13.d1 OSDs=[9, 88, 38] num_large_omap_objects=1- P7 [% y4 \! W
pgid=13.d2 OSDs=[59, 88, 28] num_large_omap_objects=1
: V5 N/ v6 `7 `0 h* J4 k2 gpgid=13.e1 OSDs=[19, 88, 49] num_large_omap_objects=1
' L. h/ S3 O, Q& ]0 G/ p# ]5 Q7 z! Ypgid=13.e4 OSDs=[38, 19, 89] num_large_omap_objects=1( A- C/ B2 @% K$ u% s/ \) T
pgid=13.e7 OSDs=[19, 89, 38] num_large_omap_objects=1  [1 h6 Q2 v- F" e
pgid=13.ec OSDs=[89, 28, 48] num_large_omap_objects=1
- ~, F. T" X* l% n3 w) epgid=13.f5 OSDs=[38, 88, 19] num_large_omap_objects=16 ?" V/ a8 i$ r
[root@demo123 cephuser]# ceph pg deep-scrub 13.33
instructing pg 13.33 on osd.59 to deep-scrub
复制
操作完可以看到有pg进行deep-scrub,之后状态恢复
! ?" e' f3 V6 B5 H& w6 @. O' B
[root@demo123 cephuser]# ceph -s: h1 A" H. b$ r
  cluster:
; S# ^( Y8 B% ~    id:     21cc0dcd-06f3-4d5d-82c2-dbd411ef0ed90 ^1 v, Y6 V* c& D8 n! r% V8 ~1 q+ A
    health: HEALTH_WARN
  ?9 ?; M: K, D/ t% q            16 large omap objects
$ W" h7 ~+ p5 X7 r2 t; [3 n, i( v4 b; L" E" u+ }
  services:
& V2 z9 c6 B" T# j  i$ G. |. r    mon: 3 daemons, quorum demo122,demo131,demo1418 S; g* I6 w0 S2 v6 J! E7 v- k
    mgr: demo141(active)* q. m: n( c0 L7 Q
    osd: 90 osds: 90 up, 90 in
) ^( i5 r! R6 d) @) `3 w    rgw: 1 daemon active! g7 }8 a9 v- x3 [
0 o! X2 a7 P6 n% d4 _
  data:
, V, @& Q9 ?' @3 L+ X. }) C( m' h    pools:   7 pools, 3712 pgs6 n6 {' }( E9 _+ a
    objects: 20.13M objects, 1.80TiB2 B+ B/ s' ?  e- ~
    usage:   7.28TiB used, 408TiB / 415TiB avail
3 [5 g1 j6 M4 y    pgs:     3711 active+clean
* U3 q. {1 y$ J             1    active+clean+scrubbing+deep #开始deep scrub
+ [) n  Q3 t( ]& R  h- y& w8 d1 Q# n: d) V
  io:1 H6 c- z" f$ u  b  U# P) y" `
    client:   5.29MiB/s rd, 935B/s wr, 69op/s rd, 28op/s wr
; J% ?# S1 w& Y1 `- b% T2 z5 b
* d: B1 W: v! D[root@demo123 cephuser]# ceph -s, q4 }3 C4 k7 p- r: I
  cluster:
) d6 x; H3 V+ [; b    id:     21cc0dcd-06f3-4d5d-82c2-dbd411ef0ed9
8 ]% `7 @# S! F0 ]    health: HEALTH_WARN
, P6 s; R* F# ~* [- y/ f3 @  j            15 large omap objects #减少了1个- h- h9 A. p. m- y, y( i# @

2 I/ I* r  ^6 d% `  services:
4 n5 d. X2 e& \# q$ g7 a    mon: 3 daemons, quorum demo122,demo131,demo1416 F+ _+ i! p+ i- n, L0 a
    mgr: demo141(active)# q! C0 q6 t. V* P# G# g
    osd: 90 osds: 90 up, 90 in
# s' k. j, r" L    rgw: 1 daemon active
" r* V0 X* ?8 l& G
1 [* H, w# `  F$ w2 j  data:% ?- O2 E* x4 V5 l* \( E
    pools:   7 pools, 3712 pgs, G9 X( A* ^) z! B" M
    objects: 20.13M objects, 1.80TiB
( c& r/ o1 a9 {; [* P    usage:   7.28TiB used, 408TiB / 415TiB avail  t2 t; c  B3 s
    pgs:     3712 active+clean
- o! A7 `  a% x  X
6 E7 U& |. ?7 B( Y5 p5 z3 B. y8 _  io:, f( b0 r6 U# K9 n3 l' g% g( c, G9 \  a4 p
    client:   5.33MiB/s rd, 680B/s wr, 36op/s rd, 6op/s wr# X; _$ d  k# K" R5 |# o
复制- a$ F6 P5 H' I8 p6 c
总结
index pool的omap告警一般就分为两类:

一类是object条目数过多,导致对应的index 元数据条目数过多,可以用上面的方法处理。
另外一类是bilog过多,这里的方法就不适用了,需要手工进行bilog清理,关于bilog后续会有详细章节介绍。
' _) K3 A& f* I! i# g9 a, p# _6 I8 k( E2 k2 s

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
 楼主| 发表于 2022-8-23 09:54:43 | 显示全部楼层
线上multisite环境出现HEALTH_WARN 32 large omap objects,由于已经设置bucket auto reshard=false,所以排除是bucket index所在的shard omap过大引发的问题。官方给出的告警信息无法定位到具体的object,于是有了下面的排错过程。

排查过程
[root@demo supdev]# ceph health detail
HEALTH_WARN 32 large omap objects
LARGE_OMAP_OBJECTS 32 large omap objects
    32 large objects found in pool 'cn-bj-test1.rgw.log' #出现large omap的pool
    Search the cluster log for 'Large omap object found' for more details.
4 ]# t- b( X6 b/ B, [4 n7 P" r: v9 g8 k; ^( B4 F) E
5 T) M. V4 A4 w( y  @4 J) C3 y7 ^5 X
[root@demo supdev]# ceph pg ls-by-pool cn-bj-test1.rgw.log |awk '{print "ceph pg "$1 " query|grep num_large_omap_objects"}'|sh -x
ceph pg 11.0 query|grep num_large_omap_objects
ceph pg 11.1 query|grep num_large_omap_objects
ceph pg 11.2 query|grep num_large_omap_objects
......
+ ceph pg 11.1e6 query
+ grep num_large_omap_objects
                "num_large_omap_objects": 1 #有large omap的object数量
                    "num_large_omap_objects": 0
                    "num_large_omap_objects": 0
) V/ ^7 ?9 e! v6 o8 B
2 A9 e* r- U/ P, H  x) N8 _3 B( {
[root@demo supdev]# ceph pg 11.1e6 query #查询pg详细信息1 ^/ T* g# s0 v4 h/ q! D3 M! K
{
, T+ ]% K! b2 o: c! Q' l    "state": "active+clean",# [5 M! I# q; _( j
.....
" A" p4 z) S+ k, T- F& r! y1 v; B% C0 q    "info": {8 T9 ?; x% j4 L+ J
        "pgid": "11.1e6",: K3 r4 R* l9 i+ I$ q
        "last_update": "10075'3051746",
# _$ d" V5 x7 ~( t$ O3 G- w        "last_complete": "10075'3051746",# s( I% Q# Z& [! n
        "log_tail": "10075'3050200",
6 |& K5 F( u/ K' N. C" ^        "last_user_version": 3051746,3 q, b1 @6 \$ V5 p- p  L# B% a
        "last_backfill": "MAX",
2 J9 X, C: X# I! c! K+ U$ Y        "last_backfill_bitwise": 0,
2 F& i6 \8 X! w. n7 ^/ Q) }        "purged_snaps": [],/ O% u7 |  x3 ~" l' [9 b. f
.....
/ x/ C; l( g, _: W" f7 {
7 O# u1 I# k, e5 R              "acting": [2 K0 u, ^. x! F9 I% ]: [& Z' O
                    46, #主OSD id=46
% S7 s4 m6 J' m, c% G6 p: {                    63, #从OSD$ |* w+ E. B7 o1 @; d) r& V
                    23  #从OSD
2 V0 ^# M3 R$ c* \  L# H0 g                ],
7 D0 N% ^: [4 p1 C4 C, R; C5 k            "stat_sum": {
$ R/ E1 ^+ F9 M4 t0 W0 A5 C                "num_bytes": 40,! {5 P% K0 E: [# W) ]  i
                "num_objects": 2,1 i* F+ h9 Y7 f5 T
                "num_object_clones": 0,
9 P3 N. x* P8 {/ y+ ~  t                "num_object_copies": 6,
$ r% n; _  G4 ~5 C                "num_objects_missing_on_primary": 0,
; m% ~6 T  A& \                "num_objects_missing": 0,& p' p( |# a5 X9 Z2 N
                "num_objects_degraded": 0," o& |* U  M: P2 R, A3 c6 [
                "num_objects_misplaced": 0,1 r2 i1 ^/ m, x  ]- G2 `9 [, J6 M
                "num_objects_unfound": 0,
) L3 d; M* t  L                "num_objects_dirty": 2,$ \2 s$ S# R/ T) C* I
                "num_whiteouts": 0,* |* o3 q: w, W; |  W4 a, `
                "num_read": 3055759,
2 |# `9 s+ |3 a% j                "num_read_kb": 3056162," h$ K( N- k* I3 J" V0 L
                "num_write": 5986011,
* }) h/ d1 }# A                "num_write_kb": 53,, g/ Y4 }1 `8 p1 c, O- z* c: m& r% m
                "num_scrub_errors": 0,6 x/ {+ G4 [- c) _( [
                "num_shallow_scrub_errors": 0,
  v: ~5 D) I* O% e                "num_deep_scrub_errors": 0,( W6 i( L% ]+ L: R6 X) L: i
                "num_objects_recovered": 0,
8 m. @! ]) O$ r% S  |% S                "num_bytes_recovered": 0,
1 b( L* K: U& F2 m' p                "num_keys_recovered": 0,. R5 M5 T3 V. M/ W3 m5 B
                "num_objects_omap": 1,. _8 i1 i' C9 C0 v  K8 Q* q7 P
                "num_objects_hit_set_archive": 0,
. x, t# q8 P' A! i% N8 N                "num_bytes_hit_set_archive": 0,
: C6 w( v: t- G! i                "num_flush": 0,
- ^8 b7 @6 S( T0 Q                "num_flush_kb": 0,' I6 A* J' q+ h% ]# v2 v" b
                "num_evict": 0,
5 r" D1 J2 Q# ^; C2 v/ Q; _" j! B                "num_evict_kb": 0,
0 T5 J' }/ A" q- a4 |                "num_promote": 0,! G# S; _6 x. D# B6 O5 [
                "num_flush_mode_high": 0,2 V$ ]! w/ ?" ]$ u9 j# H
                "num_flush_mode_low": 0,7 A) `4 J4 T, Z7 Z0 L" c
                "num_evict_mode_some": 0,
& x6 {  c; a3 j+ H# Q8 }7 o                "num_evict_mode_full": 0,  f1 G( v- q5 p( s% c, f. K
                "num_objects_pinned": 0,
% L* I: p0 r( Z                "num_legacy_snapsets": 0,
. w4 c' b1 R( q                "num_large_omap_objects": 1 #large omap的object数量- C5 P, ^" H% {! K: n' p+ d
            },: L6 Y1 X0 F$ A$ u) V& ?
            ...0 ~' u" M* w* s3 P- @+ K
                "agent_state": {}
0 w' x& l9 o9 \2 @$ E}
( z6 Z. n) R4 Y. ]8 w/ {/ t+ K  S) a4 X5 a2 i

/ L( r! _, |2 t5 s. s[root@demo supdev]# ceph osd find 46 #根据OSD id找到对应的主机信息7 W3 x' _2 o( b0 i  h0 _+ U& [
{* [! Q! ^9 ]/ Z8 k1 C
    "osd": 46,
* D. _# m" Q* C5 D    "ip": "100.1.1.40:6812/3691515",4 Q# ~5 S# ?6 ?0 E
    "crush_location": {
' `# I: h7 D6 ^& `# J; s& p        "host": "TX-100-1-40-sata",
$ u9 K1 u, H1 Y  B% ^        "media": "site1-rack2-sata",
9 g+ U% K! u/ K4 G0 E5 m        "mediagroup": "site1-sata",
# J3 m( K9 ~3 g* s- F9 D1 X* e+ R0 p        "root": "default"* [" k; r. x( x
    }
- X, @6 _, N& f* }2 g5 `}
, d! f/ z6 q- C. L+ z! v
6 x* |6 l( K/ H% V- W) q/ a
[root@demo supdev]# zcat /var/log/ceph/ceph-osd.46.log-20181210.gz |grep omap #根据OSD日志找到具体的object名称
2018-12-09 23:03:18.803799 7f90e9b46700  0 log_channel(cluster) log [WRN] : Large omap object found. Object: 11:67885262:::sync.error-log.3:head Key count: 2934286 Size (bytes): 657040594
#OSD 46上的object名称为sync.error-log.3的omap超出标准

8 z$ J, Y$ j: O4 T- k" B
, U+ f; p8 }$ G' P. K, G2 ]3 d! M2 ]& W8 B
[root@demo supdev]# rados ls -p cn-bj-test1.rgw.log|grep "sync.error-log.3$" #确定objects存在
sync.error-log.3

#注意整个multisite的同步过程中的错误日志信息以omap形式存储在sync.error-log.*
#吐槽一下,错误日志分32个shard存储,代码写死了,而且错误日志目前还只能通过手工清理,无法像其他日志一样自动trim,随着错误日志不断堆积,才引发了今天的问题。
' n! _% P- D# {+ R4 s/ Q. G
[root@demo supdev]# radosgw-admin sync error list|more #查看错误日志
[
) [: r3 g: N) O! m+ D    {6 z; y. l, x. c9 ?+ M- k
        "shard_id": 0,
9 P# X( o9 K. g) _% `        "entries": [
& N; b9 z- A0 K            {0 n# {0 h8 a' l0 ]2 B
                "id": "1_1540890427.972991_36.1",( d0 Q. g! M# h2 c: G2 n- I
                "section": "data",
- S6 K2 y# P9 R: m                "name": "demo2:afd874cd-f976-4007-a77c-be6fca298b71.34353.1:3",
, U3 `6 k% K0 `% [1 w# V, p                "timestamp": "2018-10-30 09:07:07.972991Z"," x2 y# N; ], s/ z9 q8 C! [& I3 v
                "info": {
$ ^: p: K7 _7 M) O  [) g$ ]3 R                    "source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",
& Z1 J8 O3 x; W                    "error_code": 5,
% W6 {1 s% \6 @                    "message": "failed to sync bucket instance: (5) Input/output error"1 B% a3 K* e/ q9 b  e" q/ @
                }
. s4 r7 x/ y, c1 R9 E5 ?            },5 ?5 G% r0 f: D& [) W2 i$ |/ J
......
9 B# s' k9 O9 Z- p- a+ p            {9 _+ O) w. k8 B- c+ N
                "id": "1_1543395420.626552_32014.1",6 k: f! i3 w4 c! u5 [+ ]
                "section": "data"," u# r7 T: V$ a6 G1 @) W
                "name": "demo1:afd874cd-f976-4007-a77c-be6fca298b71.34209.1:0/file1205085",* i& h# K* y5 j) B
                "timestamp": "2018-11-28 08:57:00.626552Z",
2 p" _2 Z, F' n                "info": {  ]/ h# G: Z) Y1 `3 o7 Y4 R. i
                    "source_zone": "afd874cd-f976-4007-a77c-be6fca298b71",
/ `1 j- Q( w. o% q% _                    "error_code": 5,  ~, m3 Q( Y% F& G/ V& b
                    "message": "failed to sync object(5) Input/output error"
" f: t8 X7 |3 R' g7 C                }
3 k  w. F3 u- A- M1 n            }6 O) k- J4 f) I$ R, B! d

( F0 ?' `- U& ]+ x0 X6 C
[root@TX-97-140-6 supdev]# radosgw-admin sync error trim --start-date=2018-11-14 --end-date=2018-11-28 #按日期清理错误日志记录
- _1 ]( ]9 n2 U$ m5 w- e% R4 z复制
优化定位效率
简单写了个脚本,先根据warn信息找pool,之后再根据pool找出有large omap objects的pg,凑合用,不保证没bug,在12.2.10下面测试通过。

[root@demo cephuser]# cat large_obj.py
import json
import rados
import rbd

# Path of the Ceph configuration file; RBDDriver hands it to
# rados.Rados(conffile=...).
ceph_conf_path = '/etc/ceph/ceph.conf'
# Timeout passed to client.connect(timeout=...); RBDDriver connects without a
# timeout when this is negative. Presumably expressed in seconds -- confirm
# against the librados Python binding in use.
rados_connect_timeout = 5

class RADOSClient(object):
    """Context manager that holds a RADOS connection for the length of a ``with`` block.

    The connection is opened in the constructor (not in ``__enter__``) by
    delegating to ``driver._connect_to_rados(pool)`` and is released in
    ``__exit__`` through ``driver._disconnect_from_rados``.

    Attributes:
        driver: object providing ``_connect_to_rados`` and
            ``_disconnect_from_rados``.
        client: first element of the pair returned by ``_connect_to_rados``.
        ioctx: second element of that pair.
    """

    def __init__(self, driver, pool=None):
        """Connect immediately through ``driver``.

        Args:
            driver: connection helper (see class docstring).
            pool: value forwarded unchanged to ``driver._connect_to_rados``.
        """
        self.driver = driver
        self.client, self.ioctx = driver._connect_to_rados(pool)

    def __enter__(self):
        """Return this wrapper so callers can reach ``client`` and ``ioctx``."""
        return self

    def __exit__(self, type_, value, traceback):
        """Release the connection; returns None, so exceptions from the
        ``with`` body are not suppressed."""
        self.driver._disconnect_from_rados(self.client, self.ioctx)
0 l+ J+ f) D4 H& l3 ~; o" ?
class RBDDriver(object):
    """Opens and closes librados connections described by a ceph.conf file.

    Args:
        ceph_conf_path: path passed to ``rados.Rados(conffile=...)``.
        rados_connect_timeout: timeout passed to ``connect()``; when negative,
            ``connect()`` is called without a timeout argument.
        pool: default pool to open an I/O context on, or ``None`` for a
            cluster-level connection only.
    """

    def __init__(self, ceph_conf_path, rados_connect_timeout, pool=None):
        self.ceph_conf_path = ceph_conf_path
        self.rados_connect_timeout = rados_connect_timeout
        self.pool = pool

    def _connect_to_rados(self, pool=None):
        """Connect to the cluster and optionally open an I/O context.

        Args:
            pool: pool to open; falls back to ``self.pool`` when ``None``.

        Returns:
            ``(client, ioctx)`` where ``ioctx`` is ``None`` if no pool is set.

        Raises:
            rados.Error: if connecting or opening the pool fails; the client
                is shut down before the error is raised.
        """
        # Honour an explicitly passed pool; previously the argument was ignored.
        if pool is None:
            pool = self.pool
        client = rados.Rados(conffile=self.ceph_conf_path)
        try:
            if self.rados_connect_timeout >= 0:
                client.connect(timeout=self.rados_connect_timeout)
            else:
                client.connect()
            ioctx = None if pool is None else client.open_ioctx(pool)
            return client, ioctx
        except rados.Error as exc:
            msg = "Error connecting to ceph cluster."
            client.shutdown()
            # Raising a plain string is itself a TypeError; raise a real
            # exception and keep the underlying cause in the message.
            raise rados.Error("{0} ({1})".format(msg, exc))

    def _disconnect_from_rados(self, client, ioctx=None):
        """Close ``ioctx`` when one was opened, then shut the client down."""
        if ioctx is not None:
            ioctx.close()
        client.shutdown()

class cmd_manager():
    """Runs monitor commands to locate pools and PGs with large omap objects."""

    def _mon_json(self, command):
        """Send ``command`` (a dict) to the monitors and decode the JSON reply.

        Returns:
            ``(True, parsed_reply)`` when the command's return code is 0,
            otherwise ``(False, None)``.
        """
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            # json.dumps escapes quotes/backslashes in values, which manual
            # string concatenation did not. inbuf is passed as empty bytes.
            result = dr.client.mon_command(json.dumps(command), b'')
        if result[0] != 0:
            return False, None
        return True, json.loads(result[1])

    def get_large_omap_obj_poolname(self):
        """Return the pool named in the LARGE_OMAP_OBJECTS health check.

        The pool name is taken from between the first pair of single quotes
        in the first detail message of the check.

        Returns:
            The pool name, or ``False`` when the command fails, the check is
            not present, or the message carries no quoted pool name.
        """
        ok, health = self._mon_json(
            {"prefix": "health", "detail": "detail", "format": "json"})
        if not ok:
            return False
        # A cluster without the warning has no such key; do not raise KeyError.
        check = health.get("checks", {}).get('LARGE_OMAP_OBJECTS')
        if not check:
            return False
        detail = check.get('detail') or []
        if not detail:
            return False
        parts = detail[0].get('message', '').split("'")
        return parts[1] if len(parts) > 1 else False

    def get_pg_list_by_pool(self, poolname):
        """Return the decoded ``pg ls-by-pool`` reply for ``poolname``.

        Returns:
            The parsed JSON reply, or ``False`` when the command fails.
        """
        ok, pgs = self._mon_json(
            {"prefix": "pg ls-by-pool", "poolstr": poolname, "format": "json"})
        if not ok:
            return False
        return pgs

# Script entry: report the pool flagged by LARGE_OMAP_OBJECTS, then list
# every PG in that pool whose stats show a non-zero large-omap count.
cmd_ = cmd_manager()
poolname = cmd_.get_large_omap_obj_poolname()
# print(...) with a single argument behaves the same on Python 2 and 3.
print("Large omap objects poolname = {0}".format(poolname))
# Only query PGs when a pool was actually reported; a False pool name would
# otherwise be sent as the pool to list.
res = cmd_.get_pg_list_by_pool(poolname) if poolname else False
# NOTE(review): newer Ceph releases appear to wrap the PG list as
# {"pg_stats": [...]} instead of a bare list — confirm against your version.
if isinstance(res, dict):
    pg_stats = res.get("pg_stats", [])
else:
    pg_stats = res or []
for i in pg_stats:
    large_count = i["stat_sum"]["num_large_omap_objects"]
    if large_count != 0:
        print("pgid={0} OSDs={1} num_large_omap_objects={2}".format(i["pgid"], i["acting"], large_count))
复制
再爆一个雷
如果你认为通过上面的方式清除 omap 之后集群就能立马恢复状态,那就太天真了。告警信息“HEALTH_WARN 32 large omap objects”依然挂在那里,不尴不尬:虽然 omap 已经清理了,但是因为对应 PG 的状态没有更新,所以告警信息依然存在,只能通过手工或者其他方式去触发 PG 的状态更新。我这边是通过 ceph pg deep-scrub {pg} 去触发 pg 信息更新的。注意:用 scrub 是没用的,必须用 deep-scrub。这里又要吐槽官方的逻辑设计,真是WFK!当然你也可以放在那里不管,等后台自动 deep-scrub 之后也能恢复。

您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 00:04 , Processed in 0.023521 second(s), 23 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表