找回密码
 注册
查看: 1314|回复: 0

HEALTH_WARN 1 daemons have recently crashed 解决过程

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-3-1 19:01:12 | 显示全部楼层 |阅读模式
ceph 出现告警,解决流程:
[root@hostceph1 ~]# ceph health detail
HEALTH_WARN 1 daemons have recently crashed
RECENT_CRASH 1 daemons have recently crashed
    osd.29 crashed on host compute08 at 2022-03-01 10:31:17.079004Z

[root@hostceph1 ~]# ceph crash ls-new
ID                                                               ENTITY NEW
2022-03-01_10:31:17.079004Z_11fa7732-990f-4166-8de5-943ff6f07c10 osd.29  *
[root@hostceph1 ~]# ceph crash info  2022-03-01_10:31:17.079004Z_11fa7732-990f-4166-8de5-943ff6f07c10
{
    "os_version_id": "7",
    "assert_condition": "e.version > info.last_update",
    "utsname_release": "3.10.0-1160.el7.x86_64",
    "os_name": "CentOS Linux",
    "entity_name": "osd.29",
    "assert_file": "/home/miles/rpmbuild/BUILD/ceph-14.2.8/src/osd/PG.cc",
    "timestamp": "2022-03-01 10:31:17.079004Z",
    "process_name": "ceph-osd",
    "utsname_machine": "x86_64",
    "assert_line": 3964,
    "utsname_sysname": "Linux",
    "os_version": "7 (Core)",
    "os_id": "centos",
    "assert_thread_name": "tp_osd_tp",
    "utsname_version": "#1 SMP Wed Nov 18 03:43:48 UTC 2020",
    "backtrace": [
        "(()+0xf630) [0x7fb551f8f630]",
        "(gsignal()+0x37) [0x7fb550d82387]",
        "(abort()+0x148) [0x7fb550d83a78]",
        "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x199) [0x55adc93aa704]",
        "(()+0x4cc87d) [0x55adc93aa87d]",
        "(PG::add_log_entry(pg_log_entry_t const&, bool)+0x1f5) [0x55adc953f3f5]",
        "(PG::append_log(std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> > const&, eversion_t, eversion_t, ObjectStore::Transaction&, bool, bool)+0x10b) [0x55adc956f01b]",
        "(non-virtual thunk to PrimaryLogPG::log_operation(std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> > const&, boost::optional<pg_hit_set_history_t> const&, eversion_t const&, eversion_t const&, bool, ObjectStore::Transaction&, bool)+0x95) [0x55adc96598a5]",
        "(ReplicatedBackend::do_repop(boost::intrusive_ptr<OpRequest>)+0xaa9) [0x55adc977a7a9]",
        "(ReplicatedBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x257) [0x55adc9788f57]",
        "(PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x4a) [0x55adc9699dea]",
        "(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x5b3) [0x55adc964a1d3]",
        "(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x362) [0x55adc948ab62]",
        "(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x55adc9719752]",
        "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x90f) [0x55adc94a5b5f]",
        "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5b6) [0x55adc9a49dd6]",
        "(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x55adc9a4c8f0]",
        "(()+0x7ea5) [0x7fb551f87ea5]",
        "(clone()+0x6d) [0x7fb550e4a9fd]"
    ],
    "utsname_hostname": "compute08",
    "assert_msg": "/home/miles/rpmbuild/BUILD/ceph-14.2.8/src/osd/PG.cc: In function 'void PG::add_log_entry(const pg_log_entry_t&, bool)' thread 7fb52ad89700 time 2022-03-01 18:31:17.054438\n/home/miles/rpmbuild/BUILD/ceph-14.2.8/src/osd/PG.cc: 3964: FAILED ceph_assert(e.version > info.last_update)\n",
    "crash_id": "2022-03-01_10:31:17.079004Z_11fa7732-990f-4166-8de5-943ff6f07c10",
    "assert_func": "void PG::add_log_entry(const pg_log_entry_t&, bool)",
    "ceph_version": "14.2.8-111.el7"
}

[root@hostceph1 ~]# ceph crash archive 2022-03-01_10:31:17.079004Z_11fa7732-990f-4166-8de5-943ff6f07c10
[root@hostceph1 ~]# ceph health detail
HEALTH_OK

解决完成。

以下只是查看命令:
[root@hostceph1 ~]# ceph config get  mgr/crash/warn_recent_interval
Error EINVAL: unrecognized entity 'mgr/crash/warn_recent_interval'
[root@hostceph1 ~]# ceph get mgr/crash/warn_recent_interval
no valid command found; 10 closest matches:
osd pause
osd unpause
osd get-require-min-compat-client
osd set-require-min-compat-client <version> {--yes-i-really-mean-it}
osd set-backfillfull-ratio <float[0.0-1.0]>
osd set-nearfull-ratio <float[0.0-1.0]>
mds count-metadata <property>
mds metadata {<who>}
fs dump {<int[0-]>}
versions
Error EINVAL: invalid command
[root@hostceph1 ~]# ceph config set mgr/crash/warn_recent_interval  0
Invalid command: missing required parameter value(<string>)
config set <who> <name> <value> {--force} :  Set a configuration option for one or more entities
Error EINVAL: invalid command
[root@hostceph1 ~]# ceph crash archive-all
[root@hostceph1 ~]# ceph -s
  cluster:
    id:     29046cc0-0682-496b-98b1-912e59964282
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum hostceph1,hostceph2,hostceph3 (age 27m)
    mgr: hostceph1(active, since 53m), standbys: hostceph2, hostceph3
    osd: 34 osds: 34 up (since 27m), 34 in (since 45m)

  data:
    pools:   9 pools, 9344 pgs
    objects: 1.21M objects, 4.6 TiB
    usage:   16 TiB used, 110 TiB / 126 TiB avail
    pgs:     9344 active+clean

  io:
    client:   2.7 KiB/s rd, 13 MiB/s wr, 0 op/s rd, 97 op/s wr

您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 00:07 , Processed in 0.025069 second(s), 25 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表