找回密码
 注册
查看: 1317|回复: 0

HEALTH_WARN 1 daemons have recently crashed 解决过程

[复制链接]

1

主题

0

回帖

12

积分

管理员

积分
12
QQ
发表于 2022-3-1 19:01:12 | 显示全部楼层 |阅读模式
ceph 出现告警,解决流程:
[root@hostceph1 ~]# ceph health detail
HEALTH_WARN 1 daemons have recently crashed
RECENT_CRASH 1 daemons have recently crashed
    osd.29 crashed on host compute08 at 2022-03-01 10:31:17.079004Z


[root@hostceph1 ~]# ceph crash ls-new
ID                                                               ENTITY NEW
2022-03-01_10:31:17.079004Z_11fa7732-990f-4166-8de5-943ff6f07c10 osd.29  *
[root@hostceph1 ~]# ceph crash info  2022-03-01_10:31:17.079004Z_11fa7732-990f-4166-8de5-943ff6f07c10
{
    "os_version_id": "7",
    "assert_condition": "e.version > info.last_update",
    "utsname_release": "3.10.0-1160.el7.x86_64",
    "os_name": "CentOS Linux",
    "entity_name": "osd.29",
    "assert_file": "/home/miles/rpmbuild/BUILD/ceph-14.2.8/src/osd/PG.cc",
    "timestamp": "2022-03-01 10:31:17.079004Z",
    "process_name": "ceph-osd",
    "utsname_machine": "x86_64",
    "assert_line": 3964,
    "utsname_sysname": "Linux",
    "os_version": "7 (Core)",
    "os_id": "centos",
    "assert_thread_name": "tp_osd_tp",
    "utsname_version": "#1 SMP Wed Nov 18 03:43:48 UTC 2020",
    "backtrace": [
        "(()+0xf630) [0x7fb551f8f630]",
        "(gsignal()+0x37) [0x7fb550d82387]",
        "(abort()+0x148) [0x7fb550d83a78]",
        "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x199) [0x55adc93aa704]",
        "(()+0x4cc87d) [0x55adc93aa87d]",
        "(PG::add_log_entry(pg_log_entry_t const&, bool)+0x1f5) [0x55adc953f3f5]",
        "(PG::append_log(std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> > const&, eversion_t, eversion_t, ObjectStore::Transaction&, bool, bool)+0x10b) [0x55adc956f01b]",
        "(non-virtual thunk to PrimaryLogPG::log_operation(std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> > const&, boost::optional<pg_hit_set_history_t> const&, eversion_t const&, eversion_t const&, bool, ObjectStore::Transaction&, bool)+0x95) [0x55adc96598a5]",
        "(ReplicatedBackend::do_repop(boost::intrusive_ptr<OpRequest>)+0xaa9) [0x55adc977a7a9]",
        "(ReplicatedBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x257) [0x55adc9788f57]",
        "(PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x4a) [0x55adc9699dea]",
        "(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x5b3) [0x55adc964a1d3]",
        "(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x362) [0x55adc948ab62]",
        "(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x55adc9719752]",
        "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x90f) [0x55adc94a5b5f]",
        "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5b6) [0x55adc9a49dd6]",
        "(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x55adc9a4c8f0]",
        "(()+0x7ea5) [0x7fb551f87ea5]",
        "(clone()+0x6d) [0x7fb550e4a9fd]"
    ],
    "utsname_hostname": "compute08",
    "assert_msg": "/home/miles/rpmbuild/BUILD/ceph-14.2.8/src/osd/PG.cc: In function 'void PG::add_log_entry(const pg_log_entry_t&, bool)' thread 7fb52ad89700 time 2022-03-01 18:31:17.054438\n/home/miles/rpmbuild/BUILD/ceph-14.2.8/src/osd/PG.cc: 3964: FAILED ceph_assert(e.version > info.last_update)\n",
    "crash_id": "2022-03-01_10:31:17.079004Z_11fa7732-990f-4166-8de5-943ff6f07c10",
    "assert_func": "void PG::add_log_entry(const pg_log_entry_t&, bool)",
    "ceph_version": "14.2.8-111.el7"
}


[root@hostceph1 ~]# ceph crash archive 2022-03-01_10:31:17.079004Z_11fa7732-990f-4166-8de5-943ff6f07c10
[root@hostceph1 ~]# ceph health detail
HEALTH_OK



解决完成。

以下只是查看命令:
[root@hostceph1 ~]# ceph config get  mgr/crash/warn_recent_interval
Error EINVAL: unrecognized entity 'mgr/crash/warn_recent_interval'
[root@hostceph1 ~]# ceph get mgr/crash/warn_recent_interval
no valid command found; 10 closest matches:
osd pause
osd unpause
osd get-require-min-compat-client
osd set-require-min-compat-client <version> {--yes-i-really-mean-it}
osd set-backfillfull-ratio <float[0.0-1.0]>
osd set-nearfull-ratio <float[0.0-1.0]>
mds count-metadata <property>
mds metadata {<who>}
fs dump {<int[0-]>}
versions
Error EINVAL: invalid command
[root@hostceph1 ~]# ceph config set mgr/crash/warn_recent_interval  0
Invalid command: missing required parameter value(<string>)
config set <who> <name> <value> {--force} :  Set a configuration option for one or more entities
Error EINVAL: invalid command
[root@hostceph1 ~]# ceph crash archive-all
[root@hostceph1 ~]# ceph -s
  cluster:
    id:     29046cc0-0682-496b-98b1-912e59964282
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum hostceph1,hostceph2,hostceph3 (age 27m)
    mgr: hostceph1(active, since 53m), standbys: hostceph2, hostceph3
    osd: 34 osds: 34 up (since 27m), 34 in (since 45m)

  data:
    pools:   9 pools, 9344 pgs
    objects: 1.21M objects, 4.6 TiB
    usage:   16 TiB used, 110 TiB / 126 TiB avail
    pgs:     9344 active+clean

  io:
    client:   2.7 KiB/s rd, 13 MiB/s wr, 0 op/s rd, 97 op/s wr
您需要登录后才可以回帖 登录 | 注册

本版积分规则

返回首页|Archiver|手机版|小黑屋|易陆发现技术论坛 ( 蜀ICP备2026014127号-1 )

GMT+8, 2026-6-12 01:00 , Processed in 0.014951 second(s), 22 queries .

Powered by Discuz! X5.0

© 2001-2026 Discuz! Team.

快速回复 返回顶部 返回列表