ceph crash archive-all
[root@node03 ~]# ceph health detail
HEALTH_WARN 1 daemons have recently crashed
RECENT_CRASH 1 daemons have recently crashed
    client.rgw.node03 crashed on host node03 at 2023-01-06 07:12:28.313044Z

系统中所有的崩溃可以通过以下方式列出:
[root@node03 ~]# ceph crash ls
ID                                                               ENTITY            NEW
2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e client.rgw.node03  *
[root@node03 ~]# ceph crash ls-new
ID                                                               ENTITY            NEW
2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e client.rgw.node03  *

有关特定崩溃的信息可以通过以下方式检查:
[root@node03 ~]# ceph crash info 2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e
{
    "os_version_id": "7.9",
    "utsname_release": "3.10.0-1160.el7.x86_64",
    "os_name": "Red Hat Enterprise Linux Server",
    "entity_name": "client.rgw.node03",
    "timestamp": "2023-01-06 07:12:28.313044Z",
    "process_name": "radosgw",
    "utsname_machine": "x86_64",
    "utsname_sysname": "Linux",
    "os_version": "7.9 (Maipo)",
    "os_id": "rhel",
    "utsname_version": "#1 SMP Tue Aug 18 14:50:17 EDT 2020",
    "backtrace": [
        "(()+0xf630) [0x7f5588ef4630]",
        "(()+0x84556) [0x7f5594e49556]",
        "(()+0x86b0b) [0x7f5594e4bb0b]",
        "(RGWSI_Notify::unwatch(RGWSI_RADOS::Obj&, unsigned long)+0x2a) [0x5607b503643a]",
        "(RGWWatcher::C_ReinitWatch::finish(int)+0x34) [0x5607b503bb44]",
        "(Context::complete(int)+0x9) [0x5607b4b59489]",
        "(Finisher::finisher_thread_entry()+0x16f) [0x7f558c0b8fff]",
        "(()+0x7ea5) [0x7f5588eecea5]",
        "(clone()+0x6d) [0x7f55883f596d]"
    ],
    "utsname_hostname": "node03",
    "crash_id": "2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e",
    "ceph_version": "14.2.8-111.el7"
}

可以通过“存档”崩溃(可能是在管理员检查之后)来消除此警告,从而不会再生成此警告:
[root@node03 ~]# ceph crash archive 2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e

通过 ceph crash ls 仍然可以看到已存档的崩溃,但 ceph crash ls-new 不会再显示它们。“recent”所指的时间段由选项 mgr/crash/warn_recent_interval 控制(默认值:两周)。可以通过以下方式完全禁用这些警告:
ceph config set mgr mgr/crash/warn_recent_interval 0