ceph crash archive-all
[root@node03 ~]# ceph health detail
HEALTH_WARN 1 daemons have recently crashed
RECENT_CRASH 1 daemons have recently crashed
client.rgw.node03 crashed on host node03 at 2023-01-06 07:12:28.313044Z

系统中所有的崩溃可以通过以下方式列出:
[root@node03 ~]# ceph crash ls
ID                                                               ENTITY            NEW
2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e client.rgw.node03 *
[root@node03 ~]# ceph crash ls-new
ID                                                               ENTITY            NEW
2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e client.rgw.node03 *

有关特定崩溃的信息可以通过以下方式检查:
[root@node03 ~]# ceph crash info 2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e
{
    "os_version_id": "7.9",
    "utsname_release": "3.10.0-1160.el7.x86_64",
    "os_name": "Red Hat Enterprise Linux Server",
    "entity_name": "client.rgw.node03",
    "timestamp": "2023-01-06 07:12:28.313044Z",
    "process_name": "radosgw",
    "utsname_machine": "x86_64",
    "utsname_sysname": "Linux",
    "os_version": "7.9 (Maipo)",
    "os_id": "rhel",
    "utsname_version": "#1 SMP Tue Aug 18 14:50:17 EDT 2020",
    "backtrace": [
        "(()+0xf630) [0x7f5588ef4630]",
        "(()+0x84556) [0x7f5594e49556]",
        "(()+0x86b0b) [0x7f5594e4bb0b]",
        "(RGWSI_Notify::unwatch(RGWSI_RADOS::Obj&, unsigned long)+0x2a) [0x5607b503643a]",
        "(RGWWatcher::C_ReinitWatch::finish(int)+0x34) [0x5607b503bb44]",
        "(Context::complete(int)+0x9) [0x5607b4b59489]",
        "(Finisher::finisher_thread_entry()+0x16f) [0x7f558c0b8fff]",
        "(()+0x7ea5) [0x7f5588eecea5]",
        "(clone()+0x6d) [0x7f55883f596d]"
    ],
    "utsname_hostname": "node03",
    "crash_id": "2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e",
    "ceph_version": "14.2.8-111.el7"
}

可以通过“存档”崩溃(可能是在管理员检查之后)来消除此警告,存档之后将不再生成此警告:
[root@node03 ~]# ceph crash archive 2023-01-06_07:12:28.313044Z_df9f4fa8-d138-40f6-b8ec-c7c86299bb3e

已存档的崩溃仍然可以通过 ceph crash ls 看到,但不会再出现在 ceph crash ls-new 的输出中。“recent”所指的时间段由选项 mgr/crash/warn_recent_interval 控制(默认值:两周)。可以通过以下方式完全禁用这些警告:
ceph config set mgr mgr/crash/warn_recent_interval 0