|
|
[root@compute1 ~]#
Message from syslogd@compute1 at Dec 13 17:56:10 ...
kernel:NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ksoftirqd/16:89]

Message from syslogd@compute1 at Dec 13 17:56:22 ...
kernel:NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/3:2:918]

Message from syslogd@compute1 at Dec 13 17:57:05 ...
kernel:NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:0:11804]

Message from syslogd@compute1 at Dec 13 17:57:17 ...
kernel:NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ksoftirqd/16:89]

kernel:NMI watchdog: BUG: soft lockup - CPU#34 stuck for 22s!
解决:

echo 30 > /proc/sys/kernel/watchdog_thresh
#写入内核参数(立即生效,重启后失效)
tail -1 /proc/sys/kernel/watchdog_thresh
#查看确认
sysctl -w kernel.watchdog_thresh=30
#临时生效

解决办法:

#写入内核参数(立即生效,重启后失效)
echo 30 > /proc/sys/kernel/watchdog_thresh

#查看

[root@git-node1 data]# tail -1 /proc/sys/kernel/watchdog_thresh
30

#临时生效

sysctl -w kernel.watchdog_thresh=30

#内核软死锁(soft lockup)bug原因分析

Soft lockup 名词解释:所谓 soft lockup,是指这个 bug 没有让系统彻底死机,但若干个进程(或者 kernel thread)被锁死在了某个状态(一般在内核区域),很多情况下这是由内核锁的使用问题导致的。

#永久生效:写入配置文件,保存后执行 sysctl -p 加载
vim /etc/sysctl.conf

kernel.watchdog_thresh=30

|
|