|
|
[root@compute1 ~]#
Message from syslogd@compute1 at Dec 13 17:56:10 ...
kernel:NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ksoftirqd/16:89]

Message from syslogd@compute1 at Dec 13 17:56:22 ...
kernel:NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [kworker/3:2:918]

Message from syslogd@compute1 at Dec 13 17:57:05 ...
kernel:NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:0:11804]

Message from syslogd@compute1 at Dec 13 17:57:17 ...
kernel:NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ksoftirqd/16:89]

kernel:NMI watchdog: BUG: soft lockup - CPU#34 stuck for 22s!
解决:

echo 30 > /proc/sys/kernel/watchdog_thresh
#直接写入 /proc 下的内核参数(立即生效,重启后失效)
tail -1 /proc/sys/kernel/watchdog_thresh
#查看确认
sysctl -w kernel.watchdog_thresh=30
#临时生效

解决办法:
#写入 /proc 下的内核参数(立即生效,重启后失效)

echo 30 > /proc/sys/kernel/watchdog_thresh

#查看

[root@git-node1 data]# tail -1 /proc/sys/kernel/watchdog_thresh
30

#临时生效

sysctl -w kernel.watchdog_thresh=30

#内核软死锁(soft lockup)bug原因分析
Soft lockup 名词解释:所谓 soft lockup,是指这个 bug 没有让系统彻底死机,但有若干个进程(或者 kernel thread)被锁死在了某个状态(一般在内核区域),很多情况下这是由内核锁的使用不当造成的。

#永久生效:将以下配置写入 /etc/sysctl.conf,保存后执行 sysctl -p 加载
vim /etc/sysctl.conf

kernel.watchdog_thresh=30

|
|