博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
CPU问题导致的大量进程崩溃问题
阅读量:6227 次
发布时间:2019-06-21

本文共 6206 字,大约阅读时间需要 20 分钟。

昨天刚收到一个故障机,现象是反复重启。

从日志中可以看到surfaceflinger一直在NE,如:

pid: 17522, tid: 17522, name: surfaceflinger >>> /system/bin/surfaceflinger <<<

 

pc附近的指令为:

code around pc:    0000007f989d2680 97fc1b75d2817802 911be2b0912ce26f  .x..u...o.,.....    0000007f989d2690 9280000192800000 a90005e0911ed2b1  ................    0000007f989d26a0 a90205e0a90105e0 a90405e0a90305e0  ................    0000007f989d26b0 a90605e0a90505e0 a9000600a90705e0  ................    0000007f989d26c0 a9020600a9010600 a9040600a9030600  ................    0000007f989d26d0 a9060600a9050600 aa1103e3a9070600  ................    0000007f989d26e0 b957e660f94bce72 f90bce618b000241  r.K.`.W.A...a...    0000007f989d26f0 110007c2885ffc7e 35ffffa48804fc62  ~._.....b......5    0000007f989d2700 36000185395eb2a5 aa1303e0f9400276  ..^9...6v.@.....    0000007f989d2710 d63f0260f9400ad3 2a1503e02a0003f5  ..@.`.?....*...*    0000007f989d2720 a94153f3f9401bf7 a8c47bfda9425bf5  ..@..SA..[B..{..    0000007f989d2730 b957e263d65f03c0 51000c6652800009  .._.c.W....Rf..Q    0000007f989d2740 54000689710070df b940168b79402a8a  .p.q...T.*@y..@.    0000007f989d2750 331b0d8bd345214c 721f05bf53001d6d  L!E....3m..S...r    0000007f989d2760 52a1800654000481 52a0800452a10007  ...T...R...R...R    0000007f989d2770 7100099f52800008 7100119f540004a0  ...R...q...T...q

 

用工具解析成arm指令:

7f989d2680:	d2817802 	mov	x2, #0xbc0                 	// #3008    7f989d2684:	97fc1b75 	bl	0x7f988d9458    7f989d2688:	912ce26f 	add	x15, x19, #0xb38    7f989d268c:	911be2b0 	add	x16, x21, #0x6f8    7f989d2690:	92800000 	mov	x0, #0xffffffffffffffff    	// #-1    7f989d2694:	92800001 	mov	x1, #0xffffffffffffffff    	// #-1    7f989d2698:	911ed2b1 	add	x17, x21, #0x7b4    7f989d269c:	a90005e0 	stp	x0, x1, [x15]    7f989d26a0:	a90105e0 	stp	x0, x1, [x15,#16]    7f989d26a4:	a90205e0 	stp	x0, x1, [x15,#32]    7f989d26a8:	a90305e0 	stp	x0, x1, [x15,#48]    7f989d26ac:	a90405e0 	stp	x0, x1, [x15,#64]    7f989d26b0:	a90505e0 	stp	x0, x1, [x15,#80]    7f989d26b4:	a90605e0 	stp	x0, x1, [x15,#96]    7f989d26b8:	a90705e0 	stp	x0, x1, [x15,#112]    7f989d26bc:	a9000600 	stp	x0, x1, [x16]    7f989d26c0:	a9010600 	stp	x0, x1, [x16,#16]    7f989d26c4:	a9020600 	stp	x0, x1, [x16,#32]    7f989d26c8:	a9030600 	stp	x0, x1, [x16,#48]    7f989d26cc:	a9040600 	stp	x0, x1, [x16,#64]    7f989d26d0:	a9050600 	stp	x0, x1, [x16,#80]    7f989d26d4:	a9060600 	stp	x0, x1, [x16,#96]    7f989d26d8:	a9070600 	stp	x0, x1, [x16,#112]    7f989d26dc:	aa1103e3 	mov	x3, x17    7f989d26e0:	f94bce72 	ldr	x18, [x19,#6040]    7f989d26e4:	b957e660 	ldr	w0, [x19,#6116]    7f989d26e8:	8b000241 	add	x1, x18, x0    7f989d26ec:	f90bce61 	str	x1, [x19,#6040]    7f989d26f0:	885ffc7e 	ldaxr	w30, [x3]    7f989d26f4:	110007c2 	add	w2, w30, #0x1    7f989d26f8:	8804fc62 	stlxr	w4, w2, [x3]    7f989d26fc:	35ffffa4 	cbnz	w4, 0x7f989d26f0    7f989d2700:	395eb2a5 	ldrb	w5, [x21,#1964]    7f989d2704:	36000185 	tbz	w5, #0, 0x7f989d2734    7f989d2708:	f9400276 	ldr	x22, [x19]    7f989d270c:	aa1303e0 	mov	x0, x19    7f989d2710:	f9400ad3 	ldr	x19, [x22,#16]    7f989d2714:	d63f0260 	blr	x19    7f989d2718:	2a0003f5 	mov	w21, w0    7f989d271c:	2a1503e0 	mov	w0, w21    7f989d2720:	f9401bf7 	ldr	x23, [sp,#48]    7f989d2724:	a94153f3 	ldp	x19, x20, [sp,#16]    7f989d2728:	a9425bf5 	ldp	x21, x22, [sp,#32]    7f989d272c:	a8c47bfd 	ldp	x29, x30, [sp],#64    7f989d2730:	d65f03c0 	ret    7f989d2734:	b957e263 	ldr	w3, [x19,#6112]    7f989d2738:	52800009 	mov	w9, #0x0                   	// #0    7f989d273c:	51000c66 	sub	w6, w3, #0x3    7f989d2740:	710070df 	cmp	w6, #0x1c    7f989d2744:	54000689 	b.ls	0x7f989d2814    7f989d2748:	79402a8a 	ldrh	w10, [x20,#20]    7f989d274c:	b940168b 	ldr	w11, [x20,#20]    7f989d2750:	d345214c 	ubfx	x12, x10, #5, #4    7f989d2754:	331b0d8b 	bfi	w11, w12, #5, #4    7f989d2758:	53001d6d 	uxtb	w13, w11    7f989d275c:	721f05bf 	tst	w13, #0x6    7f989d2760:	54000481 	b.ne	0x7f989d27f0    7f989d2764:	52a18006 	mov	w6, #0xc000000             	// #201326592    7f989d2768:	52a10007 	mov	w7, #0x8000000             	// #134217728    7f989d276c:	52a08004 	mov	w4, #0x4000000             	// #67108864    7f989d2770:	52800008 	mov	w8, #0x0                   	// #0    7f989d2774:	7100099f 	cmp	w12, #0x2    7f989d2778:	540004a0 	b.eq	0x7f989d280c    7f989d277c:	7100119f 	cmp	w12, #0x4

 

出问题的指令是:

7f989d26a0:	a90105e0 	stp	x0, x1, [x15,#16]

 

此时x15的值是:0x0000007f977d4738

上面这条指令将x0值写入0x0000007f977d4748,x1值写入0x0000007f977d4750

出错的地址是0x0000007d977d4750,看起来是将x1写入0x0000007f977d4750时,地址突然变成了0x0000007d977d4750导致的FC。

0x0000007f977d4750

0x0000007d977d4750

这两个值就差一个bit,单条指令出这种异常,基本能确定是CPU问题。

通过x15指向的内存值也能够证明上面的推测:

memory near x15:    0000007f977d4718 0000000000000000 0000000000000000  ................    0000007f977d4728 0000000000000000 0000000000000000  ................    0000007f977d4738 ffffffffffffffff ffffffffffffffff  ................    0000007f977d4748 ffffffffffffffff 0000000000000000  ................    0000007f977d4758 0000000000000000 0000000000000000  ................    0000007f977d4768 0000000000000000 0000000000000000  ................    0000007f977d4778 0000000000000000 0000000000000000  ................    0000007f977d4788 0000000000000000 0000000000000000  ................    0000007f977d4798 0000000000000000 0000000000000000  ................    0000007f977d47a8 0000000000000000 0000000000000000  ................    0000007f977d47b8 0000000000000000 0000000000000000  ................    0000007f977d47c8 0000000000000000 0000000000000000  ................    0000007f977d47d8 0000000000000000 0000000000000000  ................    0000007f977d47e8 0000000000000000 0000000000000000  ................    0000007f977d47f8 0000000000000000 0000000000000000  ................    0000007f977d4808 0000000000000000 0000000000000000  ................

 

0x0000007f977d4748里的值已经更新为x0值,但0x0000007f977d4750里的值却不是x1值。

一条指令执行过程中x15值变化了,能说明什么呢?

 

同时有其他进程的FC,现象都是寄存器值正确,但读到的值有个别位异常。

列出其中部分出问题的指令如下:

    7f98fd515c: a90039ed stp x13, x14, [x15]

    7f930c8c74: a94051f3 ldp x19, x20, [x15]

    7f81be5234: 3dc001e3 ldr q3, [x15]

    7f914b815c: a90039ed stp x13, x14, [x15]

    7f856c3d7c: b9001ded str w13, [x15,#28]

发现都是从x15指向的内存读写数据时出的错。

软件无法解释,给高通报个bug吧

 

转载于:https://www.cnblogs.com/YYPapa/p/7125283.html

你可能感兴趣的文章
(转)BT1120接口及协议
查看>>
Robot Framework与Web界面自动化测试学习笔记:定位到新窗口
查看>>
The Dataflow Model 论文
查看>>
Linux守护进程
查看>>
遇到没“人性”的管理:你真可怜!
查看>>
http://www.bootcss.com/p/font-awesome/
查看>>
新浪微博UWP UI意见征求
查看>>
使用ServiceStack构建Web服务
查看>>
Linqer工具
查看>>
table中超过长度的列,显示省略号
查看>>
Qtcreator中经常使用快捷键总结
查看>>
可扩展Web架构与分布式系统(转)
查看>>
KVM虚拟机的安装
查看>>
【转】PHP中require和include路径问题总结
查看>>
Android 监听apk安装替换卸载广播
查看>>
指针之——一级二级多级指针
查看>>
AndroidStudio遇到过的问题
查看>>
MySQL整体架构与内存结构
查看>>
线上centos6出现软死锁 kernel:BUG: soft lockup
查看>>
pl/sql developer 自动输入替换 光标自动定位
查看>>