一、背景
linux 内核出现内存类问题时,我们常用的调试工具就是kasan,kasan有三种模式:
1. Generic KASAN (这个就是我们最常用的,每 1 个 shadow byte 记录 8 个字节内存的可用状态,对标用户层 asan)
2. Software Tag-Based KASAN (这个可以对标用户层 hwasan,仅64位生效)
3. Hardware Tag-Based KASAN (大名鼎鼎的MTE, 也是arm64 且硬件平台需要额外支持)
后面将对比三种不同的kasan 使用,实现原理以及使用案例
二、KASAN使能相关配置(Generic版本)
kasan相关config是否打开
/dev # zcat /proc/config.gz | grep -i kasan
CONFIG_KASAN_SHADOW_OFFSET=0xdfff800000000000 //这个offset是怎么来的、有什么含义?下一节描述
CONFIG_DRIVER_KASAN_TEST=m
CONFIG_HAVE_ARCH_KASAN=y
CONFIG_HAVE_ARCH_KASAN_SW_TAGS=y
CONFIG_HAVE_ARCH_KASAN_HW_TAGS=y
CONFIG_HAVE_ARCH_KASAN_VMALLOC=y
CONFIG_CC_HAS_KASAN_GENERIC=y
CONFIG_CC_HAS_KASAN_SW_TAGS=y
CONFIG_KASAN=y
CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y
CONFIG_KASAN_GENERIC=y //标准版本kasan
# CONFIG_KASAN_SW_TAGS is not set
# CONFIG_KASAN_HW_TAGS is not set
CONFIG_KASAN_OUTLINE=y
# CONFIG_KASAN_INLINE is not set
CONFIG_KASAN_STACK=y //stack kasan检测,如局部变量,局部数组等操作引起的内存踩踏
CONFIG_KASAN_VMALLOC=y //vmalloc kasan检测,使用vmalloc申请内存的内存踩踏
这里使用的是普通版本 Generic KASAN
三、kasan基本原理
shadow byte 的值:
1~7 表示对应的 8 byte 中只有前 N 个 byte 可访问(N 即 shadow byte 的值)
0 表示对应的 8 byte 均可访问
其他值表示这 8 byte 均无法访问,常见的shadow byte值可以看mm/kasan/kasan.h定义:
#ifdef CONFIG_KASAN_GENERIC
#define KASAN_PAGE_FREE 0xFF /* freed page */
#define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocation */
#define KASAN_SLAB_REDZONE 0xFC /* redzone for slab object */
#define KASAN_SLAB_FREE 0xFB /* freed slab object */
#define KASAN_VMALLOC_INVALID 0xF8 /* inaccessible space in vmap area */
#define KASAN_SLAB_FREETRACK 0xFA /* freed slab object with free track */
#define KASAN_GLOBAL_REDZONE 0xF9 /* redzone for global variable */
/* Stack redzone shadow values. Compiler ABI, do not change. */
#define KASAN_STACK_LEFT 0xF1
#define KASAN_STACK_MID 0xF2
#define KASAN_STACK_RIGHT 0xF3
#define KASAN_STACK_PARTIAL 0xF4
/* alloca redzone shadow values. */
#define KASAN_ALLOCA_LEFT 0xCA
#define KASAN_ALLOCA_RIGHT 0xCB
下图是arm64 48位 pagesize 4K的内存映射图,正好32TB映射整个内核空间。
前面一节遗留一个问题,CONFIG_KASAN_SHADOW_OFFSET=0xdfff800000000000
这个 shadow offset 是用来做什么的?它实际上是根据内核虚拟地址位数和 kasan 模式计算出来的:
config KASAN_SHADOW_OFFSET
hex
depends on KASAN_GENERIC || KASAN_SW_TAGS
default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
default 0xffffffffffffffff
计算方法是:
CONFIG_KASAN_SHADOW_OFFSET = KASAN_SHADOW_START - (KERNEL_ADDR_START >> 3)
= 0xffff600000000000 - (0xffff000000000000 >> 3) = 0xffff600000000000 - 0x1fffe00000000000 = 0xdfff800000000000
有了这个kasan_shadow_offset, 后面我们需要获取一个内核地址对应的shadow 位置,只需要通过公式:
(kernel_addr >> 3) + CONFIG_KASAN_SHADOW_OFFSET = kernel_addr对应的shadow_addr
四、利用 test driver程序验证
下面是一个简易的测试用例,用来测试kmalloc、page、全局变量、stack变量和vmalloc的内存踩踏
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <asm/page.h>
#include <linux/vmalloc.h>
#include "../../../mm/kasan/kasan.h"
/* Two-element global array; global_oob_left() reads one element past its end. */
int global_kasan_value[2] = { [0] = 996, [1] = 7 };
/*
 * Per-module test object. One instance is allocated at module load and
 * stored in gptr.
 */
struct kasan_test_type {
	int type;	/* not accessed by any code visible in this file */
};
/* Module-wide test object; assigned in kasan_start(). Statics start out NULL. */
static struct kasan_test_type *gptr;
/*
 * Test selector. The numeric value is what gets written to the misc device
 * to choose a scenario, so every value is pinned explicitly.
 */
enum kasan_test_case {
	slab_out_of_bounds    = 0,	/* kmalloc_oob_right() */
	page_out_of_bounds    = 1,	/* pagealloc_oob_right() */
	global_out_of_bounds  = 2,	/* global_oob_left() */
	stack_out_of_bounds   = 3,	/* kasan_stack_oob() */
	use_after_free        = 4,	/* pagealloc_uaf() */
	vmalloc_out_of_bounds = 5,	/* vmalloc_oob() */
	alloca_out_of_bounds  = 6,	/* declared, but the dispatcher has no case for it */
};
/*
 * kmalloc_oob_right - write one byte beyond the right edge of a slab object.
 * @size:         number of bytes requested from kmalloc()
 * @write_offset: distance of the written byte from the last valid byte
 *                (index size - 1); any value > 0 lands out of bounds
 *
 * The out-of-bounds store is intentional: it is the access KASAN is expected
 * to report.
 */
static void kmalloc_oob_right(size_t size, int write_offset)
{
	char *ptr = kmalloc(size, GFP_KERNEL);

	/*
	 * Bail out on allocation failure; otherwise the store below would be a
	 * NULL-pointer dereference rather than the out-of-bounds write under test.
	 */
	if (!ptr) {
		pr_err("%s: kmalloc(%zu) failed\n", __func__, size);
		return;
	}

	pr_info("%s %llx\n", __func__, (unsigned long long)ptr);

	/* Deliberate out-of-bounds write. */
	ptr[size - 1 + write_offset] = 'y';

	kfree(ptr);
}
/*
 * Read global_kasan_value[2], one element past the end of the two-element
 * global array, and print the value.  Despite the "_left" in the name, the
 * access is beyond the right (high-index) end of the array.
 */
static void global_oob_left(void)
{
/* Index 2 is out of bounds on purpose; this read is the access under test. */
pr_info("global arr oob access %d\n", global_kasan_value[2]);
}
/*
 * pagealloc_oob_right - read the first byte after a page allocation.
 * @order: allocation order passed to alloc_pages(); the block spans
 *         1 << (PAGE_SHIFT + order) bytes
 *
 * The read of ptr[size] is intentional: it touches the byte immediately
 * following the allocated block.
 */
static void pagealloc_oob_right(size_t order)
{
	size_t size = 1UL << (PAGE_SHIFT + order);
	struct page *pages;
	char *ptr;

	pages = alloc_pages(GFP_KERNEL, order);
	/* Do not hand a NULL page to page_address() if the allocation failed. */
	if (!pages) {
		pr_err("%s: alloc_pages(order %zu) failed\n", __func__, order);
		return;
	}

	ptr = page_address(pages);
	pr_info("%s %llx\n", __func__, (unsigned long long)ptr);

	/* Deliberate out-of-bounds read: ptr[size] is one byte past the block. */
	ptr[0] = ptr[size];

	free_pages((unsigned long)ptr, order);
}
/*
 * pagealloc_uaf - read from a page block after it has been freed.
 * @order: allocation order passed to alloc_pages()
 *
 * The read of ptr[0] after free_pages() is the intentional use-after-free.
 */
static void pagealloc_uaf(size_t order)
{
	struct page *pages;
	char *ptr;

	pages = alloc_pages(GFP_KERNEL, order);
	/* Do not hand a NULL page to page_address() if the allocation failed. */
	if (!pages) {
		pr_err("%s: alloc_pages(order %zu) failed\n", __func__, order);
		return;
	}

	ptr = page_address(pages);
	/* Terminate the message with a newline, like every other log line here. */
	pr_info("%s %llx\n", __func__, (unsigned long long)ptr);

	free_pages((unsigned long)ptr, order);

	/* Deliberate use-after-free read. */
	pr_info("%s %d\n", __func__, ptr[0]);
}
/*
 * vmalloc_oob - read one byte past the end of a vmalloc() allocation.
 * @size: number of bytes requested from vmalloc()
 *
 * The first and last valid bytes are written first; the read of v_ptr[size]
 * is the intentional out-of-bounds access.
 */
static void vmalloc_oob(size_t size)
{
	char *v_ptr = vmalloc(size);

	/* Bail out on failure instead of dereferencing NULL below. */
	if (!v_ptr) {
		pr_err("%s: vmalloc(%zu) failed\n", __func__, size);
		return;
	}

	OPTIMIZER_HIDE_VAR(v_ptr);
	/* Terminate the message with a newline, like every other log line here. */
	pr_info("%s %llx\n", __func__, (unsigned long long)v_ptr);

	/* Make sure in-bounds accesses are valid. */
	v_ptr[0] = 0;
	v_ptr[size - 1] = 0;

	/* Deliberate out-of-bounds read: index size is one past the last byte. */
	pr_info("%s %d\n", __func__, v_ptr[size]);

	vfree(v_ptr);
}
/*
 * Read one byte beyond a 10-byte on-stack array and print it.
 * The access is deliberately out of bounds.
 */
static void kasan_stack_oob(void)
{
char stack_array[10];
/*
 * The array is reached through a volatile-qualified pointer variable rather
 * than indexed directly -- presumably so the compiler cannot resolve the
 * out-of-bounds index at build time (TODO confirm).
 */
char *volatile array = stack_array;
/* Points at index 14 of the 10-element array (ARRAY_SIZE + 4). */
char *p = &array[ARRAY_SIZE(stack_array) + 4];
/* Dereferencing p is the out-of-bounds read under test. */
pr_info("%s %d\n", __func__, *p);
}
/*
 * kasan_test_case - run the memory-error scenario selected by @type.
 * @type: one of the enum kasan_test_case values; anything without a case
 *        below is only logged
 *
 * Multi-shot reporting is switched on for the duration of the call and the
 * previous setting is put back afterwards.
 */
static void kasan_test_case(int type)
{
	/* Report every violation; by default only the first one is reported. */
	bool saved_multi_shot = kasan_save_enable_multi_shot();

	switch (type) {
	case slab_out_of_bounds:
		/* 128-byte object; the store lands 2 bytes past its last valid byte. */
		kmalloc_oob_right(128, 2);
		break;

	case page_out_of_bounds:
		pagealloc_oob_right(0);
		break;

	case global_out_of_bounds:
		global_oob_left();
		break;

	case stack_out_of_bounds:
		kasan_stack_oob();
		break;

	case use_after_free:
		pagealloc_uaf(0);
		break;

	case vmalloc_out_of_bounds:
		vmalloc_oob(2048);
		break;

	default:
		pr_info("undef error type %d\n", type);
		break;
	}

	kasan_restore_multi_shot(saved_multi_shot);
	pr_info("%s type %d\n", __func__, type);
}
/*
 * kasan_testcase_write - write() handler: parse a test number and run it.
 * @filp: file being written (unused)
 * @buf:  user buffer holding the test number as text; base is auto-detected
 * @len:  number of bytes supplied by user space
 * @off:  file offset (unused)
 *
 * Return: @len on success, or a negative errno if the user buffer could not
 * be read or does not contain a valid integer.
 *
 * The input is parsed with kstrtoint_from_user(), which copies into a bounded
 * on-stack buffer and NUL-terminates it.  This replaces an open-coded
 * kmalloc() + copy_from_user() + simple_strtoul() sequence that never freed
 * the buffer, did not check the allocation, parsed a buffer without a NUL
 * terminator, and reported success even when the copy failed.
 */
static ssize_t kasan_testcase_write(struct file *filp, const char __user *buf,
				    size_t len, loff_t *off)
{
	int ntcase;
	int ret;

	ret = kstrtoint_from_user(buf, len, 0, &ntcase);
	if (ret) {
		pr_err("%s: invalid test case input (%d)\n", __func__, ret);
		return ret;
	}

	kasan_test_case(ntcase);

	/* Consume the whole write so user space does not retry the remainder. */
	return len;
}
static struct file_operations kasan_fops = {
.owner = THIS_MODULE,
.write = kasan_testcase_write,
.llseek = noop_llseek,
};
/* Misc device descriptor: registered under the name "kasan_test" with a dynamically assigned minor. */
static struct miscdevice kasan_misc = {
	.name	= "kasan_test",
	.minor	= MISC_DYNAMIC_MINOR,
	.fops	= &kasan_fops,
};
/*
 * kasan_start - module init: allocate the test object and register the
 * misc device.
 *
 * The object is allocated before the device is registered so that a failed
 * allocation never leaves a registered device behind.
 *
 * Return: 0 on success, -ENOMEM if the allocation fails, or the negative
 * error code from misc_register().
 */
static int __init kasan_start(void)
{
	int ret;

	gptr = kzalloc(sizeof(*gptr), GFP_KERNEL);
	if (!gptr)
		return -ENOMEM;

	ret = misc_register(&kasan_misc);
	if (ret < 0) {
		printk(KERN_EMERG " kasan test register failed %d\n", ret);
		/* Undo the allocation; the module is not going to load. */
		kfree(gptr);
		gptr = NULL;
		return ret;
	}

	printk(KERN_INFO "kasan test register\n");
	return 0;
}

/*
 * kasan_end - module exit: unregister the misc device and release the test
 * object allocated in kasan_start().
 */
static void __exit kasan_end(void)
{
	misc_deregister(&kasan_misc);

	/* Free the object allocated at load time so unloading does not leak it. */
	kfree(gptr);
	gptr = NULL;
}
/* Module entry and exit points. */
module_init(kasan_start);
module_exit(kasan_end);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("geek");
MODULE_DESCRIPTION("A simple kasan test driver!");
MODULE_VERSION("0.1");
五、内存踩踏testcase调试
触发kmalloc的out of bound访问
/dev # echo 0 > /dev/kasan_test
[ 4063.037612] kmalloc_oob_right ffff000006e57400
[ 4063.065278] ==================================================================
[ 4063.073081] BUG: KASAN: slab-out-of-bounds in kasan_testcase_write+0x170/0x4d8 [kasan_driver]
[ 4063.075812] Write of size 1 at addr ffff000006e57481 by task sh/179
[ 4063.076529]
[ 4063.077151] CPU: 5 PID: 179 Comm: sh Tainted: G B N 6.6.1-g3cba94c761ec-dirty #15
[ 4063.077902] Hardware name: linux,dummy-virt (DT)
[ 4063.078538] Call trace:
[ 4063.078926] dump_backtrace+0x90/0xe8
[ 4063.079771] show_stack+0x18/0x24
[ 4063.079971] dump_stack_lvl+0x48/0x60
[ 4063.080175] print_report+0xf8/0x5d8
[ 4063.080372] kasan_report+0xc4/0x108
[ 4063.080566] __asan_store1+0x60/0x6c
[ 4063.080783] kasan_testcase_write+0x170/0x4d8 [kasan_driver]
[ 4063.081141] vfs_write+0x158/0x45c
[ 4063.081492] ksys_write+0xd0/0x180
[ 4063.081835] __arm64_sys_write+0x44/0x58
[ 4063.082188] invoke_syscall+0x60/0x184
[ 4063.082550] el0_svc_common.constprop.0+0x78/0x13c
[ 4063.082955] do_el0_svc+0x30/0x40
[ 4063.083179] el0_svc+0x38/0x70
[ 4063.083351] el0t_64_sync_handler+0x120/0x12c
[ 4063.083553] el0t_64_sync+0x190/0x194
[ 4063.083853]
[ 4063.083982] Allocated by task 179:
[ 4063.084229] kasan_save_stack+0x3c/0x64
[ 4063.084559] kasan_set_track+0x2c/0x40
[ 4063.084757] kasan_save_alloc_info+0x24/0x34
[ 4063.084975] __kasan_kmalloc+0xb8/0xbc
[ 4063.085230] kmalloc_trace+0x48/0x5c
[ 4063.085438] kasan_testcase_write+0x154/0x4d8 [kasan_driver]
[ 4063.085758] vfs_write+0x158/0x45c
[ 4063.085965] ksys_write+0xd0/0x180
[ 4063.086155] __arm64_sys_write+0x44/0x58
[ 4063.086355] invoke_syscall+0x60/0x184
[ 4063.086556] el0_svc_common.constprop.0+0x78/0x13c
[ 4063.086790] do_el0_svc+0x30/0x40
[ 4063.086984] el0_svc+0x38/0x70
[ 4063.087168] el0t_64_sync_handler+0x120/0x12c
[ 4063.087385] el0t_64_sync+0x190/0x194
[ 4063.087600]
[ 4063.087749] The buggy address belongs to the object at ffff000006e57400
[ 4063.087749] which belongs to the cache kmalloc-128 of size 128
[ 4063.088269] The buggy address is located 1 bytes to the right of
[ 4063.088269] allocated 128-byte region [ffff000006e57400, ffff000006e57480)
[ 4063.088708]
[ 4063.088928] The buggy address belongs to the physical page:
[ 4063.089384] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x46e56
[ 4063.089990] head:(____ptrval____) order:1 entire_mapcount:0 nr_pages_mapped:0 pincount:0
[ 4063.090330] flags: 0x3fffc0000000840(slab|head|node=0|zone=0|lastcpupid=0xffff)
[ 4063.090960] page_type: 0xffffffff()
[ 4063.091467] raw: 03fffc0000000840 ffff0000060028c0 dead000000000122 0000000000000000
[ 4063.091776] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
[ 4063.092095] page dumped because: kasan: bad access detected
[ 4063.092322]
[ 4063.092443] Memory state around the buggy address:
[ 4063.092785] ffff000006e57380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 4063.093109] ffff000006e57400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 4063.093404] >ffff000006e57480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 4063.093690] ^
[ 4063.093906] ffff000006e57500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 4063.094188] ffff000006e57580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 4063.094470] ==================================================================
[ 4063.096441] kasan_test_case type 0
对应代码
static void kmalloc_oob_right(size_t size, int write_offset)
{
char *ptr;
ptr = kmalloc(size, GFP_KERNEL);
pr_info("%s %llx\n", __func__, (unsigned long long)ptr);
ptr[size - 1 + write_offset] = 'y'; //触发越界访问
......
对应汇编代码
0xffff80007dbf0174 <+336>: bl 0xffff800080321be4 <kmalloc_trace> //1.这里完成分配内存的shadow标记
0xffff80007dbf0178 <+340>: mov x2, x0
0xffff80007dbf017c <+344>: add x1, x22, #0x80
0xffff80007dbf0180 <+348>: mov x24, x0
0xffff80007dbf0184 <+352>: add x0, x22, #0xc0
0xffff80007dbf0188 <+356>: bl 0xffff800080154834 <_printk>
0xffff80007dbf018c <+360>: add x0, x24, #0x81
0xffff80007dbf0190 <+364>: bl 0xffff8000803b70d8 <__asan_store1> //2.这里来检查内存访问是否合法
0xffff80007dbf0194 <+368>: mov w1, #0x79 // #121
0xffff80007dbf0198 <+372>: strb w1, [x24, #129]
0xffff80007dbf019c <+376>: mov x0, x24
0xffff80007dbf01a0 <+380>: bl 0xffff800080322a8c <kfree>
这里实际分成两步:
a、在kmalloc时设置shadow标记;
b、在访问内存前,编译器插桩会把待访问的地址传给kasan检查函数(访问长度由所调用的 __asan_loadN/__asan_storeN 决定),由它读取对应的shadow值,确认这次访问是否合法
5.1.kmalloc时设置tag 标记分析:
__kmalloc
-->kmalloc_slab
-->__kasan_kmalloc
-->kasan_poison_last_granule
-->kasan_poison
Dump of assembler code for function kasan_poison_last_granule:
0xffff8000803b89ec <+0>: ands x2, x1, #0x7
0xffff8000803b89f0 <+4>: b.eq 0xffff8000803b8a08 <kasan_poison_last_granule+28> // b.none
0xffff8000803b89f4 <+8>: add x0, x0, x1
0xffff8000803b89f8 <+12>: mov x1, #0x800000000000 // 熟悉的0xdfff800000000000
0xffff8000803b89fc <+16>: movk x1, #0xdfff, lsl #48
0xffff8000803b8a00 <+20>: lsr x0, x0, #3
0xffff8000803b8a04 <+24>: strb w2, [x0, x1]
0xffff8000803b8a08 <+28>: ret
上面的代码只在 size 不是 8 的整数倍(size & 7 != 0)时生效:把 size & 7 写入对象最后一个 granule 对应的 shadow 地址:((ptr + size) >> 3) + kasan_shadow_offset(0xdfff800000000000)
比如上面的kmalloc 128字节,指针值是0xffff000006e57400,我们查看它的shadow标记值,正好shadow值对应16个0(16*8 可用byte) :
0xffff000006e57400对应shadow值
5.2.内存访问时kasan是如何捕获异常
对应汇编代码
0xffff80007dbf0174 <+336>: bl 0xffff800080321be4 <kmalloc_trace> //1.这里完成分配内存的shadow标记
0xffff80007dbf0178 <+340>: mov x2, x0
0xffff80007dbf017c <+344>: add x1, x22, #0x80
0xffff80007dbf0180 <+348>: mov x24, x0
0xffff80007dbf0184 <+352>: add x0, x22, #0xc0
0xffff80007dbf0188 <+356>: bl 0xffff800080154834 <_printk>
0xffff80007dbf018c <+360>: add x0, x24, #0x81 //注意这里传入的是待访问的地址 ptr + 0x81(0x81 是偏移量,访问长度 1 由 __asan_store1 本身决定)
0xffff80007dbf0190 <+364>: bl 0xffff8000803b70d8 <__asan_store1> //2.这里来检查内存访问是否合法
0xffff80007dbf0194 <+368>: mov w1, #0x79 // #121
0xffff80007dbf0198 <+372>: strb w1, [x24, #129]
0xffff80007dbf019c <+376>: mov x0, x24
0xffff80007dbf01a0 <+380>: bl 0xffff800080322a8c <kfree>
__asan_store1实现:
Dump of assembler code for function __asan_store1:
0xffff8000803b70d8 <+0>: paciasp
0xffff8000803b70dc <+4>: stp x29, x30, [sp, #-16]!
0xffff8000803b70e0 <+8>: xpaclri
0xffff8000803b70e4 <+12>: mov x29, sp
0xffff8000803b70e8 <+16>: cmn x0, #0x1
0xffff8000803b70ec <+20>: b.cs 0xffff8000803b7128 <__asan_store1+80> // b.hs, b.nlast
0xffff8000803b70f0 <+24>: mov x2, #0xfffeffffffffffff // #-281474976710657
0xffff8000803b70f4 <+28>: cmp x0, x2
0xffff8000803b70f8 <+32>: b.ls 0xffff8000803b7128 <__asan_store1+80> // b.plast
0xffff8000803b70fc <+36>: lsr x3, x0, #3 // 1.x0指针右移3位后存放在x3
0xffff8000803b7100 <+40>: mov x2, #0x800000000000 // 2.X2 存储kasan_offset 0xdfff800000000000
0xffff8000803b7104 <+44>: movk x2, #0xdfff, lsl #48
0xffff8000803b7108 <+48>: ldrsb w2, [x3, x2] // 3.读取x3+x2地址的值,即tag值
0xffff8000803b710c <+52>: cbnz w2, 0xffff8000803b711c <__asan_store1+68>
0xffff8000803b7110 <+56>: ldp x29, x30, [sp], #16
0xffff8000803b7114 <+60>: autiasp
0xffff8000803b7118 <+64>: ret
0xffff8000803b711c <+68>: and w1, w0, #0x7 //4.取待访问地址的低3位,即该地址在所属8字节granule内的偏移
0xffff8000803b7120 <+72>: cmp w2, w1 //5.和shadow值做比较
0xffff8000803b7124 <+76>: b.gt 0xffff8000803b7110 <__asan_store1+56>
0xffff8000803b7128 <+80>: mov x3, x30
0xffff8000803b712c <+84>: mov w2, #0x1 // #1
0xffff8000803b7130 <+88>: mov x1, #0x1 // #1
0xffff8000803b7134 <+92>: bl 0xffff8000803b67a0 <kasan_report> //6、shadow值(按有符号数比较)<= granule内偏移时触发异常
0xffff8000803b7138 <+96>: ldp x29, x30, [sp], #16
0xffff8000803b713c <+100>: autiasp
0xffff8000803b7140 <+104>: ret
1、传入待访问的地址后,先计算它对应的shadow存放地址
(ptr >> 3) + kasan_offset
2、从shadow存放地址取出shadow值,为0则直接放行,否则和该地址在8字节granule内的偏移(addr & 7)比较
3、比如这里测试用例是分配128 byte, 访问下标 size - 1 + 2 = 129 的位置, 也就是相对对象首地址的偏移0x81
(gdb) x /30b 0xFFFF600000DCAE80
0xffff600000dcae80: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0xffff600000dcae88: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0xffff600000dcae90: [0xfc] 0xfc 0xfc 0xfc 0xfc 0xfc 0xfc 0xfc
0xffff600000dcae98: 0xfc 0xfc 0xfc 0xfc 0xfc 0xfc
shadow地址0xffff600000dcae80对应内存地址0xffff000006e57400;
shadow地址0xffff600000dcae90对应的就是0xffff000006e57400 + 0x81 所在的granule,
4、读取的地址0xffff000006e57400 + 0x81:
要求这里的shadow值0~7, 但是实际是0xfc(KASAN_SLAB_REDZONE),所以触发 kasan_report
其他测试用例(由于实现原理类似,不逐一展开分析):
触发page 内存踩踏
/dev # echo 1 > kasan_test
[ 47.775781] pagealloc_oob_right ffff000004265000
[ 47.776110] ==================================================================
[ 47.777583] BUG: KASAN: use-after-free in kasan_testcase_write+0x3e0/0x4d8 [kasan_driver]
[ 47.780457] Read of size 1 at addr ffff000004266000 by task sh/179
[ 47.781456]
[ 47.782662] CPU: 1 PID: 179 Comm: sh Tainted: G N 6.6.1-g3cba94c761ec-dirty #15
[ 47.783727] Hardware name: linux,dummy-virt (DT)
[ 47.784470] Call trace:
[ 47.784783] dump_backtrace+0x90/0xe8
[ 47.785203] show_stack+0x18/0x24
[ 47.785515] dump_stack_lvl+0x48/0x60
[ 47.785785] print_report+0xf8/0x5d8
[ 47.786054] kasan_report+0xc4/0x108
[ 47.786303] __asan_load1+0x60/0x6c
[ 47.786806] kasan_testcase_write+0x3e0/0x4d8 [kasan_driver]
[ 47.787390] vfs_write+0x158/0x45c
[ 47.787656] ksys_write+0xd0/0x180
[ 47.787884] __arm64_sys_write+0x44/0x58
[ 47.788165] invoke_syscall+0x60/0x184
[ 47.788442] el0_svc_common.constprop.0+0x78/0x13c
[ 47.788761] do_el0_svc+0x30/0x40
[ 47.789029] el0_svc+0x38/0x70
[ 47.789214] el0t_64_sync_handler+0x120/0x12c
[ 47.789417] el0t_64_sync+0x190/0x194
[ 47.789708]
[ 47.789900] The buggy address belongs to the physical page:
[ 47.790263] page:(____ptrval____) refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x44266
[ 47.790795] flags: 0x3fffc0000000000(node=0|zone=0|lastcpupid=0xffff)
[ 47.791171] page_type: 0xffffffff()
[ 47.791590] raw: 03fffc0000000000 fffffc00001099c8 ffff00006af4d758 0000000000000000
[ 47.791876] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 47.792185] page dumped because: kasan: bad access detected
[ 47.792400]
[ 47.792513] Memory state around the buggy address:
[ 47.792842] ffff000004265f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 47.793129] ffff000004265f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 47.793394] >ffff000004266000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 47.793694] ^
[ 47.793896] ffff000004266080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 47.794152] ffff000004266100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 47.794554] ==================================================================
[ 47.795106] Disabling lock debugging due to kernel taint
[ 47.795450] kasan_test_case type 1
触发全局变量内存踩踏
/dev # echo 2 > kasan_test
[ 54.484659] ==================================================================
[ 54.484983] BUG: KASAN: global-out-of-bounds in kasan_testcase_write+0x2c0/0x4d8 [kasan_driver]
[ 54.485402] Read of size 4 at addr ffff80007dbf20a8 by task sh/179
[ 54.485638]
[ 54.485772] CPU: 1 PID: 179 Comm: sh Tainted: G B N 6.6.1-g3cba94c761ec-dirty #15
[ 54.486069] Hardware name: linux,dummy-virt (DT)
[ 54.486249] Call trace:
[ 54.486380] dump_backtrace+0x90/0xe8
[ 54.486575] show_stack+0x18/0x24
[ 54.486744] dump_stack_lvl+0x48/0x60
[ 54.486930] print_report+0x318/0x5d8
[ 54.487113] kasan_report+0xc4/0x108
[ 54.487293] __asan_load4+0x9c/0xb8
[ 54.487473] kasan_testcase_write+0x2c0/0x4d8 [kasan_driver]
[ 54.487754] vfs_write+0x158/0x45c
[ 54.487937] ksys_write+0xd0/0x180
[ 54.488108] __arm64_sys_write+0x44/0x58
[ 54.488294] invoke_syscall+0x60/0x184
[ 54.488484] el0_svc_common.constprop.0+0x78/0x13c
[ 54.488698] do_el0_svc+0x30/0x40
[ 54.488876] el0_svc+0x38/0x70
[ 54.489044] el0t_64_sync_handler+0x120/0x12c
[ 54.489244] el0t_64_sync+0x190/0x194
[ 54.489431]
[ 54.489583] The buggy address belongs to the variable:
[ 54.489776] global_kasan_value+0x8/0xffffffffffffef60 [kasan_driver]
[ 54.490085]
[ 54.490190] Memory state around the buggy address:
[ 54.490382] ffff80007dbf1f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[ 54.490637] ffff80007dbf2000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f9 f9
[ 54.490893] >ffff80007dbf2080: f9 f9 f9 f9 00 f9 f9 f9 f9 f9 f9 f9 00 00 00 00
[ 54.491166] ^
[ 54.491356] ffff80007dbf2100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 54.491609] ffff80007dbf2180: 00 00 00 00 00 00 00 00 00 00 00 00 00 f9 f9 f9
[ 54.491856] ==================================================================
[ 54.492485] global arr oob access 0
[ 54.492722] kasan_test_case type 2
触发stack内存踩踏
/dev # echo 3 > kasan_test
[ 75.450592] ==================================================================
[ 75.452056] BUG: KASAN: stack-out-of-bounds in kasan_testcase_write+0x414/0x4d8 [kasan_driver]
[ 75.454159] Read of size 1 at addr ffff8000873b7b1e by task sh/179
[ 75.455514]
[ 75.456157] CPU: 1 PID: 179 Comm: sh Tainted: G B N 6.6.1-g3cba94c761ec-dirty #15
[ 75.457488] Hardware name: linux,dummy-virt (DT)
[ 75.458119] Call trace:
[ 75.458581] dump_backtrace+0x90/0xe8
[ 75.459060] show_stack+0x18/0x24
[ 75.459488] dump_stack_lvl+0x48/0x60
[ 75.459950] print_report+0x318/0x5d8
[ 75.460412] kasan_report+0xc4/0x108
[ 75.460872] __asan_load1+0x60/0x6c
[ 75.461068] kasan_testcase_write+0x414/0x4d8 [kasan_driver]
[ 75.461358] vfs_write+0x158/0x45c
[ 75.461550] ksys_write+0xd0/0x180
[ 75.461719] __arm64_sys_write+0x44/0x58
[ 75.461904] invoke_syscall+0x60/0x184
[ 75.462092] el0_svc_common.constprop.0+0x78/0x13c
[ 75.462328] do_el0_svc+0x30/0x40
[ 75.462500] el0_svc+0x38/0x70
[ 75.462816] el0t_64_sync_handler+0x120/0x12c
[ 75.463091] el0t_64_sync+0x190/0x194
[ 75.463336]
[ 75.463560] The buggy address belongs to stack of task sh/179
[ 75.463929] and is located at offset 142 in frame:
[ 75.464205] kasan_testcase_write+0x0/0x4d8 [kasan_driver]
[ 75.464666]
[ 75.464913] This frame has 4 objects:
[ 75.465338] [48, 52) 'i'
[ 75.465413] [64, 72) 'array'
[ 75.465635] [96, 104) 'array'
[ 75.465813] [128, 138) 'stack_array'
[ 75.465977]
[ 75.466241] The buggy address belongs to the virtual mapping at
[ 75.466241] [ffff8000873b0000, ffff8000873b9000) created by:
[ 75.466241] kernel_clone+0xb4/0x470
[ 75.466756]
[ 75.466968] The buggy address belongs to the physical page:
[ 75.467185] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4ca7c
[ 75.467501] flags: 0x3fffc0000000000(node=0|zone=0|lastcpupid=0xffff)
[ 75.467743] page_type: 0xffffffff()
[ 75.467923] raw: 03fffc0000000000 0000000000000000 dead000000000122 0000000000000000
[ 75.468199] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
[ 75.468459] page dumped because: kasan: bad access detected
[ 75.468660]
[ 75.468764] Memory state around the buggy address:
[ 75.468955] ffff8000873b7a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 75.469214] ffff8000873b7a80: 00 00 f1 f1 f1 f1 f1 f1 04 f2 00 f2 f2 f2 00 f2
[ 75.469478] >ffff8000873b7b00: f2 f2 00 02 f3 f3 00 00 00 00 00 00 00 00 00 00
[ 75.469725] ^
[ 75.469903] ffff8000873b7b80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00
[ 75.470158] ffff8000873b7c00: 00 f2 f2 f2 f2 f2 00 00 00 00 00 00 f3 f3 f3 f3
[ 75.470407] ==================================================================
[ 75.470858] kasan_stack_oob 0
[ 75.471036] kasan_test_case type 3
触发page use after free
/dev # echo 4 > kasan_test
[ 80.572006] pagealloc_uaf ffff000004265000
[ 80.572276] ==================================================================
[ 80.573408] BUG: KASAN: use-after-free in kasan_testcase_write+0x288/0x4d8 [kasan_driver]
[ 80.574439] Read of size 1 at addr ffff000004265000 by task sh/179
[ 80.575262]
[ 80.575562] CPU: 1 PID: 179 Comm: sh Tainted: G B N 6.6.1-g3cba94c761ec-dirty #15
[ 80.576651] Hardware name: linux,dummy-virt (DT)
[ 80.577286] Call trace:
[ 80.577887] dump_backtrace+0x90/0xe8
[ 80.578659] show_stack+0x18/0x24
[ 80.579220] dump_stack_lvl+0x48/0x60
[ 80.579548] print_report+0xf8/0x5d8
[ 80.579839] kasan_report+0xc4/0x108
[ 80.580055] __asan_load1+0x60/0x6c
[ 80.580236] kasan_testcase_write+0x288/0x4d8 [kasan_driver]
[ 80.580523] vfs_write+0x158/0x45c
[ 80.580706] ksys_write+0xd0/0x180
[ 80.580887] __arm64_sys_write+0x44/0x58
[ 80.581126] invoke_syscall+0x60/0x184
[ 80.581378] el0_svc_common.constprop.0+0x78/0x13c
[ 80.581653] do_el0_svc+0x30/0x40
[ 80.581893] el0_svc+0x38/0x70
[ 80.582130] el0t_64_sync_handler+0x120/0x12c
[ 80.582425] el0t_64_sync+0x190/0x194
[ 80.582701]
[ 80.582861] The buggy address belongs to the physical page:
[ 80.583170] page:(____ptrval____) refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x44265
[ 80.583687] flags: 0x3fffc0000000000(node=0|zone=0|lastcpupid=0xffff)
[ 80.584071] page_type: 0xffffffff()
[ 80.584354] raw: 03fffc0000000000 fffffc0000109988 ffff00006af4d758 0000000000000000
[ 80.584774] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 80.585195] page dumped because: kasan: bad access detected
[ 80.585532]
[ 80.585697] Memory state around the buggy address:
[ 80.586005] ffff000004264f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 80.586408] ffff000004264f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 80.586783] >ffff000004265000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 80.587041] ^
[ 80.587203] ffff000004265080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 80.587465] ffff000004265100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[ 80.587716] ==================================================================
[ 80.588370] pagealloc_uaf 204
[ 80.588569] kasan_test_case type 4
触发vmalloc内存踩踏
/dev # echo 5 > kasan_test
[ 86.262697] vmalloc_oob ffff800085bf5000
[ 86.262824] ==================================================================
[ 86.263246] BUG: KASAN: vmalloc-out-of-bounds in kasan_testcase_write+0x47c/0x4d8 [kasan_driver]
[ 86.263603] Read of size 1 at addr ffff800085bf5800 by task sh/179
[ 86.263816]
[ 86.263932] CPU: 5 PID: 179 Comm: sh Tainted: G B N 6.6.1-g3cba94c761ec-dirty #15
[ 86.264229] Hardware name: linux,dummy-virt (DT)
[ 86.264395] Call trace:
[ 86.264525] dump_backtrace+0x90/0xe8
[ 86.264706] show_stack+0x18/0x24
[ 86.264860] dump_stack_lvl+0x48/0x60
[ 86.265059] print_report+0x318/0x5d8
[ 86.265250] kasan_report+0xc4/0x108
[ 86.265434] __asan_load1+0x60/0x6c
[ 86.265627] kasan_testcase_write+0x47c/0x4d8 [kasan_driver]
[ 86.265921] vfs_write+0x158/0x45c
[ 86.266113] ksys_write+0xd0/0x180
[ 86.266287] __arm64_sys_write+0x44/0x58
[ 86.266476] invoke_syscall+0x60/0x184
[ 86.266672] el0_svc_common.constprop.0+0x78/0x13c
[ 86.266892] do_el0_svc+0x30/0x40
[ 86.267078] el0_svc+0x38/0x70
[ 86.267251] el0t_64_sync_handler+0x120/0x12c
[ 86.267456] el0t_64_sync+0x190/0x194
[ 86.267640]
[ 86.267757] The buggy address belongs to the virtual mapping at
[ 86.267757] [ffff800085bf5000, ffff800085bf7000) created by:
[ 86.267757] kasan_testcase_write+0x444/0x4d8 [kasan_driver]
[ 86.268317]
[ 86.268428] The buggy address belongs to the physical page:
[ 86.268644] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4cd8a
[ 86.268963] flags: 0x3fffc0000000000(node=0|zone=0|lastcpupid=0xffff)
[ 86.269271] page_type: 0xffffffff()
[ 86.269461] raw: 03fffc0000000000 0000000000000000 dead000000000122 0000000000000000
[ 86.269746] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
[ 86.270008] page dumped because: kasan: bad access detected
[ 86.270209]
[ 86.270316] Memory state around the buggy address:
[ 86.270511] ffff800085bf5700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 86.270771] ffff800085bf5780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 86.271035] >ffff800085bf5800: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[ 86.271289] ^
[ 86.271470] ffff800085bf5880: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[ 86.271748] ffff800085bf5900: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[ 86.271997] ==================================================================
[ 86.272484] vmalloc_oob 0
[ 86.272779] kasan_test_case type 5
六、小结
kasan 的核心思想是简单的,复杂主要体现在编译器插桩实现(好在gcc/clang都已经支持了),在所有分配的部分需要完成shadow的存储,所有读写的地方加入指针访问地址长度的shadow值检查。我们业务使用是比较简单的,工程应用上需要注意的一些点就是:
1、打开kasan后kernel会变大,需要考虑boot分区的大小限制(预先需要足够)
2、bootloader引导时也需要注意物理地址划分,以前也遇到过将后面rootfs(ramdisk)覆盖导致无法启动的情况
3、默认是kasan report只是内核打印一次(后续触发也不会上报),大量机器测试时需要人力或者自动化脚本检查,出现问题我们想看下上下文或者一些变量状态也不方便,实际业务中通常增加 cmdline: kasan.fault=panic,这样发生问题时能保存现场,测试/开发同事也能第一时间发现并分析。
参考资料:
KASAN实现原理
HWAddress Sanitizer | Android NDK | Android Developers
Arm Memory Tagging Extension (MTE) | Android NDK | Android Developers
Address Sanitizer | Android NDK | Android Developers
https://developer.android.google.cn/ndk/guides/memory-debug?hl=zh-cn
Kernel page table dump