参考文章:Linux内核态内存泄漏检测工具--kmemleak工具原理及应用_linux 内存泄漏检测工具-CSDN博客
细说|Linux 内存泄漏检测实现原理与实现_内核_指针_信息
kmemleak原理:看网上说大概原理是在通过kmalloc、vmalloc、kmem_cache_alloc等函数申请内存的时候,把申请的内存地址、内存大小、stack trace等信息记录下来,在内存释放的时候再将记录的信息删除掉。这样就能知道哪些内存没有被释放掉,同时也能知道这些未释放的内存究竟是由谁(哪条调用路径)申请的。
那么如何能够确认这些未释放的内存是泄漏,而不是仍在被其他人使用呢?kmemleak认为如果这个内存没有泄漏,那么内存中一定存在一个值记录着这个未释放内存块的地址。基于这个假设,kmemleak会定期去内存里面扫描,看内存里面是否存在这些地址;如果不存在,则认为这个内存块泄漏了。但是,如果内存中恰好有个值和某个已泄漏内存块的地址相同,那按照上面的逻辑就会出现漏报;另外,如果申请的内存块的地址并未被直接保存,而是通过多个数字计算得到,那么这种情况下会出现误报。
下面看看代码是怎么实现的(以vmalloc为例)
内存申请记录:vmalloc->__vmalloc->__vmalloc_node->__vmalloc_node_range->kmemleak_vmalloc
/*
 * Abridged excerpt of the vmalloc allocation routine. The dotted lines mark
 * code omitted by the article; only the kmemleak hook and the failure path
 * are shown.
 */
void *__vmalloc_node_range(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller)
{
....................................
/* Hand the freshly allocated area to kmemleak so it can be tracked. */
kmemleak_vmalloc(area, size, gfp_mask);
......................
fail:
/* Failure path: emit an allocation warning and return NULL. */
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure: %lu bytes", real_size);
return NULL;
}
/*
 * kmemleak hook for vmalloc allocations (abridged excerpt; the dotted line
 * marks code omitted by the article).
 *
 * @area: vm_struct describing the allocation; area->addr is the tracked
 *        block address.
 * @size: size passed on to create_object().
 * @gfp:  allocation flags passed on to create_object().
 */
void __ref kmemleak_vmalloc(const struct vm_struct *area, size_t size, gfp_t gfp)
{
........................................
if (kmemleak_enabled) {
/*
 * Track the block with a min_count of 2.
 * NOTE(review): presumably because the address is expected to be
 * referenced from more than one bookkeeping structure - confirm
 * against mm/kmemleak.c.
 */
create_object((unsigned long)area->addr, size, 2, gfp);
/* Associate the vm_struct itself with the tracked block address. */
object_set_excess_ref((unsigned long)area,
(unsigned long)area->addr);
}
}
create_object:对于每个申请的内存块,创建一个节点保存相关信息,加入到红黑树中
/*
 * Create the tracking metadata for an allocated block and insert it into the
 * object search tree and the global object list (abridged excerpt; the dotted
 * line marks code omitted by the article, including the allocation and
 * initialisation of "object").
 *
 * @ptr:       start address of the tracked block.
 * @size:      size of the tracked block.
 * @min_count: not used in the visible part of this excerpt.
 * @gfp:       not used in the visible part of this excerpt.
 *
 * Returns the new object, or NULL when the block overlaps an object that is
 * already in the tree (in which case kmemleak is stopped).
 */
static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
int min_count, gfp_t gfp)
{
......................................
/* kernel backtrace */
object->trace_len = __save_stack_trace(object->trace);
/* Everything below runs with kmemleak_lock write-held and IRQs saved. */
write_lock_irqsave(&kmemleak_lock, flags);
/* Widen the global [min_addr, max_addr) bounds using the untagged address. */
untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
min_addr = min(min_addr, untagged_ptr);
max_addr = max(max_addr, untagged_ptr + size);
/* Walk down the tree to find the insertion slot for [ptr, ptr + size). */
link = &object_tree_root.rb_node;
rb_parent = NULL;
while (*link) {
rb_parent = *link;
parent = rb_entry(rb_parent, struct kmemleak_object, rb_node);
/* New block ends at or before the parent's start: descend left. */
if (ptr + size <= parent->pointer)
link = &parent->rb_node.rb_left;
/* Parent ends at or before the new block's start: descend right. */
else if (parent->pointer + parent->size <= ptr)
link = &parent->rb_node.rb_right;
else {
/* Ranges overlap: stop kmemleak, drop the new object, return NULL. */
kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n",
ptr);
/*
* No need for parent->lock here since "parent" cannot
* be freed while the kmemleak_lock is held.
*/
dump_object_info(parent);
kmem_cache_free(object_cache, object);
object = NULL;
goto out;
}
}
/* Link the node at the slot found above and rebalance the tree. */
rb_link_node(&object->rb_node, rb_parent, link);
rb_insert_color(&object->rb_node, &object_tree_root);
/* Also append to the global list that the scanner iterates over. */
list_add_tail_rcu(&object->object_list, &object_list);
out:
write_unlock_irqrestore(&kmemleak_lock, flags);
return object;
}
内存记录删除:vfree->kmemleak_free->delete_object_full。从树里面把节点删除
/*
 * Free a vmalloc'ed block (abridged excerpt; the dotted line marks the
 * actual freeing code omitted by the article).
 */
void vfree(const void *addr)
{
/* Calling vfree() from NMI context is treated as a fatal bug. */
BUG_ON(in_nmi());
/* Tell kmemleak the block is going away before it is released. */
kmemleak_free(addr);
.................
}
/*
 * kmemleak hook invoked when the block at @ptr is freed: removes the
 * corresponding tracking record via delete_object_full().
 */
void __ref kmemleak_free(const void *ptr)
{
	pr_debug("%s(0x%p)\n", __func__, ptr);

	/*
	 * Nothing to untrack when the free hook is disabled, or when @ptr is
	 * NULL or an error-encoded pointer.
	 */
	if (!kmemleak_free_enabled || !ptr || IS_ERR(ptr))
		return;

	delete_object_full((unsigned long)ptr);
}
/*
 * Look up the tracking record for the block starting at @ptr, detach it from
 * kmemleak's bookkeeping and delete it. An unknown address is silently
 * ignored unless DEBUG is defined, in which case a warning is emitted.
 */
static void delete_object_full(unsigned long ptr)
{
	struct kmemleak_object *victim = find_and_remove_object(ptr, 0);

	if (victim) {
		__delete_object(victim);
		return;
	}

#ifdef DEBUG
	kmemleak_warn("Freeing unknown object at 0x%08lx\n",
		      ptr);
#endif
}
至此,申请和释放的内存都能被记录和删除了。如果存在内存泄漏,那么泄漏的地址一定在树里面。那么何时、如何检查这棵树呢?
扫描内核线程初始化
/*
 * Late initialisation of kmemleak (abridged excerpt; the dotted line marks
 * code omitted by the article). Marks kmemleak as initialised and, when
 * CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN is enabled, starts the scanning thread.
 *
 * Always returns 0 in the visible code.
 */
static int __init kmemleak_late_init(void)
{
kmemleak_initialized = 1;
.................................
if (IS_ENABLED(CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN)) {
/* start_scan_thread() is called with scan_mutex held. */
mutex_lock(&scan_mutex);
start_scan_thread();
mutex_unlock(&scan_mutex);
}
pr_info("Kernel memory leak detector initialized (mem pool available: %d)\n",
mem_pool_free_count);
return 0;
}
/* Register the function above as a late initcall. */
late_initcall(kmemleak_late_init);
/*
 * Start the "kmemleak" scanning kernel thread unless one is already
 * recorded in scan_thread. On creation failure a warning is logged and
 * scan_thread is reset to NULL.
 */
static void start_scan_thread(void)
{
	if (!scan_thread) {
		scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak");
		if (IS_ERR(scan_thread)) {
			pr_warn("Failed to create the scan thread\n");
			scan_thread = NULL;
		}
	}
}
/*
 * Body of the automatic scanning thread: optionally delays the very first
 * scan, then repeatedly runs kmemleak_scan() under scan_mutex and sleeps
 * between scans until the thread is asked to stop.
 *
 * @arg: unused.
 *
 * Returns 0 when the thread exits.
 */
static int kmemleak_scan_thread(void *arg)
{
	static int first_run = IS_ENABLED(CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN);
	signed long remaining;

	pr_info("Automatic memory scanning thread started\n");
	set_user_nice(current, 10);

	/* Delay the first scan so the system can finish initialising. */
	if (first_run) {
		remaining = msecs_to_jiffies(SECS_FIRST_SCAN * 1000);
		first_run = 0;
		while (remaining && !kthread_should_stop())
			remaining = schedule_timeout_interruptible(remaining);
	}

	while (!kthread_should_stop()) {
		/*
		 * Interval until the next scan, sampled before scanning.
		 * The article's author notes this is 10 minutes; the value
		 * is not visible in this excerpt - TODO confirm.
		 */
		remaining = jiffies_scan_wait;

		mutex_lock(&scan_mutex);
		kmemleak_scan();
		mutex_unlock(&scan_mutex);

		/* Sleep out the interval; resume sleeping after early wake-ups. */
		while (remaining && !kthread_should_stop())
			remaining = schedule_timeout_interruptible(remaining);
	}

	pr_info("Automatic memory scanning thread ended\n");
	return 0;
}
kmemleak_scan:真正负责扫描红黑树的地方
参考文章细说|Linux 内存泄漏检测实现原理与实现_内核_指针_信息
白色节点:表示此对象没有被指针引用( count 字段少于 min_count 字段)。
灰色节点:表示此对象被一个或多个指针引用( count 字段大于或等于 min_count 字段)。
黑色节点:表示此对象不需要被扫描( min_count 字段等于 -1)。
/*
 * Run one full kmemleak scan.
 *
 * Visible steps, in order:
 *   1. reset every tracked object's reference count;
 *   2. scan data/bss, per-cpu sections, in-use struct pages and (optionally)
 *      task stacks;
 *   3. scan the gray list;
 *   4. temporarily gray still-white allocated objects for which
 *      update_checksum() returns non-zero, then re-scan the gray list;
 *   5. count and flag newly unreferenced objects and log a summary.
 *
 * Reporting is skipped when scan_should_stop() is true.
 */
static void kmemleak_scan(void)
{
unsigned long flags;
struct kmemleak_object *object;
int i;
int new_leaks = 0;
/* Remember when this scan started. */
jiffies_last_scan = jiffies;
/* prepare the kmemleak_object's */
rcu_read_lock();
/* Mark every object white. */
list_for_each_entry_rcu(object, &object_list, object_list) {
spin_lock_irqsave(&object->lock, flags);
#ifdef DEBUG
/*
* With a few exceptions there should be a maximum of
* 1 reference to any object at this point.
*/
if (atomic_read(&object->use_count) > 1) {
pr_debug("object->use_count = %d\n",
atomic_read(&object->use_count));
dump_object_info(object);
}
#endif
/* reset the reference count (whiten the object) */
object->count = 0;
/* Objects that are still gray after the reset are queued for scanning. */
if (color_gray(object) && get_object(object))
list_add_tail(&object->gray_list, &gray_list);
spin_unlock_irqrestore(&object->lock, flags);
}
rcu_read_unlock();
/* Scan the data sections. */
/* data/bss scanning */
scan_block(_sdata, _edata, NULL, 1);
scan_block(__bss_start, __bss_stop, NULL, 1);
#ifdef CONFIG_SMP
/* per-cpu sections scanning */
for_each_possible_cpu(i)
scan_block(__per_cpu_start + per_cpu_offset(i),
__per_cpu_end + per_cpu_offset(i), NULL, 1);
#endif
/*
* Struct page scanning for each node.
*/
get_online_mems();
for_each_online_node(i) {
unsigned long start_pfn = node_start_pfn(i);
unsigned long end_pfn = node_end_pfn(i);
unsigned long pfn;
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
struct page *page;
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
/* only scan if page is in use */
if (page_count(page) == 0)
continue;
/* Scan the struct page descriptor itself (one struct page). */
scan_block(page, page + 1, NULL, 1);
}
}
put_online_mems();
/*
* Scanning the task stacks (may introduce false negatives).
*/
if (kmemleak_stack_scan) {
struct task_struct *p, *g;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
scan_block(task_stack_page(p), task_stack_page(p) +
THREAD_SIZE, NULL, 0);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
/*
* Scan the objects already referenced from the sections scanned
* above.
*/
/*
* Article author's note (from online references): gray objects may
* themselves contain pointers to other tracked blocks, so they need to
* be scanned as well.
*/
scan_gray_list();
/*
* Check for new or unreferenced objects modified since the previous
* scan and color them gray until the next scan.
*/
rcu_read_lock();
/* Article author's note: a second pass appears to be needed here. */
list_for_each_entry_rcu(object, &object_list, object_list) {
spin_lock_irqsave(&object->lock, flags);
if (color_white(object) && (object->flags & OBJECT_ALLOCATED)
&& update_checksum(object) && get_object(object)) {
/* color it gray temporarily */
object->count = object->min_count;
list_add_tail(&object->gray_list, &gray_list);
}
spin_unlock_irqrestore(&object->lock, flags);
}
rcu_read_unlock();
/*
* Re-scan the gray list for modified unreferenced objects.
*/
scan_gray_list();
/*
* If scanning was stopped do not report any new unreferenced objects.
*/
if (scan_should_stop())
return;
/*
* Scanning result reporting.
*/
/* Objects that are still white at this point are the suspected leaks. */
rcu_read_lock();
list_for_each_entry_rcu(object, &object_list, object_list) {
spin_lock_irqsave(&object->lock, flags);
/* Count each unreferenced object only once, via OBJECT_REPORTED. */
if (unreferenced_object(object) &&
!(object->flags & OBJECT_REPORTED)) {
object->flags |= OBJECT_REPORTED;
new_leaks++;
}
spin_unlock_irqrestore(&object->lock, flags);
}
rcu_read_unlock();
if (new_leaks) {
kmemleak_found_leaks = true;
pr_info("%d new suspected memory leaks (see "
"/sys/kernel/debug/kmemleak)\n", new_leaks);
}
}
在内核配置中开启kmemleak检查功能(CONFIG_DEBUG_KMEMLEAK)后重新编译内核即可
实际效果展示:
/*
 * Demo thread body that deliberately leaks memory: it performs 100
 * kmalloc() calls of 100 bytes each and discards every returned pointer,
 * so none of the blocks can ever be freed.
 *
 * @a: unused.
 *
 * Returns 0.
 */
int test_thread(void* a)
{
	int i;

	for (i = 0; i < 100; ++i)
		kmalloc(100, GFP_KERNEL);	/* result intentionally dropped */

	return 0;
}
这里是主动触发扫描
触发扫描
[root@arm_test ]# echo scan > /sys/kernel/debug/kmemleak
查看扫描结果
[root@arm_test ]# cat /sys/kernel/debug/kmemleak
可以看到这块内存是在哪个进程,哪个函数申请的。如果它泄露了,就可以根据这些信息排查 (6b应该是开启了slub debug导致的)