xref: /linux/arch/arm/net/bpf_jit_32.c (revision 005438a8eef063495ac059d128eea71b58de50e5)
1 /*
2  * Just-In-Time compiler for BPF filters on 32bit ARM
3  *
4  * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; version 2 of the License.
9  */
10 
11 #include <linux/bitops.h>
12 #include <linux/compiler.h>
13 #include <linux/errno.h>
14 #include <linux/filter.h>
15 #include <linux/netdevice.h>
16 #include <linux/string.h>
17 #include <linux/slab.h>
18 #include <linux/if_vlan.h>
19 
20 #include <asm/cacheflush.h>
21 #include <asm/hwcap.h>
22 #include <asm/opcodes.h>
23 
24 #include "bpf_jit_32.h"
25 
26 /*
27  * ABI:
28  *
29  * r0	scratch register
30  * r4	BPF register A
31  * r5	BPF register X
32  * r6	pointer to the skb
33  * r7	skb->data
34  * r8	skb_headlen(skb)
35  */
36 
37 #define r_scratch	ARM_R0
38 /* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */
39 #define r_off		ARM_R1
40 #define r_A		ARM_R4
41 #define r_X		ARM_R5
42 #define r_skb		ARM_R6
43 #define r_skb_data	ARM_R7
44 #define r_skb_hl	ARM_R8
45 
46 #define SCRATCH_SP_OFFSET	0
47 #define SCRATCH_OFF(k)		(SCRATCH_SP_OFFSET + 4 * (k))
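/*
 * The classic BPF scratch memory M[0..BPF_MEMWORDS-1] lives on the stack:
 * the prologue drops SP by mem_words_used() * 4 bytes and word k is then
 * addressed as [sp, #SCRATCH_OFF(k)], i.e. SP + 4 * k.
 */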
48 
49 #define SEEN_MEM		((1 << BPF_MEMWORDS) - 1)
50 #define SEEN_MEM_WORD(k)	(1 << (k))
51 #define SEEN_X			(1 << BPF_MEMWORDS)
52 #define SEEN_CALL		(1 << (BPF_MEMWORDS + 1))
53 #define SEEN_SKB		(1 << (BPF_MEMWORDS + 2))
54 #define SEEN_DATA		(1 << (BPF_MEMWORDS + 3))
55 
56 #define FLAG_NEED_X_RESET	(1 << 0)
57 #define FLAG_IMM_OVERFLOW	(1 << 1)
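/*
 * FLAG_NEED_X_RESET: X is read before any instruction has written it, so
 * the prologue must zero r_X (see update_on_xread()).
 * FLAG_IMM_OVERFLOW: a literal-pool reference did not fit into the 12-bit
 * LDR offset; the filter is handed back to the interpreter.
 */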
58 
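/*
 * Per-compilation state.  offsets[] maps each BPF instruction index to its
 * byte offset within the JITed body and is filled in during the first
 * ("fake") pass, when target is still NULL and instructions are only
 * counted rather than stored.
 */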
59 struct jit_ctx {
60 	const struct bpf_prog *skf;
61 	unsigned idx;
62 	unsigned prologue_bytes;
63 	int ret0_fp_idx;
64 	u32 seen;
65 	u32 flags;
66 	u32 *offsets;
67 	u32 *target;
68 #if __LINUX_ARM_ARCH__ < 7
69 	u16 epilogue_bytes;
70 	u16 imm_count;
71 	u32 *imms;
72 #endif
73 };
74 
75 int bpf_jit_enable __read_mostly;
76 
77 static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
78 {
79 	u8 ret;
80 	int err;
81 
82 	err = skb_copy_bits(skb, offset, &ret, 1);
83 
84 	return (u64)err << 32 | ret;
85 }
86 
87 static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
88 {
89 	u16 ret;
90 	int err;
91 
92 	err = skb_copy_bits(skb, offset, &ret, 2);
93 
94 	return (u64)err << 32 | ntohs(ret);
95 }
96 
97 static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
98 {
99 	u32 ret;
100 	int err;
101 
102 	err = skb_copy_bits(skb, offset, &ret, 4);
103 
104 	return (u64)err << 32 | ntohl(ret);
105 }
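/*
 * The jit_get_skb_*() helpers above return the skb_copy_bits() error code
 * in the upper 32 bits and the value (byte-swapped to host order for the
 * 16/32-bit variants) in the lower 32 bits.  With the 32-bit EABI a u64 is
 * returned in r0/r1, so the generated code takes the value from r0 and
 * checks r1 for a non-zero error.
 */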
106 
107 /*
108  * Wrapper that handles both OABI and EABI and ensures Thumb2 interworking
109  * (where the assembly routines like __aeabi_uidiv could cause problems).
110  */
111 static u32 jit_udiv(u32 dividend, u32 divisor)
112 {
113 	return dividend / divisor;
114 }
115 
116 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
117 {
118 	inst |= (cond << 28);
119 	inst = __opcode_to_mem_arm(inst);
120 
121 	if (ctx->target != NULL)
122 		ctx->target[ctx->idx] = inst;
123 
124 	ctx->idx++;
125 }
126 
127 /*
128  * Emit an instruction that will be executed unconditionally.
129  */
130 static inline void emit(u32 inst, struct jit_ctx *ctx)
131 {
132 	_emit(ARM_COND_AL, inst, ctx);
133 }
134 
135 static u16 saved_regs(struct jit_ctx *ctx)
136 {
137 	u16 ret = 0;
138 
139 	if ((ctx->skf->len > 1) ||
140 	    (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
141 		ret |= 1 << r_A;
142 
143 #ifdef CONFIG_FRAME_POINTER
144 	ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC);
145 #else
146 	if (ctx->seen & SEEN_CALL)
147 		ret |= 1 << ARM_LR;
148 #endif
149 	if (ctx->seen & (SEEN_DATA | SEEN_SKB))
150 		ret |= 1 << r_skb;
151 	if (ctx->seen & SEEN_DATA)
152 		ret |= (1 << r_skb_data) | (1 << r_skb_hl);
153 	if (ctx->seen & SEEN_X)
154 		ret |= 1 << r_X;
155 
156 	return ret;
157 }
158 
159 static inline int mem_words_used(struct jit_ctx *ctx)
160 {
161 	/* yes, we do waste some stack space IF there are "holes" in the set */
162 	return fls(ctx->seen & SEEN_MEM);
163 }
164 
165 static inline bool is_load_to_a(u16 inst)
166 {
167 	switch (inst) {
168 	case BPF_LD | BPF_W | BPF_LEN:
169 	case BPF_LD | BPF_W | BPF_ABS:
170 	case BPF_LD | BPF_H | BPF_ABS:
171 	case BPF_LD | BPF_B | BPF_ABS:
172 		return true;
173 	default:
174 		return false;
175 	}
176 }
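/*
 * is_load_to_a() is used by build_prologue(): if the filter's first
 * instruction overwrites A anyway, the explicit "A = 0" can be skipped
 * without risking a leak of stale register contents.
 */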
177 
178 static void jit_fill_hole(void *area, unsigned int size)
179 {
180 	u32 *ptr;
181 	/* We are guaranteed to have aligned memory. */
182 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
183 		*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
184 }
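/*
 * Unused space in the binary image is filled with UDF (permanently
 * undefined) instructions so that a stray jump into the padding faults
 * instead of executing leftover data.
 */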
185 
186 static void build_prologue(struct jit_ctx *ctx)
187 {
188 	u16 reg_set = saved_regs(ctx);
189 	u16 first_inst = ctx->skf->insns[0].code;
190 	u16 off;
191 
192 #ifdef CONFIG_FRAME_POINTER
193 	emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
194 	emit(ARM_PUSH(reg_set), ctx);
195 	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
196 #else
197 	if (reg_set)
198 		emit(ARM_PUSH(reg_set), ctx);
199 #endif
200 
201 	if (ctx->seen & (SEEN_DATA | SEEN_SKB))
202 		emit(ARM_MOV_R(r_skb, ARM_R0), ctx);
203 
204 	if (ctx->seen & SEEN_DATA) {
205 		off = offsetof(struct sk_buff, data);
206 		emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx);
207 		/* headlen = len - data_len */
208 		off = offsetof(struct sk_buff, len);
209 		emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx);
210 		off = offsetof(struct sk_buff, data_len);
211 		emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
212 		emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx);
213 	}
214 
215 	if (ctx->flags & FLAG_NEED_X_RESET)
216 		emit(ARM_MOV_I(r_X, 0), ctx);
217 
218 	/* do not leak kernel data to userspace */
219 	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
220 		emit(ARM_MOV_I(r_A, 0), ctx);
221 
222 	/* stack space for the BPF_MEM words */
223 	if (ctx->seen & SEEN_MEM)
224 		emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
225 }
226 
227 static void build_epilogue(struct jit_ctx *ctx)
228 {
229 	u16 reg_set = saved_regs(ctx);
230 
231 	if (ctx->seen & SEEN_MEM)
232 		emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
233 
234 	reg_set &= ~(1 << ARM_LR);
235 
236 #ifdef CONFIG_FRAME_POINTER
237 	/* the first instruction of the prologue was: mov ip, sp */
238 	reg_set &= ~(1 << ARM_IP);
239 	reg_set |= (1 << ARM_SP);
240 	emit(ARM_LDM(ARM_SP, reg_set), ctx);
241 #else
242 	if (reg_set) {
243 		if (ctx->seen & SEEN_CALL)
244 			reg_set |= 1 << ARM_PC;
245 		emit(ARM_POP(reg_set), ctx);
246 	}
247 
248 	if (!(ctx->seen & SEEN_CALL))
249 		emit(ARM_BX(ARM_LR), ctx);
250 #endif
251 }
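/*
 * With CONFIG_FRAME_POINTER the epilogue above restores everything with a
 * single LDM: SP is reloaded from the saved IP slot (which holds the
 * original SP) and PC from the saved LR slot, so no separate return
 * instruction is needed.
 */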
252 
253 static int16_t imm8m(u32 x)
254 {
255 	u32 rot;
256 
257 	for (rot = 0; rot < 16; rot++)
258 		if ((x & ~ror32(0xff, 2 * rot)) == 0)
259 			return rol32(x, 2 * rot) | (rot << 8);
260 
261 	return -1;
262 }
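/*
 * Worked example: 0x00ff0000 is a valid ARM immediate.  With rot = 8 the
 * mask ror32(0xff, 16) == 0x00ff0000 covers all set bits, so imm8m()
 * returns rol32(0x00ff0000, 16) | (8 << 8) == 0x8ff, i.e. 0xff rotated
 * right by 16.  This is the encoding used with ARM_BIC_I() in the
 * pre-ARMv6 emit_swap16() below.
 */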
263 
264 #if __LINUX_ARM_ARCH__ < 7
265 
266 static u16 imm_offset(u32 k, struct jit_ctx *ctx)
267 {
268 	unsigned i = 0, offset;
269 	u16 imm;
270 
271 	/* on the "fake" run we just count them (duplicates included) */
272 	if (ctx->target == NULL) {
273 		ctx->imm_count++;
274 		return 0;
275 	}
276 
277 	while ((i < ctx->imm_count) && ctx->imms[i]) {
278 		if (ctx->imms[i] == k)
279 			break;
280 		i++;
281 	}
282 
283 	if (ctx->imms[i] == 0)
284 		ctx->imms[i] = k;
285 
286 	/* constants go just after the epilogue */
287 	offset =  ctx->offsets[ctx->skf->len];
288 	offset += ctx->prologue_bytes;
289 	offset += ctx->epilogue_bytes;
290 	offset += i * 4;
291 
292 	ctx->target[offset / 4] = k;
293 
294 	/* PC in ARM mode == address of the instruction + 8 */
295 	imm = offset - (8 + ctx->idx * 4);
296 
297 	if (imm & ~0xfff) {
298 		/*
299 		 * The literal pool is too far away: record it in the flags.
300 		 * Unfortunately this can only be detected on the second pass.
301 		 */
302 		ctx->flags |= FLAG_IMM_OVERFLOW;
303 		return 0;
304 	}
305 
306 	return imm;
307 }
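/*
 * To summarise the pre-ARMv7 literal pool handling: the first pass only
 * counts how many constants are needed (imm_count), while the second pass
 * places each constant right after the epilogue and emits a PC-relative
 * LDR to fetch it.  LDR only has a 12-bit offset, so an out-of-range
 * constant sets FLAG_IMM_OVERFLOW and the JIT gives up on this filter.
 */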
308 
309 #endif /* __LINUX_ARM_ARCH__ */
310 
311 /*
312  * Move an immediate that's not an imm8m to a core register.
313  */
314 static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
315 {
316 #if __LINUX_ARM_ARCH__ < 7
317 	emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
318 #else
319 	emit(ARM_MOVW(rd, val & 0xffff), ctx);
320 	if (val > 0xffff)
321 		emit(ARM_MOVT(rd, val >> 16), ctx);
322 #endif
323 }
324 
325 static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
326 {
327 	int imm12 = imm8m(val);
328 
329 	if (imm12 >= 0)
330 		emit(ARM_MOV_I(rd, imm12), ctx);
331 	else
332 		emit_mov_i_no8m(rd, val, ctx);
333 }
334 
335 #if __LINUX_ARM_ARCH__ < 6
336 
337 static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
338 {
339 	_emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx);
340 	_emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
341 	_emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx);
342 	_emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx);
343 	_emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx);
344 	_emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx);
345 	_emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx);
346 	_emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx);
347 }
348 
349 static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
350 {
351 	_emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
352 	_emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx);
353 	_emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx);
354 }
355 
356 static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx)
357 {
358 	/* r_dst = (r_src << 8) | (r_src >> 8) */
359 	emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx);
360 	emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx);
361 
362 	/*
363 	 * we need to mask out the bits set in r_dst[23:16] due to
364 	 * the first shift instruction.
365 	 *
366 	 * note that 0x8ff is the encoded immediate 0x00ff0000.
367 	 */
368 	emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx);
369 }
370 
371 #else  /* ARMv6+ */
372 
373 static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
374 {
375 	_emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx);
376 #ifdef __LITTLE_ENDIAN
377 	_emit(cond, ARM_REV(r_res, r_res), ctx);
378 #endif
379 }
380 
381 static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
382 {
383 	_emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx);
384 #ifdef __LITTLE_ENDIAN
385 	_emit(cond, ARM_REV16(r_res, r_res), ctx);
386 #endif
387 }
388 
389 static inline void emit_swap16(u8 r_dst __maybe_unused,
390 			       u8 r_src __maybe_unused,
391 			       struct jit_ctx *ctx __maybe_unused)
392 {
393 #ifdef __LITTLE_ENDIAN
394 	emit(ARM_REV16(r_dst, r_src), ctx);
395 #endif
396 }
397 
398 #endif /* __LINUX_ARM_ARCH__ < 6 */
399 
400 
401 /* Compute the immediate value for a PC-relative branch. */
402 static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx)
403 {
404 	u32 imm;
405 
406 	if (ctx->target == NULL)
407 		return 0;
408 	/*
409 	 * BPF allows only forward jumps and the offset of the target is
410 	 * still the one computed during the first pass.
411 	 */
412 	imm  = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8);
413 
414 	return imm >> 2;
415 }
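/*
 * The value computed by b_imm() counts instructions: take the byte offset
 * of the target (body offset plus prologue), subtract the address the CPU
 * reports for the branch (idx * 4 + 8, since PC reads as "this instruction
 * + 8" in ARM state) and divide by four.
 */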
416 
417 #define OP_IMM3(op, r1, r2, imm_val, ctx)				\
418 	do {								\
419 		imm12 = imm8m(imm_val);					\
420 		if (imm12 < 0) {					\
421 			emit_mov_i_no8m(r_scratch, imm_val, ctx);	\
422 			emit(op ## _R((r1), (r2), r_scratch), ctx);	\
423 		} else {						\
424 			emit(op ## _I((r1), (r2), imm12), ctx);		\
425 		}							\
426 	} while (0)
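/*
 * OP_IMM3 expects a local "int imm12" in the calling scope (see
 * build_body()): when the constant is encodable as an ARM immediate the
 * immediate form of the instruction is used, otherwise the constant is
 * first materialised in r_scratch and the register form is emitted.
 */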
427 
428 static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx)
429 {
430 	if (ctx->ret0_fp_idx >= 0) {
431 		_emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx);
432 		/* NOP to keep the size constant between passes */
433 		emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx);
434 	} else {
435 		_emit(cond, ARM_MOV_I(ARM_R0, 0), ctx);
436 		_emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
437 	}
438 }
439 
440 static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
441 {
442 #if __LINUX_ARM_ARCH__ < 5
443 	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
444 
445 	if (elf_hwcap & HWCAP_THUMB)
446 		emit(ARM_BX(tgt_reg), ctx);
447 	else
448 		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
449 #else
450 	emit(ARM_BLX_R(tgt_reg), ctx);
451 #endif
452 }
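/*
 * Pre-ARMv5 cores have no BLX, so emit_blx_r() sets up the return address
 * by hand: "mov lr, pc" reads PC as the current instruction + 8, which is
 * exactly the instruction following the subsequent bx/mov-to-pc branch.
 */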
453 
454 static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
455 {
456 #if __LINUX_ARM_ARCH__ == 7
457 	if (elf_hwcap & HWCAP_IDIVA) {
458 		emit(ARM_UDIV(rd, rm, rn), ctx);
459 		return;
460 	}
461 #endif
462 
463 	/*
464 	 * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
465 	 * (r_A) and rn is ARM_R0 (r_scratch), so copy rn into ARM_R1
466 	 * first; otherwise moving rm into ARM_R0 would clobber the
467 	 * divisor before it has been saved.
468 	 *
469 	 * For BPF_ALU | BPF_DIV | BPF_X, rm is ARM_R4 (r_A) and rn is
470 	 * ARM_R5 (r_X), so there are no particular register overlap
471 	 * issues.
472 	 */
473 	if (rn != ARM_R1)
474 		emit(ARM_MOV_R(ARM_R1, rn), ctx);
475 	if (rm != ARM_R0)
476 		emit(ARM_MOV_R(ARM_R0, rm), ctx);
477 
478 	ctx->seen |= SEEN_CALL;
479 	emit_mov_i(ARM_R3, (u32)jit_udiv, ctx);
480 	emit_blx_r(ARM_R3, ctx);
481 
482 	if (rd != ARM_R0)
483 		emit(ARM_MOV_R(rd, ARM_R0), ctx);
484 }
485 
486 static inline void update_on_xread(struct jit_ctx *ctx)
487 {
488 	if (!(ctx->seen & SEEN_X))
489 		ctx->flags |= FLAG_NEED_X_RESET;
490 
491 	ctx->seen |= SEEN_X;
492 }
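/*
 * update_on_xread() is called on every read of X: if nothing has marked X
 * as seen yet, the value is being read before any write, so the prologue
 * must zero r_X (FLAG_NEED_X_RESET) rather than expose whatever the
 * register happened to hold.
 */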
493 
494 static int build_body(struct jit_ctx *ctx)
495 {
496 	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
497 	const struct bpf_prog *prog = ctx->skf;
498 	const struct sock_filter *inst;
499 	unsigned i, load_order, off, condt;
500 	int imm12;
501 	u32 k;
502 
503 	for (i = 0; i < prog->len; i++) {
504 		u16 code;
505 
506 		inst = &(prog->insns[i]);
507 		/* K as an immediate value operand */
508 		k = inst->k;
509 		code = bpf_anc_helper(inst);
510 
511 		/* compute offsets only in the fake pass */
512 		if (ctx->target == NULL)
513 			ctx->offsets[i] = ctx->idx * 4;
514 
515 		switch (code) {
516 		case BPF_LD | BPF_IMM:
517 			emit_mov_i(r_A, k, ctx);
518 			break;
519 		case BPF_LD | BPF_W | BPF_LEN:
520 			ctx->seen |= SEEN_SKB;
521 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
522 			emit(ARM_LDR_I(r_A, r_skb,
523 				       offsetof(struct sk_buff, len)), ctx);
524 			break;
525 		case BPF_LD | BPF_MEM:
526 			/* A = scratch[k] */
527 			ctx->seen |= SEEN_MEM_WORD(k);
528 			emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
529 			break;
530 		case BPF_LD | BPF_W | BPF_ABS:
531 			load_order = 2;
532 			goto load;
533 		case BPF_LD | BPF_H | BPF_ABS:
534 			load_order = 1;
535 			goto load;
536 		case BPF_LD | BPF_B | BPF_ABS:
537 			load_order = 0;
538 load:
539 			/* the interpreter will deal with the negative K */
540 			if ((int)k < 0)
541 				return -ENOTSUPP;
542 			emit_mov_i(r_off, k, ctx);
543 load_common:
544 			ctx->seen |= SEEN_DATA | SEEN_CALL;
545 
546 			if (load_order > 0) {
547 				emit(ARM_SUB_I(r_scratch, r_skb_hl,
548 					       1 << load_order), ctx);
549 				emit(ARM_CMP_R(r_scratch, r_off), ctx);
550 				condt = ARM_COND_HS;
551 			} else {
552 				emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
553 				condt = ARM_COND_HI;
554 			}
555 
556 			_emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
557 			      ctx);
558 
559 			if (load_order == 0)
560 				_emit(condt, ARM_LDRB_I(r_A, r_scratch, 0),
561 				      ctx);
562 			else if (load_order == 1)
563 				emit_load_be16(condt, r_A, r_scratch, ctx);
564 			else if (load_order == 2)
565 				emit_load_be32(condt, r_A, r_scratch, ctx);
566 
567 			_emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx);
568 
569 			/* the slowpath */
570 			emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx);
571 			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
572 			/* the offset is already in R1 */
573 			emit_blx_r(ARM_R3, ctx);
574 			/* check the result of skb_copy_bits */
575 			emit(ARM_CMP_I(ARM_R1, 0), ctx);
576 			emit_err_ret(ARM_COND_NE, ctx);
577 			emit(ARM_MOV_R(r_A, ARM_R0), ctx);
578 			break;
579 		case BPF_LD | BPF_W | BPF_IND:
580 			load_order = 2;
581 			goto load_ind;
582 		case BPF_LD | BPF_H | BPF_IND:
583 			load_order = 1;
584 			goto load_ind;
585 		case BPF_LD | BPF_B | BPF_IND:
586 			load_order = 0;
587 load_ind:
588 			OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
589 			goto load_common;
590 		case BPF_LDX | BPF_IMM:
591 			ctx->seen |= SEEN_X;
592 			emit_mov_i(r_X, k, ctx);
593 			break;
594 		case BPF_LDX | BPF_W | BPF_LEN:
595 			ctx->seen |= SEEN_X | SEEN_SKB;
596 			emit(ARM_LDR_I(r_X, r_skb,
597 				       offsetof(struct sk_buff, len)), ctx);
598 			break;
599 		case BPF_LDX | BPF_MEM:
600 			ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
601 			emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
602 			break;
603 		case BPF_LDX | BPF_B | BPF_MSH:
604 			/* x = ((*(frame + k)) & 0xf) << 2; */
605 			ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
606 			/* the interpreter should deal with the negative K */
607 			if ((int)k < 0)
608 				return -1;
609 			/* offset in r1: we might have to take the slow path */
610 			emit_mov_i(r_off, k, ctx);
611 			emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
612 
613 			/* load in r0: common with the slowpath */
614 			_emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data,
615 						      ARM_R1), ctx);
616 			/*
617 			 * emit_mov_i() might generate one or two instructions;
618 			 * the same holds for emit_blx_r().
619 			 */
620 			_emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx);
621 
622 			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
623 			/* r_off is r1 */
624 			emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx);
625 			emit_blx_r(ARM_R3, ctx);
626 			/* check the return value of skb_copy_bits */
627 			emit(ARM_CMP_I(ARM_R1, 0), ctx);
628 			emit_err_ret(ARM_COND_NE, ctx);
629 
630 			emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
631 			emit(ARM_LSL_I(r_X, r_X, 2), ctx);
632 			break;
633 		case BPF_ST:
634 			ctx->seen |= SEEN_MEM_WORD(k);
635 			emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
636 			break;
637 		case BPF_STX:
638 			update_on_xread(ctx);
639 			ctx->seen |= SEEN_MEM_WORD(k);
640 			emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
641 			break;
642 		case BPF_ALU | BPF_ADD | BPF_K:
643 			/* A += K */
644 			OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
645 			break;
646 		case BPF_ALU | BPF_ADD | BPF_X:
647 			update_on_xread(ctx);
648 			emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
649 			break;
650 		case BPF_ALU | BPF_SUB | BPF_K:
651 			/* A -= K */
652 			OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
653 			break;
654 		case BPF_ALU | BPF_SUB | BPF_X:
655 			update_on_xread(ctx);
656 			emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
657 			break;
658 		case BPF_ALU | BPF_MUL | BPF_K:
659 			/* A *= K */
660 			emit_mov_i(r_scratch, k, ctx);
661 			emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
662 			break;
663 		case BPF_ALU | BPF_MUL | BPF_X:
664 			update_on_xread(ctx);
665 			emit(ARM_MUL(r_A, r_A, r_X), ctx);
666 			break;
667 		case BPF_ALU | BPF_DIV | BPF_K:
668 			if (k == 1)
669 				break;
670 			emit_mov_i(r_scratch, k, ctx);
671 			emit_udiv(r_A, r_A, r_scratch, ctx);
672 			break;
673 		case BPF_ALU | BPF_DIV | BPF_X:
674 			update_on_xread(ctx);
675 			emit(ARM_CMP_I(r_X, 0), ctx);
676 			emit_err_ret(ARM_COND_EQ, ctx);
677 			emit_udiv(r_A, r_A, r_X, ctx);
678 			break;
679 		case BPF_ALU | BPF_OR | BPF_K:
680 			/* A |= K */
681 			OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
682 			break;
683 		case BPF_ALU | BPF_OR | BPF_X:
684 			update_on_xread(ctx);
685 			emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
686 			break;
687 		case BPF_ALU | BPF_XOR | BPF_K:
688 			/* A ^= K; */
689 			OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
690 			break;
691 		case BPF_ANC | SKF_AD_ALU_XOR_X:
692 		case BPF_ALU | BPF_XOR | BPF_X:
693 			/* A ^= X */
694 			update_on_xread(ctx);
695 			emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
696 			break;
697 		case BPF_ALU | BPF_AND | BPF_K:
698 			/* A &= K */
699 			OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
700 			break;
701 		case BPF_ALU | BPF_AND | BPF_X:
702 			update_on_xread(ctx);
703 			emit(ARM_AND_R(r_A, r_A, r_X), ctx);
704 			break;
705 		case BPF_ALU | BPF_LSH | BPF_K:
706 			if (unlikely(k > 31))
707 				return -1;
708 			emit(ARM_LSL_I(r_A, r_A, k), ctx);
709 			break;
710 		case BPF_ALU | BPF_LSH | BPF_X:
711 			update_on_xread(ctx);
712 			emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
713 			break;
714 		case BPF_ALU | BPF_RSH | BPF_K:
715 			if (unlikely(k > 31))
716 				return -1;
717 			emit(ARM_LSR_I(r_A, r_A, k), ctx);
718 			break;
719 		case BPF_ALU | BPF_RSH | BPF_X:
720 			update_on_xread(ctx);
721 			emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
722 			break;
723 		case BPF_ALU | BPF_NEG:
724 			/* A = -A */
725 			emit(ARM_RSB_I(r_A, r_A, 0), ctx);
726 			break;
727 		case BPF_JMP | BPF_JA:
728 			/* pc += K */
729 			emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
730 			break;
731 		case BPF_JMP | BPF_JEQ | BPF_K:
732 			/* pc += (A == K) ? pc->jt : pc->jf */
733 			condt  = ARM_COND_EQ;
734 			goto cmp_imm;
735 		case BPF_JMP | BPF_JGT | BPF_K:
736 			/* pc += (A > K) ? pc->jt : pc->jf */
737 			condt  = ARM_COND_HI;
738 			goto cmp_imm;
739 		case BPF_JMP | BPF_JGE | BPF_K:
740 			/* pc += (A >= K) ? pc->jt : pc->jf */
741 			condt  = ARM_COND_HS;
742 cmp_imm:
743 			imm12 = imm8m(k);
744 			if (imm12 < 0) {
745 				emit_mov_i_no8m(r_scratch, k, ctx);
746 				emit(ARM_CMP_R(r_A, r_scratch), ctx);
747 			} else {
748 				emit(ARM_CMP_I(r_A, imm12), ctx);
749 			}
750 cond_jump:
751 			if (inst->jt)
752 				_emit(condt, ARM_B(b_imm(i + inst->jt + 1,
753 						   ctx)), ctx);
754 			if (inst->jf)
755 				_emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
756 							     ctx)), ctx);
757 			break;
758 		case BPF_JMP | BPF_JEQ | BPF_X:
759 			/* pc += (A == X) ? pc->jt : pc->jf */
760 			condt   = ARM_COND_EQ;
761 			goto cmp_x;
762 		case BPF_JMP | BPF_JGT | BPF_X:
763 			/* pc += (A > X) ? pc->jt : pc->jf */
764 			condt   = ARM_COND_HI;
765 			goto cmp_x;
766 		case BPF_JMP | BPF_JGE | BPF_X:
767 			/* pc += (A >= X) ? pc->jt : pc->jf */
768 			condt   = ARM_COND_CS;
769 cmp_x:
770 			update_on_xread(ctx);
771 			emit(ARM_CMP_R(r_A, r_X), ctx);
772 			goto cond_jump;
773 		case BPF_JMP | BPF_JSET | BPF_K:
774 			/* pc += (A & K) ? pc->jt : pc->jf */
775 			condt  = ARM_COND_NE;
776 			/* not set iff all zeroes iff Z==1 iff EQ */
777 
778 			imm12 = imm8m(k);
779 			if (imm12 < 0) {
780 				emit_mov_i_no8m(r_scratch, k, ctx);
781 				emit(ARM_TST_R(r_A, r_scratch), ctx);
782 			} else {
783 				emit(ARM_TST_I(r_A, imm12), ctx);
784 			}
785 			goto cond_jump;
786 		case BPF_JMP | BPF_JSET | BPF_X:
787 			/* pc += (A & X) ? pc->jt : pc->jf */
788 			update_on_xread(ctx);
789 			condt  = ARM_COND_NE;
790 			emit(ARM_TST_R(r_A, r_X), ctx);
791 			goto cond_jump;
792 		case BPF_RET | BPF_A:
793 			emit(ARM_MOV_R(ARM_R0, r_A), ctx);
794 			goto b_epilogue;
795 		case BPF_RET | BPF_K:
796 			if ((k == 0) && (ctx->ret0_fp_idx < 0))
797 				ctx->ret0_fp_idx = i;
798 			emit_mov_i(ARM_R0, k, ctx);
799 b_epilogue:
800 			if (i != ctx->skf->len - 1)
801 				emit(ARM_B(b_imm(prog->len, ctx)), ctx);
802 			break;
803 		case BPF_MISC | BPF_TAX:
804 			/* X = A */
805 			ctx->seen |= SEEN_X;
806 			emit(ARM_MOV_R(r_X, r_A), ctx);
807 			break;
808 		case BPF_MISC | BPF_TXA:
809 			/* A = X */
810 			update_on_xread(ctx);
811 			emit(ARM_MOV_R(r_A, r_X), ctx);
812 			break;
813 		case BPF_ANC | SKF_AD_PROTOCOL:
814 			/* A = ntohs(skb->protocol) */
815 			ctx->seen |= SEEN_SKB;
816 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
817 						  protocol) != 2);
818 			off = offsetof(struct sk_buff, protocol);
819 			emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
820 			emit_swap16(r_A, r_scratch, ctx);
821 			break;
822 		case BPF_ANC | SKF_AD_CPU:
823 			/* r_scratch = current_thread_info() */
824 			OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
825 			/* A = current_thread_info()->cpu */
826 			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);
827 			off = offsetof(struct thread_info, cpu);
828 			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
829 			break;
830 		case BPF_ANC | SKF_AD_IFINDEX:
831 			/* A = skb->dev->ifindex */
832 			ctx->seen |= SEEN_SKB;
833 			off = offsetof(struct sk_buff, dev);
834 			emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
835 
836 			emit(ARM_CMP_I(r_scratch, 0), ctx);
837 			emit_err_ret(ARM_COND_EQ, ctx);
838 
839 			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
840 						  ifindex) != 4);
841 			off = offsetof(struct net_device, ifindex);
842 			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
843 			break;
844 		case BPF_ANC | SKF_AD_MARK:
845 			ctx->seen |= SEEN_SKB;
846 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
847 			off = offsetof(struct sk_buff, mark);
848 			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
849 			break;
850 		case BPF_ANC | SKF_AD_RXHASH:
851 			ctx->seen |= SEEN_SKB;
852 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
853 			off = offsetof(struct sk_buff, hash);
854 			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
855 			break;
856 		case BPF_ANC | SKF_AD_VLAN_TAG:
857 		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
858 			ctx->seen |= SEEN_SKB;
859 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
860 			off = offsetof(struct sk_buff, vlan_tci);
861 			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
862 			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
863 				OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx);
864 			else
865 				OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx);
866 			break;
867 		case BPF_ANC | SKF_AD_QUEUE:
868 			ctx->seen |= SEEN_SKB;
869 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
870 						  queue_mapping) != 2);
871 			BUILD_BUG_ON(offsetof(struct sk_buff,
872 					      queue_mapping) > 0xff);
873 			off = offsetof(struct sk_buff, queue_mapping);
874 			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
875 			break;
876 		case BPF_LDX | BPF_W | BPF_ABS:
877 			/*
878 			 * load a 32bit word from struct seccomp_data.
879 			 * seccomp_check_filter() will already have checked
880 			 * that k is 32bit aligned and lies within the
881 			 * struct seccomp_data.
882 			 */
883 			ctx->seen |= SEEN_SKB;
884 			emit(ARM_LDR_I(r_A, r_skb, k), ctx);
885 			break;
886 		default:
887 			return -1;
888 		}
889 
890 		if (ctx->flags & FLAG_IMM_OVERFLOW)
891 			/*
892 			 * this instruction generated an overflow when
893 			 * trying to access the literal pool, so
894 			 * delegate this filter to the kernel interpreter.
895 			 */
896 			return -1;
897 	}
898 
899 	/* compute offsets only during the first pass */
900 	if (ctx->target == NULL)
901 		ctx->offsets[i] = ctx->idx * 4;
902 
903 	return 0;
904 }
905 
906 
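/*
 * Two-pass JIT: build_body() is first run with ctx.target == NULL so that
 * only instruction counts, ctx.seen and the per-instruction offsets are
 * collected; the image is then sized and allocated, and the prologue, body
 * and epilogue are emitted for real in the second pass.
 */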
907 void bpf_jit_compile(struct bpf_prog *fp)
908 {
909 	struct bpf_binary_header *header;
910 	struct jit_ctx ctx;
911 	unsigned tmp_idx;
912 	unsigned alloc_size;
913 	u8 *target_ptr;
914 
915 	if (!bpf_jit_enable)
916 		return;
917 
918 	memset(&ctx, 0, sizeof(ctx));
919 	ctx.skf		= fp;
920 	ctx.ret0_fp_idx = -1;
921 
922 	ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
923 	if (ctx.offsets == NULL)
924 		return;
925 
926 	/* fake pass to fill in the ctx->seen */
927 	if (unlikely(build_body(&ctx)))
928 		goto out;
929 
930 	tmp_idx = ctx.idx;
931 	build_prologue(&ctx);
932 	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
933 
934 #if __LINUX_ARM_ARCH__ < 7
935 	tmp_idx = ctx.idx;
936 	build_epilogue(&ctx);
937 	ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4;
938 
939 	ctx.idx += ctx.imm_count;
940 	if (ctx.imm_count) {
941 		ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL);
942 		if (ctx.imms == NULL)
943 			goto out;
944 	}
945 #else
946 	/* there's nothing after the epilogue on ARMv7 */
947 	build_epilogue(&ctx);
948 #endif
949 	alloc_size = 4 * ctx.idx;
950 	header = bpf_jit_binary_alloc(alloc_size, &target_ptr,
951 				      4, jit_fill_hole);
952 	if (header == NULL)
953 		goto out;
954 
955 	ctx.target = (u32 *) target_ptr;
956 	ctx.idx = 0;
957 
958 	build_prologue(&ctx);
959 	if (build_body(&ctx) < 0) {
960 #if __LINUX_ARM_ARCH__ < 7
961 		if (ctx.imm_count)
962 			kfree(ctx.imms);
963 #endif
964 		bpf_jit_binary_free(header);
965 		goto out;
966 	}
967 	build_epilogue(&ctx);
968 
969 	flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
970 
971 #if __LINUX_ARM_ARCH__ < 7
972 	if (ctx.imm_count)
973 		kfree(ctx.imms);
974 #endif
975 
976 	if (bpf_jit_enable > 1)
977 		/* there are 2 passes here */
978 		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
979 
980 	set_memory_ro((unsigned long)header, header->pages);
981 	fp->bpf_func = (void *)ctx.target;
982 	fp->jited = true;
983 out:
984 	kfree(ctx.offsets);
985 	return;
986 }
987 
988 void bpf_jit_free(struct bpf_prog *fp)
989 {
990 	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
991 	struct bpf_binary_header *header = (void *)addr;
992 
993 	if (!fp->jited)
994 		goto free_filter;
995 
996 	set_memory_rw(addr, header->pages);
997 	bpf_jit_binary_free(header);
998 
999 free_filter:
1000 	bpf_prog_unlock_free(fp);
1001 }
1002