xref: /linux/arch/arc/net/bpf_jit_core.c (revision d9ef13f72711f2dad64cd4445472ded98fb6c954)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * The back-end-agnostic part of Just-In-Time compiler for eBPF bytecode.
4  *
5  * Copyright (c) 2024 Synopsys Inc.
6  * Author: Shahab Vahedi <shahab@synopsys.com>
7  */
8 #include <linux/bug.h>
9 #include "bpf_jit.h"
10 
/*
 * Check for the return value. A pattern used often in this file.
 *
 * There must be a "ret" variable of type "int" in the caller's scope:
 * the macro assigns "cmd"s result to it and, on a negative (error)
 * value, returns from the *enclosing function* with that value.
 */
#define CHECK_RET(cmd)			\
	do {				\
		ret = (cmd);		\
		if (ret < 0)		\
			return ret;	\
	} while (0)
21 
#ifdef ARC_BPF_JIT_DEBUG
/*
 * Dumps bytes in /var/log/messages at KERN_INFO level (4).
 *
 * "len" bytes of "buf" are printed as hex, 8 per line, after a banner
 * line carrying "header". Uses sizeof(line) instead of a magic "64" so
 * the buffer size is stated in exactly one place.
 */
static void dump_bytes(const u8 *buf, u32 len, const char *header)
{
	u8 line[64];
	size_t i, j;

	pr_info("-----------------[ %s ]-----------------\n", header);

	for (i = 0, j = 0; i < len; i++) {
		/* Last input byte? */
		if (i == len - 1) {
			j += scnprintf(line + j, sizeof(line) - j,
				       "0x%02x", buf[i]);
			pr_info("%s\n", line);
			break;
		}
		/* End of line? */
		else if (i % 8 == 7) {
			j += scnprintf(line + j, sizeof(line) - j,
				       "0x%02x", buf[i]);
			pr_info("%s\n", line);
			j = 0;
		} else {
			j += scnprintf(line + j, sizeof(line) - j,
				       "0x%02x, ", buf[i]);
		}
	}
}
#endif /* ARC_BPF_JIT_DEBUG */
49 
50 /********************* JIT context ***********************/
51 
/*
 * buf:		Translated instructions end up here.
 * len:		The length of whole block in bytes.
 * index:	The offset at which the _next_ instruction may be put.
 */
struct jit_buffer {
	u8	*buf;		/* JIT'ed instruction stream. */
	u32	len;		/* Total size (grows during the dry-run). */
	u32	index;		/* Write cursor for the next emission. */
};
62 
/*
 * This is a subset of "struct jit_context" whose information is deemed
 * necessary for the next extra pass to come.
 *
 * bpf_header:	Needed to finally lock the region.
 * bpf2insn:	Used to find the translation for instructions of interest.
 *
 * Things like "jit.buf" and "jit.len" can be retrieved respectively from
 * "prog->bpf_func" and "prog->jited_len".
 */
struct arc_jit_data {
	struct bpf_binary_header *bpf_header;	/* For the final locking. */
	u32                      *bpf2insn;	/* BPF idx -> JIT offset map. */
};
77 
/*
 * The JIT pertinent context that is used by different functions.
 *
 * prog:		The current eBPF program being handled.
 * jit:			The JIT buffer and its length.
 * bpf_header:		The JITed program header. "jit.buf" points inside it.
 * emit:		If set, opcodes are written to memory; else, a dry-run.
 * do_zext:		If true, 32-bit sub-regs must be zero extended.
 * bpf2insn:		Maps BPF insn indices to their counterparts in jit.buf.
 * bpf2insn_valid:	Indicates if "bpf2insn" is populated with the mappings.
 * jit_data:		A piece of memory to transfer data to the next pass.
 * arc_regs_clobbered:	Each bit status determines if that arc reg is clobbered.
 * save_blink:		Whether ARC's "blink" register needs to be saved.
 * frame_size:		Derived from "prog->aux->stack_depth".
 * epilogue_offset:	Used by early "return"s in the code to jump here.
 * need_extra_pass:	A forecast if an "extra_pass" will occur.
 * is_extra_pass:	Indicates if the current pass is an extra pass.
 * user_bpf_prog:	True, if VM opcodes come from a real program.
 * success:		Indicates if the whole JIT went OK.
 */
struct jit_context {
	struct bpf_prog			*prog;
	struct jit_buffer		jit;
	struct bpf_binary_header	*bpf_header;
	bool				emit;
	bool				do_zext;
	u32				*bpf2insn;
	bool				bpf2insn_valid;
	struct arc_jit_data		*jit_data;
	u32				arc_regs_clobbered;
	bool				save_blink;
	u16				frame_size;
	u32				epilogue_offset;
	bool				need_extra_pass;
	bool				is_extra_pass;
	bool				user_bpf_prog;
	bool				success;
};
116 
/*
 * If we're in ARC_BPF_JIT_DEBUG mode and the debug level is right, dump the
 * input BPF stream. "bpf_jit_dump()" is not fully suited for this purpose.
 */
static void vm_dump(const struct bpf_prog *prog)
{
#ifdef ARC_BPF_JIT_DEBUG
	if (bpf_jit_enable <= 1)
		return;

	/* Every BPF instruction is 8 bytes long. */
	dump_bytes((u8 *)prog->insns, 8 * prog->len, " VM  ");
#endif
}
128 
/*
 * If the right level of debug is set, dump the bytes. There are 2 variants
 * of this function:
 *
 * 1. Use the standard bpf_jit_dump() which is meant only for JITed code.
 * 2. Use the dump_bytes() to match its "vm_dump()" instance.
 */
static void jit_dump(const struct jit_context *ctx)
{
#ifdef ARC_BPF_JIT_DEBUG
	u8 header[8];
#endif
	/* Pass number in the dump: 1 for normal, 2 for the extra pass. */
	const int pass = ctx->is_extra_pass ? 2 : 1;

	/* Nothing to dump if debugging is off or nothing was JITed. */
	if (bpf_jit_enable <= 1 || !ctx->prog->jited)
		return;

#ifdef ARC_BPF_JIT_DEBUG
	scnprintf(header, sizeof(header), "JIT:%d", pass);
	dump_bytes(ctx->jit.buf, ctx->jit.len, header);
	pr_info("\n");
#else
	bpf_jit_dump(ctx->prog->len, ctx->jit.len, pass, ctx->jit.buf);
#endif
}
154 
155 /* Initialise the context so there's no garbage. */
156 static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
157 {
158 	memset(ctx, 0, sizeof(*ctx));
159 
160 	ctx->prog = prog;
161 
162 	/* If the verifier doesn't zero-extend, then we have to do it. */
163 	ctx->do_zext = !ctx->prog->aux->verifier_zext;
164 
165 	ctx->is_extra_pass = ctx->prog->jited;
166 	ctx->user_bpf_prog = ctx->prog->is_func;
167 
168 	return 0;
169 }
170 
/*
 * Only after the first iteration of normal pass (the dry-run),
 * there are valid offsets in ctx->bpf2insn array.
 */
static inline bool offsets_available(const struct jit_context *ctx)
{
	/* Set to true at the end of a fully populated dry-run. */
	return ctx->bpf2insn_valid;
}
179 
180 /*
181  * "*mem" should be freed when there is no "extra pass" to come,
182  * or the compilation terminated abruptly. A few of such memory
183  * allocations are: ctx->jit_data and ctx->bpf2insn.
184  */
185 static inline void maybe_free(struct jit_context *ctx, void **mem)
186 {
187 	if (*mem) {
188 		if (!ctx->success || !ctx->need_extra_pass) {
189 			kfree(*mem);
190 			*mem = NULL;
191 		}
192 	}
193 }
194 
/*
 * Free memories based on the status of the context.
 *
 * A note about "bpf_header": On successful runs, "bpf_header" is
 * not freed, because "jit.buf", a sub-array of it, is returned as
 * the "bpf_func". However, "bpf_header" is lost and nothing points
 * to it. This should not cause a leakage, because apparently
 * "bpf_header" can be revived by "bpf_jit_binary_hdr()". This is
 * how "bpf_jit_free()" in "kernel/bpf/core.c" releases the memory.
 */
static void jit_ctx_cleanup(struct jit_context *ctx)
{
	/* These survive only a successful run that needs an extra pass. */
	maybe_free(ctx, (void **)&ctx->bpf2insn);
	maybe_free(ctx, (void **)&ctx->jit_data);

	/* The mapping is gone, so flag it as such. */
	if (!ctx->bpf2insn)
		ctx->bpf2insn_valid = false;

	/* Freeing "bpf_header" is enough. "jit.buf" is a sub-array of it. */
	if (!ctx->success) {
		if (ctx->bpf_header) {
			bpf_jit_binary_free(ctx->bpf_header);
			ctx->bpf_header = NULL;
			ctx->jit.buf    = NULL;
			ctx->jit.index  = 0;
			ctx->jit.len    = 0;
		}
		/* Undo what a previous (normal) pass recorded in "prog". */
		if (ctx->is_extra_pass) {
			ctx->prog->bpf_func = NULL;
			ctx->prog->jited = 0;
			ctx->prog->jited_len = 0;
		}
	}

	ctx->emit = false;
	ctx->do_zext = false;
}
232 
233 /*
234  * Analyse the register usage and record the frame size.
235  * The register usage is determined by consulting the back-end.
236  */
237 static void analyze_reg_usage(struct jit_context *ctx)
238 {
239 	size_t i;
240 	u32 usage = 0;
241 	const struct bpf_insn *insn = ctx->prog->insnsi;
242 
243 	for (i = 0; i < ctx->prog->len; i++) {
244 		u8 bpf_reg;
245 		bool call;
246 
247 		bpf_reg = insn[i].dst_reg;
248 		call = (insn[i].code == (BPF_JMP | BPF_CALL)) ? true : false;
249 		usage |= mask_for_used_regs(bpf_reg, call);
250 	}
251 
252 	ctx->arc_regs_clobbered = usage;
253 	ctx->frame_size = ctx->prog->aux->stack_depth;
254 }
255 
256 /* Verify that no instruction will be emitted when there is no buffer. */
257 static inline int jit_buffer_check(const struct jit_context *ctx)
258 {
259 	if (ctx->emit) {
260 		if (!ctx->jit.buf) {
261 			pr_err("bpf-jit: inconsistence state; no "
262 			       "buffer to emit instructions.\n");
263 			return -EINVAL;
264 		} else if (ctx->jit.index > ctx->jit.len) {
265 			pr_err("bpf-jit: estimated JIT length is less "
266 			       "than the emitted instructions.\n");
267 			return -EFAULT;
268 		}
269 	}
270 	return 0;
271 }
272 
273 /* On a dry-run (emit=false), "jit.len" is growing gradually. */
274 static inline void jit_buffer_update(struct jit_context *ctx, u32 n)
275 {
276 	if (!ctx->emit)
277 		ctx->jit.len += n;
278 	else
279 		ctx->jit.index += n;
280 }
281 
282 /* Based on "emit", determine the address where instructions are emitted. */
283 static inline u8 *effective_jit_buf(const struct jit_context *ctx)
284 {
285 	return ctx->emit ? (ctx->jit.buf + ctx->jit.index) : NULL;
286 }
287 
288 /* Prologue based on context variables set by "analyze_reg_usage()". */
289 static int handle_prologue(struct jit_context *ctx)
290 {
291 	int ret;
292 	u8 *buf = effective_jit_buf(ctx);
293 	u32 len = 0;
294 
295 	CHECK_RET(jit_buffer_check(ctx));
296 
297 	len = arc_prologue(buf, ctx->arc_regs_clobbered, ctx->frame_size);
298 	jit_buffer_update(ctx, len);
299 
300 	return 0;
301 }
302 
303 /* The counter part for "handle_prologue()". */
304 static int handle_epilogue(struct jit_context *ctx)
305 {
306 	int ret;
307 	u8 *buf = effective_jit_buf(ctx);
308 	u32 len = 0;
309 
310 	CHECK_RET(jit_buffer_check(ctx));
311 
312 	len = arc_epilogue(buf, ctx->arc_regs_clobbered, ctx->frame_size);
313 	jit_buffer_update(ctx, len);
314 
315 	return 0;
316 }
317 
/* Tell which number of the BPF instruction we are dealing with. */
static inline s32 get_index_for_insn(const struct jit_context *ctx,
				     const struct bpf_insn *insn)
{
	/* Pointer difference from the start of the instruction array. */
	return (insn - ctx->prog->insnsi);
}
324 
325 /*
326  * In most of the cases, the "offset" is read from "insn->off". However,
327  * if it is an unconditional BPF_JMP32, then it comes from "insn->imm".
328  *
329  * (Courtesy of "cpu=v4" support)
330  */
331 static inline s32 get_offset(const struct bpf_insn *insn)
332 {
333 	if ((BPF_CLASS(insn->code) == BPF_JMP32) &&
334 	    (BPF_OP(insn->code) == BPF_JA))
335 		return insn->imm;
336 	else
337 		return insn->off;
338 }
339 
340 /*
341  * Determine to which number of the BPF instruction we're jumping to.
342  *
343  * The "offset" is interpreted as the "number" of BPF instructions
344  * from the _next_ BPF instruction. e.g.:
345  *
346  *  4 means 4 instructions after  the next insn
347  *  0 means 0 instructions after  the next insn -> fallthrough.
348  * -1 means 1 instruction  before the next insn -> jmp to current insn.
349  *
350  *  Another way to look at this, "offset" is the number of instructions
351  *  that exist between the current instruction and the target instruction.
352  *
353  *  It is worth noting that a "mov r,i64", which is 16-byte long, is
354  *  treated as two instructions long, therefore "offset" needn't be
355  *  treated specially for those. Everything is uniform.
356  */
357 static inline s32 get_target_index_for_insn(const struct jit_context *ctx,
358 					    const struct bpf_insn *insn)
359 {
360 	return (get_index_for_insn(ctx, insn) + 1) + get_offset(insn);
361 }
362 
/* Is there an immediate operand encoded in the "insn"? */
static inline bool has_imm(const struct bpf_insn *insn)
{
	/* BPF_K marks the source operand as an immediate. */
	return BPF_SRC(insn->code) == BPF_K;
}
368 
/* Is "idx" the index of the last BPF instruction in the program? */
static inline bool is_last_insn(const struct bpf_prog *prog, u32 idx)
{
	return idx == (prog->len - 1);
}
374 
375 /*
376  * Invocation of this function, conditionally signals the need for
377  * an extra pass. The conditions that must be met are:
378  *
379  * 1. The current pass itself shouldn't be an extra pass.
380  * 2. The stream of bytes being JITed must come from a user program.
381  */
382 static inline void set_need_for_extra_pass(struct jit_context *ctx)
383 {
384 	if (!ctx->is_extra_pass)
385 		ctx->need_extra_pass = ctx->user_bpf_prog;
386 }
387 
388 /*
389  * Check if the "size" is valid and then transfer the control to
390  * the back-end for the swap.
391  */
392 static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
393 		       bool force, bool do_zext, u8 *len)
394 {
395 	/* Sanity check on the size. */
396 	switch (size) {
397 	case 16:
398 	case 32:
399 	case 64:
400 		break;
401 	default:
402 		pr_err("bpf-jit: invalid size for swap.\n");
403 		return -EINVAL;
404 	}
405 
406 	*len = gen_swap(buf, rd, size, endian, force, do_zext);
407 
408 	return 0;
409 }
410 
/* Checks if the (instruction) index is in valid range: [0, prog->len). */
static inline bool check_insn_idx_valid(const struct jit_context *ctx,
					const s32 idx)
{
	return (idx >= 0 && idx < ctx->prog->len);
}
417 
418 /*
419  * Decouple the back-end from BPF by converting BPF conditions
420  * to internal enum. ARC_CC_* start from 0 and are used as index
421  * to an array. BPF_J* usage must end after this conversion.
422  */
423 static int bpf_cond_to_arc(const u8 op, u8 *arc_cc)
424 {
425 	switch (op) {
426 	case BPF_JA:
427 		*arc_cc = ARC_CC_AL;
428 		break;
429 	case BPF_JEQ:
430 		*arc_cc = ARC_CC_EQ;
431 		break;
432 	case BPF_JGT:
433 		*arc_cc = ARC_CC_UGT;
434 		break;
435 	case BPF_JGE:
436 		*arc_cc = ARC_CC_UGE;
437 		break;
438 	case BPF_JSET:
439 		*arc_cc = ARC_CC_SET;
440 		break;
441 	case BPF_JNE:
442 		*arc_cc = ARC_CC_NE;
443 		break;
444 	case BPF_JSGT:
445 		*arc_cc = ARC_CC_SGT;
446 		break;
447 	case BPF_JSGE:
448 		*arc_cc = ARC_CC_SGE;
449 		break;
450 	case BPF_JLT:
451 		*arc_cc = ARC_CC_ULT;
452 		break;
453 	case BPF_JLE:
454 		*arc_cc = ARC_CC_ULE;
455 		break;
456 	case BPF_JSLT:
457 		*arc_cc = ARC_CC_SLT;
458 		break;
459 	case BPF_JSLE:
460 		*arc_cc = ARC_CC_SLE;
461 		break;
462 	default:
463 		pr_err("bpf-jit: can't handle condition 0x%02X\n", op);
464 		return -EINVAL;
465 	}
466 	return 0;
467 }
468 
469 /*
470  * Check a few things for a supposedly "jump" instruction:
471  *
472  * 0. "insn" is a "jump" instruction, but not the "call/exit" variant.
473  * 1. The current "insn" index is in valid range.
474  * 2. The index of target instruction is in valid range.
475  */
476 static int check_bpf_jump(const struct jit_context *ctx,
477 			  const struct bpf_insn *insn)
478 {
479 	const u8 class = BPF_CLASS(insn->code);
480 	const u8 op = BPF_OP(insn->code);
481 
482 	/* Must be a jmp(32) instruction that is not a "call/exit". */
483 	if ((class != BPF_JMP && class != BPF_JMP32) ||
484 	    (op == BPF_CALL || op == BPF_EXIT)) {
485 		pr_err("bpf-jit: not a jump instruction.\n");
486 		return -EINVAL;
487 	}
488 
489 	if (!check_insn_idx_valid(ctx, get_index_for_insn(ctx, insn))) {
490 		pr_err("bpf-jit: the bpf jump insn is not in prog.\n");
491 		return -EINVAL;
492 	}
493 
494 	if (!check_insn_idx_valid(ctx, get_target_index_for_insn(ctx, insn))) {
495 		pr_err("bpf-jit: bpf jump label is out of range.\n");
496 		return -EINVAL;
497 	}
498 
499 	return 0;
500 }
501 
/*
 * Based on input "insn", consult "ctx->bpf2insn" to get the
 * related index (offset) of the translation in JIT stream.
 */
static u32 get_curr_jit_off(const struct jit_context *ctx,
			    const struct bpf_insn *insn)
{
	const s32 idx = get_index_for_insn(ctx, insn);
#ifdef ARC_BPF_JIT_DEBUG
	/* Only checked in debug builds; callers must guarantee validity. */
	BUG_ON(!offsets_available(ctx) || !check_insn_idx_valid(ctx, idx));
#endif
	return ctx->bpf2insn[idx];
}
515 
/*
 * The input "insn" must be a jump instruction.
 *
 * Based on input "insn", consult "ctx->bpf2insn" to get the
 * related JIT index (offset) of "target instruction" that
 * "insn" would jump to.
 */
static u32 get_targ_jit_off(const struct jit_context *ctx,
			    const struct bpf_insn *insn)
{
	const s32 tidx = get_target_index_for_insn(ctx, insn);
#ifdef ARC_BPF_JIT_DEBUG
	/* Only checked in debug builds; callers must guarantee validity. */
	BUG_ON(!offsets_available(ctx) || !check_insn_idx_valid(ctx, tidx));
#endif
	return ctx->bpf2insn[tidx];
}
532 
533 /*
534  * This function will return 0 for a feasible jump.
535  *
536  * Consult the back-end to check if it finds it feasible to emit
537  * the necessary instructions based on "cond" and the displacement
538  * between the "from_off" and the "to_off".
539  */
540 static int feasible_jit_jump(u32 from_off, u32 to_off, u8 cond, bool j32)
541 {
542 	int ret = 0;
543 
544 	if (j32) {
545 		if (!check_jmp_32(from_off, to_off, cond))
546 			ret = -EFAULT;
547 	} else {
548 		if (!check_jmp_64(from_off, to_off, cond))
549 			ret = -EFAULT;
550 	}
551 
552 	if (ret != 0)
553 		pr_err("bpf-jit: the JIT displacement is not OK.\n");
554 
555 	return ret;
556 }
557 
558 /*
559  * This jump handler performs the following steps:
560  *
561  * 1. Compute ARC's internal condition code from BPF's
562  * 2. Determine the bitness of the operation (32 vs. 64)
563  * 3. Sanity check on BPF stream
564  * 4. Sanity check on what is supposed to be JIT's displacement
565  * 5. And finally, emit the necessary instructions
566  *
567  * The last two steps are performed through the back-end.
568  * The value of steps 1 and 2 are necessary inputs for the back-end.
569  */
570 static int handle_jumps(const struct jit_context *ctx,
571 			const struct bpf_insn *insn,
572 			u8 *len)
573 {
574 	u8 cond;
575 	int ret = 0;
576 	u8 *buf = effective_jit_buf(ctx);
577 	const bool j32 = (BPF_CLASS(insn->code) == BPF_JMP32) ? true : false;
578 	const u8 rd = insn->dst_reg;
579 	u8 rs = insn->src_reg;
580 	u32 curr_off = 0, targ_off = 0;
581 
582 	*len = 0;
583 
584 	/* Map the BPF condition to internal enum. */
585 	CHECK_RET(bpf_cond_to_arc(BPF_OP(insn->code), &cond));
586 
587 	/* Sanity check on the BPF byte stream. */
588 	CHECK_RET(check_bpf_jump(ctx, insn));
589 
590 	/*
591 	 * Move the immediate into a temporary register _now_ for 2 reasons:
592 	 *
593 	 * 1. "gen_jmp_{32,64}()" deal with operands in registers.
594 	 *
595 	 * 2. The "len" parameter will grow so that the current jit offset
596 	 *    (curr_off) will have increased to a point where the necessary
597 	 *    instructions can be inserted by "gen_jmp_{32,64}()".
598 	 */
599 	if (has_imm(insn) && cond != ARC_CC_AL) {
600 		if (j32) {
601 			*len += mov_r32_i32(BUF(buf, *len), JIT_REG_TMP,
602 					    insn->imm);
603 		} else {
604 			*len += mov_r64_i32(BUF(buf, *len), JIT_REG_TMP,
605 					    insn->imm);
606 		}
607 		rs = JIT_REG_TMP;
608 	}
609 
610 	/* If the offsets are known, check if the branch can occur. */
611 	if (offsets_available(ctx)) {
612 		curr_off = get_curr_jit_off(ctx, insn) + *len;
613 		targ_off = get_targ_jit_off(ctx, insn);
614 
615 		/* Sanity check on the back-end side. */
616 		CHECK_RET(feasible_jit_jump(curr_off, targ_off, cond, j32));
617 	}
618 
619 	if (j32) {
620 		*len += gen_jmp_32(BUF(buf, *len), rd, rs, cond,
621 				   curr_off, targ_off);
622 	} else {
623 		*len += gen_jmp_64(BUF(buf, *len), rd, rs, cond,
624 				   curr_off, targ_off);
625 	}
626 
627 	return ret;
628 }
629 
/* Jump to translated epilogue address. */
static int handle_jmp_epilogue(struct jit_context *ctx,
			       const struct bpf_insn *insn, u8 *len)
{
	u8 *buf = effective_jit_buf(ctx);
	u32 curr_off = 0, epi_off = 0;

	/* Check the offset only if the data is available (post dry-run). */
	if (offsets_available(ctx)) {
		curr_off = get_curr_jit_off(ctx, insn);
		epi_off = ctx->epilogue_offset;

		if (!check_jmp_64(curr_off, epi_off, ARC_CC_AL)) {
			pr_err("bpf-jit: epilogue offset is not valid.\n");
			return -EINVAL;
		}
	}

	/* Jump to "epilogue offset" (rd and rs don't matter). */
	*len = gen_jmp_64(buf, 0, 0, ARC_CC_AL, curr_off, epi_off);

	return 0;
}
653 
654 /* Try to get the resolved address and generate the instructions. */
655 static int handle_call(struct jit_context *ctx,
656 		       const struct bpf_insn *insn,
657 		       u8 *len)
658 {
659 	int  ret;
660 	bool in_kernel_func, fixed = false;
661 	u64  addr = 0;
662 	u8  *buf = effective_jit_buf(ctx);
663 
664 	ret = bpf_jit_get_func_addr(ctx->prog, insn, ctx->is_extra_pass,
665 				    &addr, &fixed);
666 	if (ret < 0) {
667 		pr_err("bpf-jit: can't get the address for call.\n");
668 		return ret;
669 	}
670 	in_kernel_func = (fixed ? true : false);
671 
672 	/* No valuable address retrieved (yet). */
673 	if (!fixed && !addr)
674 		set_need_for_extra_pass(ctx);
675 
676 	*len = gen_func_call(buf, (ARC_ADDR)addr, in_kernel_func);
677 
678 	if (insn->src_reg != BPF_PSEUDO_CALL) {
679 		/* Assigning ABI's return reg to JIT's return reg. */
680 		*len += arc_to_bpf_return(BUF(buf, *len));
681 	}
682 
683 	return 0;
684 }
685 
686 /*
687  * Try to generate instructions for loading a 64-bit immediate.
688  * These sort of instructions are usually associated with the 64-bit
689  * relocations: R_BPF_64_64. Therefore, signal the need for an extra
690  * pass if the circumstances are right.
691  */
692 static int handle_ld_imm64(struct jit_context *ctx,
693 			   const struct bpf_insn *insn,
694 			   u8 *len)
695 {
696 	const s32 idx = get_index_for_insn(ctx, insn);
697 	u8 *buf = effective_jit_buf(ctx);
698 
699 	/* We're about to consume 2 VM instructions. */
700 	if (is_last_insn(ctx->prog, idx)) {
701 		pr_err("bpf-jit: need more data for 64-bit immediate.\n");
702 		return -EINVAL;
703 	}
704 
705 	*len = mov_r64_i64(buf, insn->dst_reg, insn->imm, (insn + 1)->imm);
706 
707 	if (bpf_pseudo_func(insn))
708 		set_need_for_extra_pass(ctx);
709 
710 	return 0;
711 }
712 
713 /*
714  * Handles one eBPF instruction at a time. To make this function faster,
715  * it does not call "jit_buffer_check()". Else, it would call it for every
716  * instruction. As a result, it should not be invoked directly. Only
717  * "handle_body()", that has already executed the "check", may call this
718  * function.
719  *
720  * If the "ret" value is negative, something has went wrong. Else,
721  * it mostly holds the value 0 and rarely 1. Number 1 signals
722  * the loop in "handle_body()" to skip the next instruction, because
723  * it has been consumed as part of a 64-bit immediate value.
724  */
static int handle_insn(struct jit_context *ctx, u32 idx)
{
	const struct bpf_insn *insn = &ctx->prog->insnsi[idx];
	const u8  code = insn->code;
	const u8  dst  = insn->dst_reg;
	const u8  src  = insn->src_reg;
	const s16 off  = insn->off;
	const s32 imm  = insn->imm;
	/* NULL during the dry-run; back-end then only reports lengths. */
	u8 *buf = effective_jit_buf(ctx);
	u8  len = 0;
	int ret = 0;

	switch (code) {
	/* dst += src (32-bit) */
	case BPF_ALU | BPF_ADD | BPF_X:
		len = add_r32(buf, dst, src);
		break;
	/* dst += imm (32-bit) */
	case BPF_ALU | BPF_ADD | BPF_K:
		len = add_r32_i32(buf, dst, imm);
		break;
	/* dst -= src (32-bit) */
	case BPF_ALU | BPF_SUB | BPF_X:
		len = sub_r32(buf, dst, src);
		break;
	/* dst -= imm (32-bit) */
	case BPF_ALU | BPF_SUB | BPF_K:
		len = sub_r32_i32(buf, dst, imm);
		break;
	/* dst = -dst (32-bit) */
	case BPF_ALU | BPF_NEG:
		len = neg_r32(buf, dst);
		break;
	/* dst *= src (32-bit) */
	case BPF_ALU | BPF_MUL | BPF_X:
		len = mul_r32(buf, dst, src);
		break;
	/* dst *= imm (32-bit) */
	case BPF_ALU | BPF_MUL | BPF_K:
		len = mul_r32_i32(buf, dst, imm);
		break;
	/* dst /= src (32-bit); "off == 1" selects the signed variant (v4). */
	case BPF_ALU | BPF_DIV | BPF_X:
		len = div_r32(buf, dst, src, off == 1);
		break;
	/* dst /= imm (32-bit) */
	case BPF_ALU | BPF_DIV | BPF_K:
		len = div_r32_i32(buf, dst, imm, off == 1);
		break;
	/* dst %= src (32-bit); "off == 1" selects the signed variant (v4). */
	case BPF_ALU | BPF_MOD | BPF_X:
		len = mod_r32(buf, dst, src, off == 1);
		break;
	/* dst %= imm (32-bit) */
	case BPF_ALU | BPF_MOD | BPF_K:
		len = mod_r32_i32(buf, dst, imm, off == 1);
		break;
	/* dst &= src (32-bit) */
	case BPF_ALU | BPF_AND | BPF_X:
		len = and_r32(buf, dst, src);
		break;
	/* dst &= imm (32-bit) */
	case BPF_ALU | BPF_AND | BPF_K:
		len = and_r32_i32(buf, dst, imm);
		break;
	/* dst |= src (32-bit) */
	case BPF_ALU | BPF_OR | BPF_X:
		len = or_r32(buf, dst, src);
		break;
	/* dst |= imm (32-bit) */
	case BPF_ALU | BPF_OR | BPF_K:
		len = or_r32_i32(buf, dst, imm);
		break;
	/* dst ^= src (32-bit) */
	case BPF_ALU | BPF_XOR | BPF_X:
		len = xor_r32(buf, dst, src);
		break;
	/* dst ^= imm (32-bit) */
	case BPF_ALU | BPF_XOR | BPF_K:
		len = xor_r32_i32(buf, dst, imm);
		break;
	/* dst <<= src (32-bit) */
	case BPF_ALU | BPF_LSH | BPF_X:
		len = lsh_r32(buf, dst, src);
		break;
	/* dst <<= imm (32-bit) */
	case BPF_ALU | BPF_LSH | BPF_K:
		len = lsh_r32_i32(buf, dst, imm);
		break;
	/* dst >>= src (32-bit) [unsigned] */
	case BPF_ALU | BPF_RSH | BPF_X:
		len = rsh_r32(buf, dst, src);
		break;
	/* dst >>= imm (32-bit) [unsigned] */
	case BPF_ALU | BPF_RSH | BPF_K:
		len = rsh_r32_i32(buf, dst, imm);
		break;
	/* dst >>= src (32-bit) [signed] */
	case BPF_ALU | BPF_ARSH | BPF_X:
		len = arsh_r32(buf, dst, src);
		break;
	/* dst >>= imm (32-bit) [signed] */
	case BPF_ALU | BPF_ARSH | BPF_K:
		len = arsh_r32_i32(buf, dst, imm);
		break;
	/* dst = src (32-bit) */
	case BPF_ALU | BPF_MOV | BPF_X:
		len = mov_r32(buf, dst, src, (u8)off);
		break;
	/* dst = imm32 (32-bit) */
	case BPF_ALU | BPF_MOV | BPF_K:
		len = mov_r32_i32(buf, dst, imm);
		break;
	/* dst = swap(dst); "imm" holds the swap width (16/32/64). */
	case BPF_ALU   | BPF_END | BPF_FROM_LE:
	case BPF_ALU   | BPF_END | BPF_FROM_BE:
	case BPF_ALU64 | BPF_END | BPF_FROM_LE: {
		CHECK_RET(handle_swap(buf, dst, imm, BPF_SRC(code),
				      BPF_CLASS(code) == BPF_ALU64,
				      ctx->do_zext, &len));
		break;
	}
	/* dst += src (64-bit) */
	case BPF_ALU64 | BPF_ADD | BPF_X:
		len = add_r64(buf, dst, src);
		break;
	/* dst += imm32 (64-bit) */
	case BPF_ALU64 | BPF_ADD | BPF_K:
		len = add_r64_i32(buf, dst, imm);
		break;
	/* dst -= src (64-bit) */
	case BPF_ALU64 | BPF_SUB | BPF_X:
		len = sub_r64(buf, dst, src);
		break;
	/* dst -= imm32 (64-bit) */
	case BPF_ALU64 | BPF_SUB | BPF_K:
		len = sub_r64_i32(buf, dst, imm);
		break;
	/* dst = -dst (64-bit) */
	case BPF_ALU64 | BPF_NEG:
		len = neg_r64(buf, dst);
		break;
	/* dst *= src (64-bit) */
	case BPF_ALU64 | BPF_MUL | BPF_X:
		len = mul_r64(buf, dst, src);
		break;
	/* dst *= imm32 (64-bit) */
	case BPF_ALU64 | BPF_MUL | BPF_K:
		len = mul_r64_i32(buf, dst, imm);
		break;
	/* dst &= src (64-bit) */
	case BPF_ALU64 | BPF_AND | BPF_X:
		len = and_r64(buf, dst, src);
		break;
	/* dst &= imm32 (64-bit) */
	case BPF_ALU64 | BPF_AND | BPF_K:
		len = and_r64_i32(buf, dst, imm);
		break;
	/* dst |= src (64-bit) */
	case BPF_ALU64 | BPF_OR | BPF_X:
		len = or_r64(buf, dst, src);
		break;
	/* dst |= imm32 (64-bit) */
	case BPF_ALU64 | BPF_OR | BPF_K:
		len = or_r64_i32(buf, dst, imm);
		break;
	/* dst ^= src (64-bit) */
	case BPF_ALU64 | BPF_XOR | BPF_X:
		len = xor_r64(buf, dst, src);
		break;
	/* dst ^= imm32 (64-bit) */
	case BPF_ALU64 | BPF_XOR | BPF_K:
		len = xor_r64_i32(buf, dst, imm);
		break;
	/* dst <<= src (64-bit) */
	case BPF_ALU64 | BPF_LSH | BPF_X:
		len = lsh_r64(buf, dst, src);
		break;
	/* dst <<= imm32 (64-bit) */
	case BPF_ALU64 | BPF_LSH | BPF_K:
		len = lsh_r64_i32(buf, dst, imm);
		break;
	/* dst >>= src (64-bit) [unsigned] */
	case BPF_ALU64 | BPF_RSH | BPF_X:
		len = rsh_r64(buf, dst, src);
		break;
	/* dst >>= imm32 (64-bit) [unsigned] */
	case BPF_ALU64 | BPF_RSH | BPF_K:
		len = rsh_r64_i32(buf, dst, imm);
		break;
	/* dst >>= src (64-bit) [signed] */
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		len = arsh_r64(buf, dst, src);
		break;
	/* dst >>= imm32 (64-bit) [signed] */
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		len = arsh_r64_i32(buf, dst, imm);
		break;
	/* dst = src (64-bit) */
	case BPF_ALU64 | BPF_MOV | BPF_X:
		len = mov_r64(buf, dst, src, (u8)off);
		break;
	/* dst = imm32 (sign extend to 64-bit) */
	case BPF_ALU64 | BPF_MOV | BPF_K:
		len = mov_r64_i32(buf, dst, imm);
		break;
	/* dst = imm64 */
	case BPF_LD | BPF_DW | BPF_IMM:
		CHECK_RET(handle_ld_imm64(ctx, insn, &len));
		/* Tell the loop to skip the next instruction. */
		ret = 1;
		break;
	/* dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
		len = load_r(buf, dst, src, off, BPF_SIZE(code), false);
		break;
	/* dst = *(signed size *)(src + off) -- sign-extending load. */
	case BPF_LDX | BPF_MEMSX | BPF_W:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_B:
		len = load_r(buf, dst, src, off, BPF_SIZE(code), true);
		break;
	/* *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
		len = store_r(buf, src, dst, off, BPF_SIZE(code));
		break;
	/* *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
		len = store_i(buf, imm, dst, off, BPF_SIZE(code));
		break;
	/* Conditional and unconditional jumps; see "handle_jumps()". */
	case BPF_JMP   | BPF_JA:
	case BPF_JMP   | BPF_JEQ  | BPF_X:
	case BPF_JMP   | BPF_JEQ  | BPF_K:
	case BPF_JMP   | BPF_JNE  | BPF_X:
	case BPF_JMP   | BPF_JNE  | BPF_K:
	case BPF_JMP   | BPF_JSET | BPF_X:
	case BPF_JMP   | BPF_JSET | BPF_K:
	case BPF_JMP   | BPF_JGT  | BPF_X:
	case BPF_JMP   | BPF_JGT  | BPF_K:
	case BPF_JMP   | BPF_JGE  | BPF_X:
	case BPF_JMP   | BPF_JGE  | BPF_K:
	case BPF_JMP   | BPF_JSGT | BPF_X:
	case BPF_JMP   | BPF_JSGT | BPF_K:
	case BPF_JMP   | BPF_JSGE | BPF_X:
	case BPF_JMP   | BPF_JSGE | BPF_K:
	case BPF_JMP   | BPF_JLT  | BPF_X:
	case BPF_JMP   | BPF_JLT  | BPF_K:
	case BPF_JMP   | BPF_JLE  | BPF_X:
	case BPF_JMP   | BPF_JLE  | BPF_K:
	case BPF_JMP   | BPF_JSLT | BPF_X:
	case BPF_JMP   | BPF_JSLT | BPF_K:
	case BPF_JMP   | BPF_JSLE | BPF_X:
	case BPF_JMP   | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JA:
	case BPF_JMP32 | BPF_JEQ  | BPF_X:
	case BPF_JMP32 | BPF_JEQ  | BPF_K:
	case BPF_JMP32 | BPF_JNE  | BPF_X:
	case BPF_JMP32 | BPF_JNE  | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JGT  | BPF_X:
	case BPF_JMP32 | BPF_JGT  | BPF_K:
	case BPF_JMP32 | BPF_JGE  | BPF_X:
	case BPF_JMP32 | BPF_JGE  | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JLT  | BPF_X:
	case BPF_JMP32 | BPF_JLT  | BPF_K:
	case BPF_JMP32 | BPF_JLE  | BPF_X:
	case BPF_JMP32 | BPF_JLE  | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		CHECK_RET(handle_jumps(ctx, insn, &len));
		break;
	case BPF_JMP | BPF_CALL:
		CHECK_RET(handle_call(ctx, insn, &len));
		break;

	case BPF_JMP | BPF_EXIT:
		/* If this is the last instruction, epilogue will follow. */
		if (is_last_insn(ctx->prog, idx))
			break;
		CHECK_RET(handle_jmp_epilogue(ctx, insn, &len));
		break;
	default:
		pr_err("bpf-jit: can't handle instruction code 0x%02X\n", code);
		return -EOPNOTSUPP;
	}

	if (BPF_CLASS(code) == BPF_ALU) {
		/*
		 * Skip the "swap" instructions. Even 64-bit swaps are of type
		 * BPF_ALU (and not BPF_ALU64). Therefore, for the swaps, one
		 * has to look at the "size" of the operations rather than the
		 * ALU type. "gen_swap()" specifically takes care of that.
		 */
		if (BPF_OP(code) != BPF_END && ctx->do_zext)
			len += zext(BUF(buf, len), dst);
	}

	jit_buffer_update(ctx, len);

	return ret;
}
1040 
1041 static int handle_body(struct jit_context *ctx)
1042 {
1043 	int ret;
1044 	bool populate_bpf2insn = false;
1045 	const struct bpf_prog *prog = ctx->prog;
1046 
1047 	CHECK_RET(jit_buffer_check(ctx));
1048 
1049 	/*
1050 	 * Record the mapping for the instructions during the dry-run.
1051 	 * Doing it this way allows us to have the mapping ready for
1052 	 * the jump instructions during the real compilation phase.
1053 	 */
1054 	if (!ctx->emit)
1055 		populate_bpf2insn = true;
1056 
1057 	for (u32 i = 0; i < prog->len; i++) {
1058 		/* During the dry-run, jit.len grows gradually per BPF insn. */
1059 		if (populate_bpf2insn)
1060 			ctx->bpf2insn[i] = ctx->jit.len;
1061 
1062 		CHECK_RET(handle_insn(ctx, i));
1063 		if (ret > 0) {
1064 			/* "ret" is 1 if two (64-bit) chunks were consumed. */
1065 			ctx->bpf2insn[i + 1] = ctx->bpf2insn[i];
1066 			i++;
1067 		}
1068 	}
1069 
1070 	/* If bpf2insn had to be populated, then it is done at this point. */
1071 	if (populate_bpf2insn)
1072 		ctx->bpf2insn_valid = true;
1073 
1074 	return 0;
1075 }
1076 
1077 /*
1078  * Initialize the memory with "unimp_s" which is the mnemonic for
1079  * "unimplemented" instruction and always raises an exception.
1080  *
1081  * The instruction is 2 bytes. If "size" is odd, there is not much
1082  * that can be done about the last byte in "area". Because, the
1083  * CPU always fetches instructions in two bytes. Therefore, the
1084  * byte beyond the last one is going to accompany it during a
1085  * possible fetch. In the most likely case of a little endian
1086  * system, that beyond-byte will become the major opcode and
1087  * we have no control over its initialisation.
1088  */
1089 static void fill_ill_insn(void *area, unsigned int size)
1090 {
1091 	const u16 unimp_s = 0x79e0;
1092 
1093 	if (size & 1) {
1094 		*((u8 *)area + (size - 1)) = 0xff;
1095 		size -= 1;
1096 	}
1097 
1098 	memset16(area, unimp_s, size >> 1);
1099 }
1100 
1101 /* Piece of memory that can be allocated at the beginning of jit_prepare(). */
1102 static int jit_prepare_early_mem_alloc(struct jit_context *ctx)
1103 {
1104 	ctx->bpf2insn = kcalloc(ctx->prog->len, sizeof(ctx->jit.len),
1105 				GFP_KERNEL);
1106 
1107 	if (!ctx->bpf2insn) {
1108 		pr_err("bpf-jit: could not allocate memory for "
1109 		       "mapping of the instructions.\n");
1110 		return -ENOMEM;
1111 	}
1112 
1113 	return 0;
1114 }
1115 
1116 /*
1117  * Memory allocations that rely on parameters known at the end of
1118  * jit_prepare().
1119  */
1120 static int jit_prepare_final_mem_alloc(struct jit_context *ctx)
1121 {
1122 	const size_t alignment = sizeof(u32);
1123 
1124 	ctx->bpf_header = bpf_jit_binary_alloc(ctx->jit.len, &ctx->jit.buf,
1125 					       alignment, fill_ill_insn);
1126 	if (!ctx->bpf_header) {
1127 		pr_err("bpf-jit: could not allocate memory for translation.\n");
1128 		return -ENOMEM;
1129 	}
1130 
1131 	if (ctx->need_extra_pass) {
1132 		ctx->jit_data = kzalloc_obj(*ctx->jit_data);
1133 		if (!ctx->jit_data)
1134 			return -ENOMEM;
1135 	}
1136 
1137 	return 0;
1138 }
1139 
1140 /*
1141  * The first phase of the translation without actually emitting any
1142  * instruction. It helps in getting a forecast on some aspects, such
1143  * as the length of the whole program or where the epilogue starts.
1144  *
1145  * Whenever the necessary parameters are known, memories are allocated.
1146  */
1147 static int jit_prepare(struct jit_context *ctx)
1148 {
1149 	int ret;
1150 
1151 	/* Dry run. */
1152 	ctx->emit = false;
1153 
1154 	CHECK_RET(jit_prepare_early_mem_alloc(ctx));
1155 
1156 	/* Get the length of prologue section after some register analysis. */
1157 	analyze_reg_usage(ctx);
1158 	CHECK_RET(handle_prologue(ctx));
1159 
1160 	CHECK_RET(handle_body(ctx));
1161 
1162 	/* Record at which offset epilogue begins. */
1163 	ctx->epilogue_offset = ctx->jit.len;
1164 
1165 	/* Process the epilogue section now. */
1166 	CHECK_RET(handle_epilogue(ctx));
1167 
1168 	CHECK_RET(jit_prepare_final_mem_alloc(ctx));
1169 
1170 	return 0;
1171 }
1172 
1173 /*
1174  * jit_compile() is the real compilation phase. jit_prepare() is
1175  * invoked before jit_compile() as a dry-run to make sure everything
1176  * will go OK and allocate the necessary memory.
1177  *
1178  * In the end, jit_compile() checks if it has produced the same number
1179  * of instructions as jit_prepare() would.
1180  */
1181 static int jit_compile(struct jit_context *ctx)
1182 {
1183 	int ret;
1184 
1185 	/* Let there be code. */
1186 	ctx->emit = true;
1187 
1188 	CHECK_RET(handle_prologue(ctx));
1189 
1190 	CHECK_RET(handle_body(ctx));
1191 
1192 	CHECK_RET(handle_epilogue(ctx));
1193 
1194 	if (ctx->jit.index != ctx->jit.len) {
1195 		pr_err("bpf-jit: divergence between the phases; "
1196 		       "%u vs. %u (bytes).\n",
1197 		       ctx->jit.len, ctx->jit.index);
1198 		return -EFAULT;
1199 	}
1200 
1201 	return 0;
1202 }
1203 
1204 /*
1205  * Calling this function implies a successful JIT. A successful
1206  * translation is signaled by setting the right parameters:
1207  *
1208  * prog->jited=1, prog->jited_len=..., prog->bpf_func=...
1209  */
1210 static int jit_finalize(struct jit_context *ctx)
1211 {
1212 	struct bpf_prog *prog = ctx->prog;
1213 
1214 	/* We're going to need this information for the "do_extra_pass()". */
1215 	if (ctx->need_extra_pass) {
1216 		ctx->jit_data->bpf_header = ctx->bpf_header;
1217 		ctx->jit_data->bpf2insn = ctx->bpf2insn;
1218 		prog->aux->jit_data = (void *)ctx->jit_data;
1219 	} else {
1220 		/*
1221 		 * If things seem finalised, then mark the JITed memory
1222 		 * as R-X and flush it.
1223 		 */
1224 		if (bpf_jit_binary_lock_ro(ctx->bpf_header)) {
1225 			pr_err("bpf-jit: Could not lock the JIT memory.\n");
1226 			return -EFAULT;
1227 		}
1228 		flush_icache_range((unsigned long)ctx->bpf_header,
1229 				   (unsigned long)
1230 				   BUF(ctx->jit.buf, ctx->jit.len));
1231 		prog->aux->jit_data = NULL;
1232 		bpf_prog_fill_jited_linfo(prog, ctx->bpf2insn);
1233 	}
1234 
1235 	ctx->success = true;
1236 	prog->bpf_func = (void *)ctx->jit.buf;
1237 	prog->jited_len = ctx->jit.len;
1238 	prog->jited = 1;
1239 
1240 	jit_ctx_cleanup(ctx);
1241 	jit_dump(ctx);
1242 
1243 	return 0;
1244 }
1245 
1246 /*
1247  * A lenient verification for the existence of JIT context in "prog".
1248  * Apparently the JIT internals, namely jit_subprogs() in bpf/verifier.c,
1249  * may request for a second compilation although nothing needs to be done.
1250  */
1251 static inline int check_jit_context(const struct bpf_prog *prog)
1252 {
1253 	if (!prog->aux->jit_data) {
1254 		pr_notice("bpf-jit: no jit data for the extra pass.\n");
1255 		return 1;
1256 	} else {
1257 		return 0;
1258 	}
1259 }
1260 
1261 /* Reuse the previous pass's data. */
1262 static int jit_resume_context(struct jit_context *ctx)
1263 {
1264 	struct arc_jit_data *jdata =
1265 		(struct arc_jit_data *)ctx->prog->aux->jit_data;
1266 
1267 	if (!jdata) {
1268 		pr_err("bpf-jit: no jit data for the extra pass.\n");
1269 		return -EINVAL;
1270 	}
1271 
1272 	ctx->jit.buf = (u8 *)ctx->prog->bpf_func;
1273 	ctx->jit.len = ctx->prog->jited_len;
1274 	ctx->bpf_header = jdata->bpf_header;
1275 	ctx->bpf2insn = (u32 *)jdata->bpf2insn;
1276 	ctx->bpf2insn_valid = ctx->bpf2insn ? true : false;
1277 	ctx->jit_data = jdata;
1278 
1279 	return 0;
1280 }
1281 
1282 /*
1283  * Patch in the new addresses. The instructions of interest are:
1284  *
1285  * - call
1286  * - ld r64, imm64
1287  *
1288  * For "call"s, it resolves the addresses one more time through the
1289  * handle_call().
1290  *
1291  * For 64-bit immediate loads, it just retranslates them, because the BPF
1292  * core in kernel might have changed the value since the normal pass.
1293  */
1294 static int jit_patch_relocations(struct jit_context *ctx)
1295 {
1296 	const u8 bpf_opc_call = BPF_JMP | BPF_CALL;
1297 	const u8 bpf_opc_ldi64 = BPF_LD | BPF_DW | BPF_IMM;
1298 	const struct bpf_prog *prog = ctx->prog;
1299 	int ret;
1300 
1301 	ctx->emit = true;
1302 	for (u32 i = 0; i < prog->len; i++) {
1303 		const struct bpf_insn *insn = &prog->insnsi[i];
1304 		u8 dummy;
1305 		/*
1306 		 * Adjust "ctx.jit.index", so "gen_*()" functions below
1307 		 * can use it for their output addresses.
1308 		 */
1309 		ctx->jit.index = ctx->bpf2insn[i];
1310 
1311 		if (insn->code == bpf_opc_call) {
1312 			CHECK_RET(handle_call(ctx, insn, &dummy));
1313 		} else if (insn->code == bpf_opc_ldi64) {
1314 			CHECK_RET(handle_ld_imm64(ctx, insn, &dummy));
1315 			/* Skip the next instruction. */
1316 			++i;
1317 		}
1318 	}
1319 	return 0;
1320 }
1321 
1322 /*
1323  * A normal pass that involves a "dry-run" phase, jit_prepare(),
1324  * to get the necessary data for the real compilation phase,
1325  * jit_compile().
1326  */
1327 static struct bpf_prog *do_normal_pass(struct bpf_prog *prog)
1328 {
1329 	struct jit_context ctx;
1330 
1331 	/* Bail out if JIT is disabled. */
1332 	if (!prog->jit_requested)
1333 		return prog;
1334 
1335 	if (jit_ctx_init(&ctx, prog)) {
1336 		jit_ctx_cleanup(&ctx);
1337 		return prog;
1338 	}
1339 
1340 	/* Get the lengths and allocate buffer. */
1341 	if (jit_prepare(&ctx)) {
1342 		jit_ctx_cleanup(&ctx);
1343 		return prog;
1344 	}
1345 
1346 	if (jit_compile(&ctx)) {
1347 		jit_ctx_cleanup(&ctx);
1348 		return prog;
1349 	}
1350 
1351 	if (jit_finalize(&ctx)) {
1352 		jit_ctx_cleanup(&ctx);
1353 		return prog;
1354 	}
1355 
1356 	return ctx.prog;
1357 }
1358 
1359 /*
1360  * If there are multi-function BPF programs that call each other,
1361  * their translated addresses are not known all at once. Therefore,
1362  * an extra pass is needed to consult the bpf_jit_get_func_addr()
1363  * again to get the newly translated addresses in order to resolve
1364  * the "call"s.
1365  */
1366 static struct bpf_prog *do_extra_pass(struct bpf_prog *prog)
1367 {
1368 	struct jit_context ctx;
1369 
1370 	/* Skip if there's no context to resume from. */
1371 	if (check_jit_context(prog))
1372 		return prog;
1373 
1374 	if (jit_ctx_init(&ctx, prog)) {
1375 		jit_ctx_cleanup(&ctx);
1376 		return prog;
1377 	}
1378 
1379 	if (jit_resume_context(&ctx)) {
1380 		jit_ctx_cleanup(&ctx);
1381 		return prog;
1382 	}
1383 
1384 	if (jit_patch_relocations(&ctx)) {
1385 		jit_ctx_cleanup(&ctx);
1386 		return prog;
1387 	}
1388 
1389 	if (jit_finalize(&ctx)) {
1390 		jit_ctx_cleanup(&ctx);
1391 		return prog;
1392 	}
1393 
1394 	return ctx.prog;
1395 }
1396 
1397 /*
1398  * This function may be invoked twice for the same stream of BPF
1399  * instructions. The "extra pass" happens, when there are
1400  * (re)locations involved that their addresses are not known
1401  * during the first run.
1402  */
1403 struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
1404 {
1405 	vm_dump(prog);
1406 
1407 	/* Was this program already translated? */
1408 	if (!prog->jited)
1409 		return do_normal_pass(prog);
1410 	else
1411 		return do_extra_pass(prog);
1412 
1413 	return prog;
1414 }
1415