xref: /linux/kernel/bpf/fixups.c (revision 12e896b9794bbd88f56aeac2a5807ae8d4bb5ad8)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
3 #include <linux/bpf.h>
4 #include <linux/btf.h>
5 #include <linux/bpf_verifier.h>
6 #include <linux/filter.h>
7 #include <linux/vmalloc.h>
8 #include <linux/bsearch.h>
9 #include <linux/sort.h>
10 #include <linux/perf_event.h>
11 #include <net/xdp.h>
12 #include "disasm.h"
13 
/* File-local shorthand: forward a formatted message to the verifier log. */
#define verbose(env, fmt, ...) bpf_verifier_log_write(env, fmt, ##__VA_ARGS__)
15 
16 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
17 {
18 	return BPF_CLASS(insn->code) == BPF_STX &&
19 	       BPF_MODE(insn->code) == BPF_ATOMIC &&
20 	       insn->imm == BPF_CMPXCHG;
21 }
22 
23 /* Return the regno defined by the insn, or -1. */
24 static int insn_def_regno(const struct bpf_insn *insn)
25 {
26 	switch (BPF_CLASS(insn->code)) {
27 	case BPF_JMP:
28 	case BPF_JMP32:
29 	case BPF_ST:
30 		return -1;
31 	case BPF_STX:
32 		if (BPF_MODE(insn->code) == BPF_ATOMIC ||
33 		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
34 			if (insn->imm == BPF_CMPXCHG)
35 				return BPF_REG_0;
36 			else if (insn->imm == BPF_LOAD_ACQ)
37 				return insn->dst_reg;
38 			else if (insn->imm & BPF_FETCH)
39 				return insn->src_reg;
40 		}
41 		return -1;
42 	default:
43 		return insn->dst_reg;
44 	}
45 }
46 
47 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
48 static bool insn_has_def32(struct bpf_insn *insn)
49 {
50 	int dst_reg = insn_def_regno(insn);
51 
52 	if (dst_reg == -1)
53 		return false;
54 
55 	return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
56 }
57 
58 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
59 {
60 	const struct bpf_kfunc_desc *d0 = a;
61 	const struct bpf_kfunc_desc *d1 = b;
62 
63 	if (d0->imm != d1->imm)
64 		return d0->imm < d1->imm ? -1 : 1;
65 	if (d0->offset != d1->offset)
66 		return d0->offset < d1->offset ? -1 : 1;
67 	return 0;
68 }
69 
70 const struct btf_func_model *
71 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
72 			 const struct bpf_insn *insn)
73 {
74 	const struct bpf_kfunc_desc desc = {
75 		.imm = insn->imm,
76 		.offset = insn->off,
77 	};
78 	const struct bpf_kfunc_desc *res;
79 	struct bpf_kfunc_desc_tab *tab;
80 
81 	tab = prog->aux->kfunc_tab;
82 	res = bsearch(&desc, tab->descs, tab->nr_descs,
83 		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
84 
85 	return res ? &res->func_model : NULL;
86 }
87 
88 static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
89 {
90 	unsigned long call_imm;
91 
92 	if (bpf_jit_supports_far_kfunc_call()) {
93 		call_imm = desc->func_id;
94 	} else {
95 		call_imm = BPF_CALL_IMM(desc->addr);
96 		/* Check whether the relative offset overflows desc->imm */
97 		if ((unsigned long)(s32)call_imm != call_imm) {
98 			verbose(env, "address of kernel func_id %u is out of range\n",
99 				desc->func_id);
100 			return -EINVAL;
101 		}
102 	}
103 	desc->imm = call_imm;
104 	return 0;
105 }
106 
107 static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
108 {
109 	struct bpf_kfunc_desc_tab *tab;
110 	int i, err;
111 
112 	tab = env->prog->aux->kfunc_tab;
113 	if (!tab)
114 		return 0;
115 
116 	for (i = 0; i < tab->nr_descs; i++) {
117 		err = set_kfunc_desc_imm(env, &tab->descs[i]);
118 		if (err)
119 			return err;
120 	}
121 
122 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
123 	     kfunc_desc_cmp_by_imm_off, NULL);
124 	return 0;
125 }
126 
127 static int add_kfunc_in_insns(struct bpf_verifier_env *env,
128 			      struct bpf_insn *insn, int cnt)
129 {
130 	int i, ret;
131 
132 	for (i = 0; i < cnt; i++, insn++) {
133 		if (bpf_pseudo_kfunc_call(insn)) {
134 			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
135 			if (ret < 0)
136 				return ret;
137 		}
138 	}
139 	return 0;
140 }
141 
142 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
143 static int get_callee_stack_depth(struct bpf_verifier_env *env,
144 				  const struct bpf_insn *insn, int idx)
145 {
146 	int start = idx + insn->imm + 1, subprog;
147 
148 	subprog = bpf_find_subprog(env, start);
149 	if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
150 		return -EFAULT;
151 	return env->subprog_info[subprog].stack_depth;
152 }
153 #endif
154 
155 /* single env->prog->insni[off] instruction was replaced with the range
156  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
157  * [0, off) and [off, end) to new locations, so the patched range stays zero
158  */
159 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
160 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
161 {
162 	struct bpf_insn_aux_data *data = env->insn_aux_data;
163 	struct bpf_insn *insn = new_prog->insnsi;
164 	u32 old_seen = data[off].seen;
165 	u32 prog_len;
166 	int i;
167 
168 	/* aux info at OFF always needs adjustment, no matter fast path
169 	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
170 	 * original insn at old prog.
171 	 */
172 	data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
173 
174 	if (cnt == 1)
175 		return;
176 	prog_len = new_prog->len;
177 
178 	memmove(data + off + cnt - 1, data + off,
179 		sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
180 	memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
181 	for (i = off; i < off + cnt - 1; i++) {
182 		/* Expand insni[off]'s seen count to the patched range. */
183 		data[i].seen = old_seen;
184 		data[i].zext_dst = insn_has_def32(insn + i);
185 	}
186 
187 	/*
188 	 * The indirect_target flag of the original instruction was moved to the last of the
189 	 * new instructions by the above memmove and memset, but the indirect jump target is
190 	 * actually the first instruction, so move it back. This also matches with the behavior
191 	 * of bpf_insn_array_adjust(), which preserves xlated_off to point to the first new
192 	 * instruction.
193 	 */
194 	if (data[off + cnt - 1].indirect_target) {
195 		data[off].indirect_target = 1;
196 		data[off + cnt - 1].indirect_target = 0;
197 	}
198 }
199 
200 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
201 {
202 	int i;
203 
204 	if (len == 1)
205 		return;
206 	/* NOTE: fake 'exit' subprog should be updated as well. */
207 	for (i = 0; i <= env->subprog_cnt; i++) {
208 		if (env->subprog_info[i].start <= off)
209 			continue;
210 		env->subprog_info[i].start += len - 1;
211 	}
212 }
213 
214 static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
215 {
216 	int i;
217 
218 	if (len == 1)
219 		return;
220 
221 	for (i = 0; i < env->insn_array_map_cnt; i++)
222 		bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
223 }
224 
225 static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
226 {
227 	int i;
228 
229 	for (i = 0; i < env->insn_array_map_cnt; i++)
230 		bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
231 }
232 
233 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
234 {
235 	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
236 	int i, sz = prog->aux->size_poke_tab;
237 	struct bpf_jit_poke_descriptor *desc;
238 
239 	for (i = 0; i < sz; i++) {
240 		desc = &tab[i];
241 		if (desc->insn_idx <= off)
242 			continue;
243 		desc->insn_idx += len - 1;
244 	}
245 }
246 
247 struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
248 				     const struct bpf_insn *patch, u32 len)
249 {
250 	struct bpf_prog *new_prog;
251 	struct bpf_insn_aux_data *new_data = NULL;
252 
253 	if (len > 1) {
254 		new_data = vrealloc(env->insn_aux_data,
255 				    array_size(env->prog->len + len - 1,
256 					       sizeof(struct bpf_insn_aux_data)),
257 				    GFP_KERNEL_ACCOUNT | __GFP_ZERO);
258 		if (!new_data)
259 			return NULL;
260 
261 		env->insn_aux_data = new_data;
262 	}
263 
264 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
265 	if (IS_ERR(new_prog)) {
266 		if (PTR_ERR(new_prog) == -ERANGE)
267 			verbose(env,
268 				"insn %d cannot be patched due to 16-bit range\n",
269 				env->insn_aux_data[off].orig_idx);
270 		return NULL;
271 	}
272 	adjust_insn_aux_data(env, new_prog, off, len);
273 	adjust_subprog_starts(env, off, len);
274 	adjust_insn_arrays(env, off, len);
275 	adjust_poke_descs(new_prog, off, len);
276 	return new_prog;
277 }
278 
279 /*
280  * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
281  * jump offset by 'delta'.
282  */
283 static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
284 {
285 	struct bpf_insn *insn = prog->insnsi;
286 	u32 insn_cnt = prog->len, i;
287 	s32 imm;
288 	s16 off;
289 
290 	for (i = 0; i < insn_cnt; i++, insn++) {
291 		u8 code = insn->code;
292 
293 		if (tgt_idx <= i && i < tgt_idx + delta)
294 			continue;
295 
296 		if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
297 		    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
298 			continue;
299 
300 		if (insn->code == (BPF_JMP32 | BPF_JA)) {
301 			if (i + 1 + insn->imm != tgt_idx)
302 				continue;
303 			if (check_add_overflow(insn->imm, delta, &imm))
304 				return -ERANGE;
305 			insn->imm = imm;
306 		} else {
307 			if (i + 1 + insn->off != tgt_idx)
308 				continue;
309 			if (check_add_overflow(insn->off, delta, &off))
310 				return -ERANGE;
311 			insn->off = off;
312 		}
313 	}
314 	return 0;
315 }
316 
317 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
318 					      u32 off, u32 cnt)
319 {
320 	int i, j;
321 
322 	/* find first prog starting at or after off (first to remove) */
323 	for (i = 0; i < env->subprog_cnt; i++)
324 		if (env->subprog_info[i].start >= off)
325 			break;
326 	/* find first prog starting at or after off + cnt (first to stay) */
327 	for (j = i; j < env->subprog_cnt; j++)
328 		if (env->subprog_info[j].start >= off + cnt)
329 			break;
330 	/* if j doesn't start exactly at off + cnt, we are just removing
331 	 * the front of previous prog
332 	 */
333 	if (env->subprog_info[j].start != off + cnt)
334 		j--;
335 
336 	if (j > i) {
337 		struct bpf_prog_aux *aux = env->prog->aux;
338 		int move;
339 
340 		/* move fake 'exit' subprog as well */
341 		move = env->subprog_cnt + 1 - j;
342 
343 		memmove(env->subprog_info + i,
344 			env->subprog_info + j,
345 			sizeof(*env->subprog_info) * move);
346 		env->subprog_cnt -= j - i;
347 
348 		/* remove func_info */
349 		if (aux->func_info) {
350 			move = aux->func_info_cnt - j;
351 
352 			memmove(aux->func_info + i,
353 				aux->func_info + j,
354 				sizeof(*aux->func_info) * move);
355 			aux->func_info_cnt -= j - i;
356 			/* func_info->insn_off is set after all code rewrites,
357 			 * in adjust_btf_func() - no need to adjust
358 			 */
359 		}
360 	} else {
361 		/* convert i from "first prog to remove" to "first to adjust" */
362 		if (env->subprog_info[i].start == off)
363 			i++;
364 	}
365 
366 	/* update fake 'exit' subprog as well */
367 	for (; i <= env->subprog_cnt; i++)
368 		env->subprog_info[i].start -= cnt;
369 
370 	return 0;
371 }
372 
373 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
374 				      u32 cnt)
375 {
376 	struct bpf_prog *prog = env->prog;
377 	u32 i, l_off, l_cnt, nr_linfo;
378 	struct bpf_line_info *linfo;
379 
380 	nr_linfo = prog->aux->nr_linfo;
381 	if (!nr_linfo)
382 		return 0;
383 
384 	linfo = prog->aux->linfo;
385 
386 	/* find first line info to remove, count lines to be removed */
387 	for (i = 0; i < nr_linfo; i++)
388 		if (linfo[i].insn_off >= off)
389 			break;
390 
391 	l_off = i;
392 	l_cnt = 0;
393 	for (; i < nr_linfo; i++)
394 		if (linfo[i].insn_off < off + cnt)
395 			l_cnt++;
396 		else
397 			break;
398 
399 	/* First live insn doesn't match first live linfo, it needs to "inherit"
400 	 * last removed linfo.  prog is already modified, so prog->len == off
401 	 * means no live instructions after (tail of the program was removed).
402 	 */
403 	if (prog->len != off && l_cnt &&
404 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
405 		l_cnt--;
406 		linfo[--i].insn_off = off + cnt;
407 	}
408 
409 	/* remove the line info which refer to the removed instructions */
410 	if (l_cnt) {
411 		memmove(linfo + l_off, linfo + i,
412 			sizeof(*linfo) * (nr_linfo - i));
413 
414 		prog->aux->nr_linfo -= l_cnt;
415 		nr_linfo = prog->aux->nr_linfo;
416 	}
417 
418 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
419 	for (i = l_off; i < nr_linfo; i++)
420 		linfo[i].insn_off -= cnt;
421 
422 	/* fix up all subprogs (incl. 'exit') which start >= off */
423 	for (i = 0; i <= env->subprog_cnt; i++)
424 		if (env->subprog_info[i].linfo_idx > l_off) {
425 			/* program may have started in the removed region but
426 			 * may not be fully removed
427 			 */
428 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
429 				env->subprog_info[i].linfo_idx -= l_cnt;
430 			else
431 				env->subprog_info[i].linfo_idx = l_off;
432 		}
433 
434 	return 0;
435 }
436 
437 /*
438  * Clean up dynamically allocated fields of aux data for instructions [start, ...]
439  */
440 void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
441 {
442 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
443 	struct bpf_insn *insns = env->prog->insnsi;
444 	int end = start + len;
445 	int i;
446 
447 	for (i = start; i < end; i++) {
448 		if (aux_data[i].jt) {
449 			kvfree(aux_data[i].jt);
450 			aux_data[i].jt = NULL;
451 		}
452 
453 		if (bpf_is_ldimm64(&insns[i]))
454 			i++;
455 	}
456 }
457 
458 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
459 {
460 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
461 	unsigned int orig_prog_len = env->prog->len;
462 	int err;
463 
464 	if (bpf_prog_is_offloaded(env->prog->aux))
465 		bpf_prog_offload_remove_insns(env, off, cnt);
466 
467 	/* Should be called before bpf_remove_insns, as it uses prog->insnsi */
468 	bpf_clear_insn_aux_data(env, off, cnt);
469 
470 	err = bpf_remove_insns(env->prog, off, cnt);
471 	if (err)
472 		return err;
473 
474 	err = adjust_subprog_starts_after_remove(env, off, cnt);
475 	if (err)
476 		return err;
477 
478 	err = bpf_adj_linfo_after_remove(env, off, cnt);
479 	if (err)
480 		return err;
481 
482 	adjust_insn_arrays_after_remove(env, off, cnt);
483 
484 	memmove(aux_data + off,	aux_data + off + cnt,
485 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
486 
487 	return 0;
488 }
489 
490 static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
491 static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
492 
493 bool bpf_insn_is_cond_jump(u8 code)
494 {
495 	u8 op;
496 
497 	op = BPF_OP(code);
498 	if (BPF_CLASS(code) == BPF_JMP32)
499 		return op != BPF_JA;
500 
501 	if (BPF_CLASS(code) != BPF_JMP)
502 		return false;
503 
504 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
505 }
506 
507 void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
508 {
509 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
510 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
511 	struct bpf_insn *insn = env->prog->insnsi;
512 	const int insn_cnt = env->prog->len;
513 	int i;
514 
515 	for (i = 0; i < insn_cnt; i++, insn++) {
516 		if (!bpf_insn_is_cond_jump(insn->code))
517 			continue;
518 
519 		if (!aux_data[i + 1].seen)
520 			ja.off = insn->off;
521 		else if (!aux_data[i + 1 + insn->off].seen)
522 			ja.off = 0;
523 		else
524 			continue;
525 
526 		if (bpf_prog_is_offloaded(env->prog->aux))
527 			bpf_prog_offload_replace_insn(env, i, &ja);
528 
529 		memcpy(insn, &ja, sizeof(ja));
530 	}
531 }
532 
533 int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
534 {
535 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
536 	int insn_cnt = env->prog->len;
537 	int i, err;
538 
539 	for (i = 0; i < insn_cnt; i++) {
540 		int j;
541 
542 		j = 0;
543 		while (i + j < insn_cnt && !aux_data[i + j].seen)
544 			j++;
545 		if (!j)
546 			continue;
547 
548 		err = verifier_remove_insns(env, i, j);
549 		if (err)
550 			return err;
551 		insn_cnt = env->prog->len;
552 	}
553 
554 	return 0;
555 }
556 
557 int bpf_opt_remove_nops(struct bpf_verifier_env *env)
558 {
559 	struct bpf_insn *insn = env->prog->insnsi;
560 	int insn_cnt = env->prog->len;
561 	bool is_may_goto_0, is_ja;
562 	int i, err;
563 
564 	for (i = 0; i < insn_cnt; i++) {
565 		is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
566 		is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
567 
568 		if (!is_may_goto_0 && !is_ja)
569 			continue;
570 
571 		err = verifier_remove_insns(env, i, 1);
572 		if (err)
573 			return err;
574 		insn_cnt--;
575 		/* Go back one insn to catch may_goto +1; may_goto +0 sequence */
576 		i -= (is_may_goto_0 && i > 0) ? 2 : 1;
577 	}
578 
579 	return 0;
580 }
581 
582 int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
583 					 const union bpf_attr *attr)
584 {
585 	struct bpf_insn *patch;
586 	/* use env->insn_buf as two independent buffers */
587 	struct bpf_insn *zext_patch = env->insn_buf;
588 	struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
589 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
590 	int i, patch_len, delta = 0, len = env->prog->len;
591 	struct bpf_insn *insns = env->prog->insnsi;
592 	struct bpf_prog *new_prog;
593 	bool rnd_hi32;
594 
595 	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
596 	zext_patch[1] = BPF_ZEXT_REG(0);
597 	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
598 	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
599 	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
600 	for (i = 0; i < len; i++) {
601 		int adj_idx = i + delta;
602 		struct bpf_insn insn;
603 		int load_reg;
604 
605 		insn = insns[adj_idx];
606 		load_reg = insn_def_regno(&insn);
607 		if (!aux[adj_idx].zext_dst) {
608 			u8 code, class;
609 			u32 imm_rnd;
610 
611 			if (!rnd_hi32)
612 				continue;
613 
614 			code = insn.code;
615 			class = BPF_CLASS(code);
616 			if (load_reg == -1)
617 				continue;
618 
619 			/* NOTE: arg "reg" (the fourth one) is only used for
620 			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
621 			 *       here.
622 			 */
623 			if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
624 				if (class == BPF_LD &&
625 				    BPF_MODE(code) == BPF_IMM)
626 					i++;
627 				continue;
628 			}
629 
630 			/* ctx load could be transformed into wider load. */
631 			if (class == BPF_LDX &&
632 			    aux[adj_idx].ptr_type == PTR_TO_CTX)
633 				continue;
634 
635 			imm_rnd = get_random_u32();
636 			rnd_hi32_patch[0] = insn;
637 			rnd_hi32_patch[1].imm = imm_rnd;
638 			rnd_hi32_patch[3].dst_reg = load_reg;
639 			patch = rnd_hi32_patch;
640 			patch_len = 4;
641 			goto apply_patch_buffer;
642 		}
643 
644 		/* Add in an zero-extend instruction if a) the JIT has requested
645 		 * it or b) it's a CMPXCHG.
646 		 *
647 		 * The latter is because: BPF_CMPXCHG always loads a value into
648 		 * R0, therefore always zero-extends. However some archs'
649 		 * equivalent instruction only does this load when the
650 		 * comparison is successful. This detail of CMPXCHG is
651 		 * orthogonal to the general zero-extension behaviour of the
652 		 * CPU, so it's treated independently of bpf_jit_needs_zext.
653 		 */
654 		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
655 			continue;
656 
657 		/* Zero-extension is done by the caller. */
658 		if (bpf_pseudo_kfunc_call(&insn))
659 			continue;
660 
661 		if (verifier_bug_if(load_reg == -1, env,
662 				    "zext_dst is set, but no reg is defined"))
663 			return -EFAULT;
664 
665 		zext_patch[0] = insn;
666 		zext_patch[1].dst_reg = load_reg;
667 		zext_patch[1].src_reg = load_reg;
668 		patch = zext_patch;
669 		patch_len = 2;
670 apply_patch_buffer:
671 		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
672 		if (!new_prog)
673 			return -ENOMEM;
674 		env->prog = new_prog;
675 		insns = new_prog->insnsi;
676 		aux = env->insn_aux_data;
677 		delta += patch_len - 1;
678 	}
679 
680 	return 0;
681 }
682 
683 /* convert load instructions that access fields of a context type into a
684  * sequence of instructions that access fields of the underlying structure:
685  *     struct __sk_buff    -> struct sk_buff
686  *     struct bpf_sock_ops -> struct sock
687  */
688 int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
689 {
690 	struct bpf_subprog_info *subprogs = env->subprog_info;
691 	const struct bpf_verifier_ops *ops = env->ops;
692 	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
693 	const int insn_cnt = env->prog->len;
694 	struct bpf_insn *epilogue_buf = env->epilogue_buf;
695 	struct bpf_insn *insn_buf = env->insn_buf;
696 	struct bpf_insn *insn;
697 	u32 target_size, size_default, off;
698 	struct bpf_prog *new_prog;
699 	enum bpf_access_type type;
700 	bool is_narrower_load;
701 	int epilogue_idx = 0;
702 
703 	if (ops->gen_epilogue) {
704 		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
705 						 -(subprogs[0].stack_depth + 8));
706 		if (epilogue_cnt >= INSN_BUF_SIZE) {
707 			verifier_bug(env, "epilogue is too long");
708 			return -EFAULT;
709 		} else if (epilogue_cnt) {
710 			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
711 			cnt = 0;
712 			subprogs[0].stack_depth += 8;
713 			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
714 						      -subprogs[0].stack_depth);
715 			insn_buf[cnt++] = env->prog->insnsi[0];
716 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
717 			if (!new_prog)
718 				return -ENOMEM;
719 			env->prog = new_prog;
720 			delta += cnt - 1;
721 
722 			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
723 			if (ret < 0)
724 				return ret;
725 		}
726 	}
727 
728 	if (ops->gen_prologue || env->seen_direct_write) {
729 		if (!ops->gen_prologue) {
730 			verifier_bug(env, "gen_prologue is null");
731 			return -EFAULT;
732 		}
733 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
734 					env->prog);
735 		if (cnt >= INSN_BUF_SIZE) {
736 			verifier_bug(env, "prologue is too long");
737 			return -EFAULT;
738 		} else if (cnt) {
739 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
740 			if (!new_prog)
741 				return -ENOMEM;
742 
743 			env->prog = new_prog;
744 			delta += cnt - 1;
745 
746 			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
747 			if (ret < 0)
748 				return ret;
749 		}
750 	}
751 
752 	if (delta)
753 		WARN_ON(adjust_jmp_off(env->prog, 0, delta));
754 
755 	if (bpf_prog_is_offloaded(env->prog->aux))
756 		return 0;
757 
758 	insn = env->prog->insnsi + delta;
759 
760 	for (i = 0; i < insn_cnt; i++, insn++) {
761 		bpf_convert_ctx_access_t convert_ctx_access;
762 		u8 mode;
763 
764 		if (env->insn_aux_data[i + delta].nospec) {
765 			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
766 			struct bpf_insn *patch = insn_buf;
767 
768 			*patch++ = BPF_ST_NOSPEC();
769 			*patch++ = *insn;
770 			cnt = patch - insn_buf;
771 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
772 			if (!new_prog)
773 				return -ENOMEM;
774 
775 			delta    += cnt - 1;
776 			env->prog = new_prog;
777 			insn      = new_prog->insnsi + i + delta;
778 			/* This can not be easily merged with the
779 			 * nospec_result-case, because an insn may require a
780 			 * nospec before and after itself. Therefore also do not
781 			 * 'continue' here but potentially apply further
782 			 * patching to insn. *insn should equal patch[1] now.
783 			 */
784 		}
785 
786 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
787 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
788 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
789 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
790 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
791 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
792 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
793 			type = BPF_READ;
794 		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
795 			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
796 			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
797 			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
798 			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
799 			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
800 			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
801 			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
802 			type = BPF_WRITE;
803 		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
804 			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
805 			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
806 			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
807 			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
808 			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
809 			env->prog->aux->num_exentries++;
810 			continue;
811 		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
812 			   epilogue_cnt &&
813 			   i + delta < subprogs[1].start) {
814 			/* Generate epilogue for the main prog */
815 			if (epilogue_idx) {
816 				/* jump back to the earlier generated epilogue */
817 				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
818 				cnt = 1;
819 			} else {
820 				memcpy(insn_buf, epilogue_buf,
821 				       epilogue_cnt * sizeof(*epilogue_buf));
822 				cnt = epilogue_cnt;
823 				/* epilogue_idx cannot be 0. It must have at
824 				 * least one ctx ptr saving insn before the
825 				 * epilogue.
826 				 */
827 				epilogue_idx = i + delta;
828 			}
829 			goto patch_insn_buf;
830 		} else {
831 			continue;
832 		}
833 
834 		if (type == BPF_WRITE &&
835 		    env->insn_aux_data[i + delta].nospec_result) {
836 			/* nospec_result is only used to mitigate Spectre v4 and
837 			 * to limit verification-time for Spectre v1.
838 			 */
839 			struct bpf_insn *patch = insn_buf;
840 
841 			*patch++ = *insn;
842 			*patch++ = BPF_ST_NOSPEC();
843 			cnt = patch - insn_buf;
844 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
845 			if (!new_prog)
846 				return -ENOMEM;
847 
848 			delta    += cnt - 1;
849 			env->prog = new_prog;
850 			insn      = new_prog->insnsi + i + delta;
851 			continue;
852 		}
853 
854 		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
855 		case PTR_TO_CTX:
856 			if (!ops->convert_ctx_access)
857 				continue;
858 			convert_ctx_access = ops->convert_ctx_access;
859 			break;
860 		case PTR_TO_SOCKET:
861 		case PTR_TO_SOCK_COMMON:
862 			convert_ctx_access = bpf_sock_convert_ctx_access;
863 			break;
864 		case PTR_TO_TCP_SOCK:
865 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
866 			break;
867 		case PTR_TO_XDP_SOCK:
868 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
869 			break;
870 		case PTR_TO_BTF_ID:
871 		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
872 		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
873 		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
874 		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
875 		 * any faults for loads into such types. BPF_WRITE is disallowed
876 		 * for this case.
877 		 */
878 		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
879 		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
880 			if (type == BPF_READ) {
881 				if (BPF_MODE(insn->code) == BPF_MEM)
882 					insn->code = BPF_LDX | BPF_PROBE_MEM |
883 						     BPF_SIZE((insn)->code);
884 				else
885 					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
886 						     BPF_SIZE((insn)->code);
887 				env->prog->aux->num_exentries++;
888 			}
889 			continue;
890 		case PTR_TO_ARENA:
891 			if (BPF_MODE(insn->code) == BPF_MEMSX) {
892 				if (!bpf_jit_supports_insn(insn, true)) {
893 					verbose(env, "sign extending loads from arena are not supported yet\n");
894 					return -EOPNOTSUPP;
895 				}
896 				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
897 			} else {
898 				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
899 			}
900 			env->prog->aux->num_exentries++;
901 			continue;
902 		default:
903 			continue;
904 		}
905 
906 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
907 		size = BPF_LDST_BYTES(insn);
908 		mode = BPF_MODE(insn->code);
909 
910 		/* If the read access is a narrower load of the field,
911 		 * convert to a 4/8-byte load, to minimum program type specific
912 		 * convert_ctx_access changes. If conversion is successful,
913 		 * we will apply proper mask to the result.
914 		 */
915 		is_narrower_load = size < ctx_field_size;
916 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
917 		off = insn->off;
918 		if (is_narrower_load) {
919 			u8 size_code;
920 
921 			if (type == BPF_WRITE) {
922 				verifier_bug(env, "narrow ctx access misconfigured");
923 				return -EFAULT;
924 			}
925 
926 			size_code = BPF_H;
927 			if (ctx_field_size == 4)
928 				size_code = BPF_W;
929 			else if (ctx_field_size == 8)
930 				size_code = BPF_DW;
931 
932 			insn->off = off & ~(size_default - 1);
933 			insn->code = BPF_LDX | BPF_MEM | size_code;
934 		}
935 
936 		target_size = 0;
937 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
938 					 &target_size);
939 		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
940 		    (ctx_field_size && !target_size)) {
941 			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
942 			return -EFAULT;
943 		}
944 
945 		if (is_narrower_load && size < target_size) {
946 			u8 shift = bpf_ctx_narrow_access_offset(
947 				off, size, size_default) * 8;
948 			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
949 				verifier_bug(env, "narrow ctx load misconfigured");
950 				return -EFAULT;
951 			}
952 			if (ctx_field_size <= 4) {
953 				if (shift)
954 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
955 									insn->dst_reg,
956 									shift);
957 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
958 								(1 << size * 8) - 1);
959 			} else {
960 				if (shift)
961 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
962 									insn->dst_reg,
963 									shift);
964 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
965 								(1ULL << size * 8) - 1);
966 			}
967 		}
968 		if (mode == BPF_MEMSX)
969 			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
970 						       insn->dst_reg, insn->dst_reg,
971 						       size * 8, 0);
972 
973 patch_insn_buf:
974 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
975 		if (!new_prog)
976 			return -ENOMEM;
977 
978 		delta += cnt - 1;
979 
980 		/* keep walking new program and skip insns we just inserted */
981 		env->prog = new_prog;
982 		insn      = new_prog->insnsi + i + delta;
983 	}
984 
985 	return 0;
986 }
987 
988 static u32 *bpf_dup_subprog_starts(struct bpf_verifier_env *env)
989 {
990 	u32 *starts = NULL;
991 
992 	starts = kvmalloc_objs(u32, env->subprog_cnt, GFP_KERNEL_ACCOUNT);
993 	if (starts) {
994 		for (int i = 0; i < env->subprog_cnt; i++)
995 			starts[i] = env->subprog_info[i].start;
996 	}
997 	return starts;
998 }
999 
1000 static void bpf_restore_subprog_starts(struct bpf_verifier_env *env, u32 *orig_starts)
1001 {
1002 	for (int i = 0; i < env->subprog_cnt; i++)
1003 		env->subprog_info[i].start = orig_starts[i];
1004 	/* restore the start of fake 'exit' subprog as well */
1005 	env->subprog_info[env->subprog_cnt].start = env->prog->len;
1006 }
1007 
1008 struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env)
1009 {
1010 	size_t size;
1011 	void *new_aux;
1012 
1013 	size = array_size(sizeof(struct bpf_insn_aux_data), env->prog->len);
1014 	new_aux = __vmalloc(size, GFP_KERNEL_ACCOUNT);
1015 	if (new_aux)
1016 		memcpy(new_aux, env->insn_aux_data, size);
1017 	return new_aux;
1018 }
1019 
1020 void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
1021 			       struct bpf_insn_aux_data *orig_insn_aux)
1022 {
1023 	/* the expanded elements are zero-filled, so no special handling is required */
1024 	vfree(env->insn_aux_data);
1025 	env->insn_aux_data = orig_insn_aux;
1026 }
1027 
1028 static int jit_subprogs(struct bpf_verifier_env *env)
1029 {
1030 	struct bpf_prog *prog = env->prog, **func, *tmp;
1031 	int i, j, subprog_start, subprog_end = 0, len, subprog;
1032 	struct bpf_map *map_ptr;
1033 	struct bpf_insn *insn;
1034 	void *old_bpf_func;
1035 	int err, num_exentries;
1036 
1037 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1038 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
1039 			continue;
1040 
1041 		/* Upon error here we cannot fall back to interpreter but
1042 		 * need a hard reject of the program. Thus -EFAULT is
1043 		 * propagated in any case.
1044 		 */
1045 		subprog = bpf_find_subprog(env, i + insn->imm + 1);
1046 		if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
1047 				    i + insn->imm + 1))
1048 			return -EFAULT;
1049 		/* temporarily remember subprog id inside insn instead of
1050 		 * aux_data, since next loop will split up all insns into funcs
1051 		 */
1052 		insn->off = subprog;
1053 		/* remember original imm in case JIT fails and fallback
1054 		 * to interpreter will be needed
1055 		 */
1056 		env->insn_aux_data[i].call_imm = insn->imm;
1057 		/* point imm to __bpf_call_base+1 from JITs point of view */
1058 		insn->imm = 1;
1059 		if (bpf_pseudo_func(insn)) {
1060 #if defined(MODULES_VADDR)
1061 			u64 addr = MODULES_VADDR;
1062 #else
1063 			u64 addr = VMALLOC_START;
1064 #endif
1065 			/* jit (e.g. x86_64) may emit fewer instructions
1066 			 * if it learns a u32 imm is the same as a u64 imm.
1067 			 * Set close enough to possible prog address.
1068 			 */
1069 			insn[0].imm = (u32)addr;
1070 			insn[1].imm = addr >> 32;
1071 		}
1072 	}
1073 
1074 	err = bpf_prog_alloc_jited_linfo(prog);
1075 	if (err)
1076 		goto out_undo_insn;
1077 
1078 	err = -ENOMEM;
1079 	func = kzalloc_objs(prog, env->subprog_cnt);
1080 	if (!func)
1081 		goto out_undo_insn;
1082 
1083 	for (i = 0; i < env->subprog_cnt; i++) {
1084 		subprog_start = subprog_end;
1085 		subprog_end = env->subprog_info[i + 1].start;
1086 
1087 		len = subprog_end - subprog_start;
1088 		/* bpf_prog_run() doesn't call subprogs directly,
1089 		 * hence main prog stats include the runtime of subprogs.
1090 		 * subprogs don't have IDs and not reachable via prog_get_next_id
1091 		 * func[i]->stats will never be accessed and stays NULL
1092 		 */
1093 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1094 		if (!func[i])
1095 			goto out_free;
1096 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
1097 		       len * sizeof(struct bpf_insn));
1098 		func[i]->type = prog->type;
1099 		func[i]->len = len;
1100 		if (bpf_prog_calc_tag(func[i]))
1101 			goto out_free;
1102 		func[i]->is_func = 1;
1103 		func[i]->sleepable = prog->sleepable;
1104 		func[i]->blinded = prog->blinded;
1105 		func[i]->aux->func_idx = i;
1106 		/* Below members will be freed only at prog->aux */
1107 		func[i]->aux->btf = prog->aux->btf;
1108 		func[i]->aux->subprog_start = subprog_start;
1109 		func[i]->aux->func_info = prog->aux->func_info;
1110 		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
1111 		func[i]->aux->poke_tab = prog->aux->poke_tab;
1112 		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
1113 		func[i]->aux->main_prog_aux = prog->aux;
1114 
1115 		for (j = 0; j < prog->aux->size_poke_tab; j++) {
1116 			struct bpf_jit_poke_descriptor *poke;
1117 
1118 			poke = &prog->aux->poke_tab[j];
1119 			if (poke->insn_idx < subprog_end &&
1120 			    poke->insn_idx >= subprog_start)
1121 				poke->aux = func[i]->aux;
1122 		}
1123 
1124 		func[i]->aux->name[0] = 'F';
1125 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1126 		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
1127 			func[i]->aux->jits_use_priv_stack = true;
1128 
1129 		func[i]->jit_requested = 1;
1130 		func[i]->blinding_requested = prog->blinding_requested;
1131 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
1132 		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
1133 		func[i]->aux->linfo = prog->aux->linfo;
1134 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
1135 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
1136 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1137 		func[i]->aux->arena = prog->aux->arena;
1138 		func[i]->aux->used_maps = env->used_maps;
1139 		func[i]->aux->used_map_cnt = env->used_map_cnt;
1140 		num_exentries = 0;
1141 		insn = func[i]->insnsi;
1142 		for (j = 0; j < func[i]->len; j++, insn++) {
1143 			if (BPF_CLASS(insn->code) == BPF_LDX &&
1144 			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
1145 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
1146 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
1147 			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
1148 				num_exentries++;
1149 			if ((BPF_CLASS(insn->code) == BPF_STX ||
1150 			     BPF_CLASS(insn->code) == BPF_ST) &&
1151 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32)
1152 				num_exentries++;
1153 			if (BPF_CLASS(insn->code) == BPF_STX &&
1154 			     BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
1155 				num_exentries++;
1156 		}
1157 		func[i]->aux->num_exentries = num_exentries;
1158 		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
1159 		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
1160 		func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
1161 		func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
1162 		func[i]->aux->token = prog->aux->token;
1163 		if (!i)
1164 			func[i]->aux->exception_boundary = env->seen_exception;
1165 		func[i] = bpf_int_jit_compile(env, func[i]);
1166 		if (!func[i]->jited) {
1167 			err = -ENOTSUPP;
1168 			goto out_free;
1169 		}
1170 		cond_resched();
1171 	}
1172 
1173 	/* at this point all bpf functions were successfully JITed
1174 	 * now populate all bpf_calls with correct addresses and
1175 	 * run last pass of JIT
1176 	 */
1177 	for (i = 0; i < env->subprog_cnt; i++) {
1178 		insn = func[i]->insnsi;
1179 		for (j = 0; j < func[i]->len; j++, insn++) {
1180 			if (bpf_pseudo_func(insn)) {
1181 				subprog = insn->off;
1182 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
1183 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
1184 				continue;
1185 			}
1186 			if (!bpf_pseudo_call(insn))
1187 				continue;
1188 			subprog = insn->off;
1189 			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
1190 		}
1191 
1192 		/* we use the aux data to keep a list of the start addresses
1193 		 * of the JITed images for each function in the program
1194 		 *
1195 		 * for some architectures, such as powerpc64, the imm field
1196 		 * might not be large enough to hold the offset of the start
1197 		 * address of the callee's JITed image from __bpf_call_base
1198 		 *
1199 		 * in such cases, we can lookup the start address of a callee
1200 		 * by using its subprog id, available from the off field of
1201 		 * the call instruction, as an index for this list
1202 		 */
1203 		func[i]->aux->func = func;
1204 		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
1205 		func[i]->aux->real_func_cnt = env->subprog_cnt;
1206 	}
1207 	for (i = 0; i < env->subprog_cnt; i++) {
1208 		old_bpf_func = func[i]->bpf_func;
1209 		tmp = bpf_int_jit_compile(env, func[i]);
1210 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
1211 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
1212 			err = -ENOTSUPP;
1213 			goto out_free;
1214 		}
1215 		cond_resched();
1216 	}
1217 
1218 	/*
1219 	 * Cleanup func[i]->aux fields which aren't required
1220 	 * or can become invalid in future
1221 	 */
1222 	for (i = 0; i < env->subprog_cnt; i++) {
1223 		func[i]->aux->used_maps = NULL;
1224 		func[i]->aux->used_map_cnt = 0;
1225 	}
1226 
1227 	/* finally lock prog and jit images for all functions and
1228 	 * populate kallsysm. Begin at the first subprogram, since
1229 	 * bpf_prog_load will add the kallsyms for the main program.
1230 	 */
1231 	for (i = 1; i < env->subprog_cnt; i++) {
1232 		err = bpf_prog_lock_ro(func[i]);
1233 		if (err)
1234 			goto out_free;
1235 	}
1236 
1237 	for (i = 1; i < env->subprog_cnt; i++)
1238 		bpf_prog_kallsyms_add(func[i]);
1239 
1240 	/* Last step: make now unused interpreter insns from main
1241 	 * prog consistent for later dump requests, so they can
1242 	 * later look the same as if they were interpreted only.
1243 	 */
1244 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1245 		if (bpf_pseudo_func(insn)) {
1246 			insn[0].imm = env->insn_aux_data[i].call_imm;
1247 			insn[1].imm = insn->off;
1248 			insn->off = 0;
1249 			continue;
1250 		}
1251 		if (!bpf_pseudo_call(insn))
1252 			continue;
1253 		insn->off = env->insn_aux_data[i].call_imm;
1254 		subprog = bpf_find_subprog(env, i + insn->off + 1);
1255 		insn->imm = subprog;
1256 	}
1257 
1258 	prog->jited = 1;
1259 	prog->bpf_func = func[0]->bpf_func;
1260 	prog->jited_len = func[0]->jited_len;
1261 	prog->aux->extable = func[0]->aux->extable;
1262 	prog->aux->num_exentries = func[0]->aux->num_exentries;
1263 	prog->aux->func = func;
1264 	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
1265 	prog->aux->real_func_cnt = env->subprog_cnt;
1266 	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
1267 	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
1268 	prog->aux->stack_arg_sp_adjust = func[0]->aux->stack_arg_sp_adjust;
1269 	bpf_prog_jit_attempt_done(prog);
1270 	return 0;
1271 out_free:
1272 	/* We failed JIT'ing, so at this point we need to unregister poke
1273 	 * descriptors from subprogs, so that kernel is not attempting to
1274 	 * patch it anymore as we're freeing the subprog JIT memory.
1275 	 */
1276 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
1277 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
1278 		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
1279 	}
1280 	/* At this point we're guaranteed that poke descriptors are not
1281 	 * live anymore. We can just unlink its descriptor table as it's
1282 	 * released with the main prog.
1283 	 */
1284 	for (i = 0; i < env->subprog_cnt; i++) {
1285 		if (!func[i])
1286 			continue;
1287 		func[i]->aux->poke_tab = NULL;
1288 		bpf_jit_free(func[i]);
1289 	}
1290 	kfree(func);
1291 out_undo_insn:
1292 	bpf_prog_jit_attempt_done(prog);
1293 	return err;
1294 }
1295 
1296 int bpf_jit_subprogs(struct bpf_verifier_env *env)
1297 {
1298 	int err, i;
1299 	bool blinded = false;
1300 	struct bpf_insn *insn;
1301 	struct bpf_prog *prog, *orig_prog;
1302 	struct bpf_insn_aux_data *orig_insn_aux;
1303 	u32 *orig_subprog_starts;
1304 
1305 	if (env->subprog_cnt <= 1)
1306 		return 0;
1307 
1308 	prog = orig_prog = env->prog;
1309 	if (bpf_prog_need_blind(prog)) {
1310 		orig_insn_aux = bpf_dup_insn_aux_data(env);
1311 		if (!orig_insn_aux) {
1312 			err = -ENOMEM;
1313 			goto out_cleanup;
1314 		}
1315 		orig_subprog_starts = bpf_dup_subprog_starts(env);
1316 		if (!orig_subprog_starts) {
1317 			vfree(orig_insn_aux);
1318 			err = -ENOMEM;
1319 			goto out_cleanup;
1320 		}
1321 		prog = bpf_jit_blind_constants(env, prog);
1322 		if (IS_ERR(prog)) {
1323 			err = -ENOMEM;
1324 			prog = orig_prog;
1325 			goto out_restore;
1326 		}
1327 		blinded = true;
1328 	}
1329 
1330 	err = jit_subprogs(env);
1331 	if (err)
1332 		goto out_jit_err;
1333 
1334 	if (blinded) {
1335 		bpf_jit_prog_release_other(prog, orig_prog);
1336 		kvfree(orig_subprog_starts);
1337 		vfree(orig_insn_aux);
1338 	}
1339 
1340 	return 0;
1341 
1342 out_jit_err:
1343 	if (blinded) {
1344 		bpf_jit_prog_release_other(orig_prog, prog);
1345 		/* roll back to the clean original prog */
1346 		prog = env->prog = orig_prog;
1347 		goto out_restore;
1348 	} else {
1349 		if (err != -EFAULT) {
1350 			/*
1351 			 * We will fall back to interpreter mode when err is not -EFAULT, before
1352 			 * that, insn->off and insn->imm should be restored to their original
1353 			 * values since they were modified by jit_subprogs.
1354 			 */
1355 			for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1356 				if (!bpf_pseudo_call(insn))
1357 					continue;
1358 				insn->off = 0;
1359 				insn->imm = env->insn_aux_data[i].call_imm;
1360 			}
1361 		}
1362 		goto out_cleanup;
1363 	}
1364 
1365 out_restore:
1366 	bpf_restore_subprog_starts(env, orig_subprog_starts);
1367 	bpf_restore_insn_aux_data(env, orig_insn_aux);
1368 	kvfree(orig_subprog_starts);
1369 out_cleanup:
1370 	/* cleanup main prog to be interpreted */
1371 	prog->jit_requested = 0;
1372 	prog->blinding_requested = 0;
1373 	return err;
1374 }
1375 
1376 int bpf_fixup_call_args(struct bpf_verifier_env *env)
1377 {
1378 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1379 	struct bpf_prog *prog = env->prog;
1380 	struct bpf_insn *insn = prog->insnsi;
1381 	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
1382 	int depth;
1383 #endif
1384 	int i, err = 0;
1385 
1386 	for (i = 0; i < env->subprog_cnt; i++) {
1387 		struct bpf_subprog_info *subprog = &env->subprog_info[i];
1388 		u16 outgoing = subprog->stack_arg_cnt - bpf_in_stack_arg_cnt(subprog);
1389 
1390 		if (subprog->max_out_stack_arg_cnt > outgoing) {
1391 			verbose(env,
1392 				"func#%d writes %u stack arg slots, but calls only require %u\n",
1393 				i, subprog->max_out_stack_arg_cnt, outgoing);
1394 			return -EINVAL;
1395 		}
1396 	}
1397 
1398 	if (env->prog->jit_requested &&
1399 	    !bpf_prog_is_offloaded(env->prog->aux)) {
1400 		err = bpf_jit_subprogs(env);
1401 		if (err == 0)
1402 			return 0;
1403 		if (err == -EFAULT)
1404 			return err;
1405 	}
1406 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1407 	if (has_kfunc_call) {
1408 		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
1409 		return -EINVAL;
1410 	}
1411 	for (i = 0; i < env->subprog_cnt; i++) {
1412 		if (bpf_in_stack_arg_cnt(&env->subprog_info[i])) {
1413 			verbose(env, "stack args are not supported in non-JITed programs\n");
1414 			return -EINVAL;
1415 		}
1416 	}
1417 	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
1418 		/* When JIT fails the progs with bpf2bpf calls and tail_calls
1419 		 * have to be rejected, since interpreter doesn't support them yet.
1420 		 */
1421 		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
1422 		return -EINVAL;
1423 	}
1424 	for (i = 0; i < prog->len; i++, insn++) {
1425 		if (bpf_pseudo_func(insn)) {
1426 			/* When JIT fails the progs with callback calls
1427 			 * have to be rejected, since interpreter doesn't support them yet.
1428 			 */
1429 			verbose(env, "callbacks are not allowed in non-JITed programs\n");
1430 			return -EINVAL;
1431 		}
1432 
1433 		if (!bpf_pseudo_call(insn))
1434 			continue;
1435 		depth = get_callee_stack_depth(env, insn, i);
1436 		if (depth < 0)
1437 			return depth;
1438 		bpf_patch_call_args(insn, depth);
1439 	}
1440 	err = 0;
1441 #endif
1442 	return err;
1443 }
1444 
1445 
1446 /* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
1447 static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
1448 {
1449 	struct bpf_subprog_info *info = env->subprog_info;
1450 	int cnt = env->subprog_cnt;
1451 	struct bpf_prog *prog;
1452 
1453 	/* We only reserve one slot for hidden subprogs in subprog_info. */
1454 	if (env->hidden_subprog_cnt) {
1455 		verifier_bug(env, "only one hidden subprog supported");
1456 		return -EFAULT;
1457 	}
1458 	/* We're not patching any existing instruction, just appending the new
1459 	 * ones for the hidden subprog. Hence all of the adjustment operations
1460 	 * in bpf_patch_insn_data are no-ops.
1461 	 */
1462 	prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
1463 	if (!prog)
1464 		return -ENOMEM;
1465 	env->prog = prog;
1466 	info[cnt + 1].start = info[cnt].start;
1467 	info[cnt].start = prog->len - len + 1;
1468 	env->subprog_cnt++;
1469 	env->hidden_subprog_cnt++;
1470 	return 0;
1471 }
1472 
1473 /* Do various post-verification rewrites in a single program pass.
1474  * These rewrites simplify JIT and interpreter implementations.
1475  */
1476 int bpf_do_misc_fixups(struct bpf_verifier_env *env)
1477 {
1478 	struct bpf_prog *prog = env->prog;
1479 	enum bpf_attach_type eatype = prog->expected_attach_type;
1480 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
1481 	struct bpf_insn *insn = prog->insnsi;
1482 	const struct bpf_func_proto *fn;
1483 	const int insn_cnt = prog->len;
1484 	const struct bpf_map_ops *ops;
1485 	struct bpf_insn_aux_data *aux;
1486 	struct bpf_insn *insn_buf = env->insn_buf;
1487 	struct bpf_prog *new_prog;
1488 	struct bpf_map *map_ptr;
1489 	int i, ret, cnt, delta = 0, cur_subprog = 0;
1490 	struct bpf_subprog_info *subprogs = env->subprog_info;
1491 	u16 stack_depth = subprogs[cur_subprog].stack_depth;
1492 	u16 stack_depth_extra = 0;
1493 
1494 	if (env->seen_exception && !env->exception_callback_subprog) {
1495 		struct bpf_insn *patch = insn_buf;
1496 
1497 		*patch++ = env->prog->insnsi[insn_cnt - 1];
1498 		*patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
1499 		*patch++ = BPF_EXIT_INSN();
1500 		ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
1501 		if (ret < 0)
1502 			return ret;
1503 		prog = env->prog;
1504 		insn = prog->insnsi;
1505 
1506 		env->exception_callback_subprog = env->subprog_cnt - 1;
1507 		/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
1508 		bpf_mark_subprog_exc_cb(env, env->exception_callback_subprog);
1509 	}
1510 
1511 	for (i = 0; i < insn_cnt;) {
1512 		if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
1513 			if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
1514 			    (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
1515 				/* convert to 32-bit mov that clears upper 32-bit */
1516 				insn->code = BPF_ALU | BPF_MOV | BPF_X;
1517 				/* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
1518 				insn->off = 0;
1519 				insn->imm = 0;
1520 			} /* cast from as(0) to as(1) should be handled by JIT */
1521 			goto next_insn;
1522 		}
1523 
1524 		if (env->insn_aux_data[i + delta].needs_zext)
1525 			/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
1526 			insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
1527 
1528 		/* Make sdiv/smod divide-by-minus-one exceptions impossible. */
1529 		if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
1530 		     insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
1531 		     insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
1532 		     insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
1533 		    insn->off == 1 && insn->imm == -1) {
1534 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1535 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
1536 			struct bpf_insn *patch = insn_buf;
1537 
1538 			if (isdiv)
1539 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1540 							BPF_NEG | BPF_K, insn->dst_reg,
1541 							0, 0, 0);
1542 			else
1543 				*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
1544 
1545 			cnt = patch - insn_buf;
1546 
1547 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1548 			if (!new_prog)
1549 				return -ENOMEM;
1550 
1551 			delta    += cnt - 1;
1552 			env->prog = prog = new_prog;
1553 			insn      = new_prog->insnsi + i + delta;
1554 			goto next_insn;
1555 		}
1556 
1557 		/* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
1558 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
1559 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
1560 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
1561 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
1562 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1563 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
1564 			bool is_sdiv = isdiv && insn->off == 1;
1565 			bool is_smod = !isdiv && insn->off == 1;
1566 			struct bpf_insn *patch = insn_buf;
1567 
1568 			if (is_sdiv) {
1569 				/* [R,W]x sdiv 0 -> 0
1570 				 * LLONG_MIN sdiv -1 -> LLONG_MIN
1571 				 * INT_MIN sdiv -1 -> INT_MIN
1572 				 */
1573 				*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1574 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1575 							BPF_ADD | BPF_K, BPF_REG_AX,
1576 							0, 0, 1);
1577 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1578 							BPF_JGT | BPF_K, BPF_REG_AX,
1579 							0, 4, 1);
1580 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1581 							BPF_JEQ | BPF_K, BPF_REG_AX,
1582 							0, 1, 0);
1583 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1584 							BPF_MOV | BPF_K, insn->dst_reg,
1585 							0, 0, 0);
1586 				/* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
1587 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1588 							BPF_NEG | BPF_K, insn->dst_reg,
1589 							0, 0, 0);
1590 				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1591 				*patch++ = *insn;
1592 				cnt = patch - insn_buf;
1593 			} else if (is_smod) {
1594 				/* [R,W]x mod 0 -> [R,W]x */
1595 				/* [R,W]x mod -1 -> 0 */
1596 				*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1597 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1598 							BPF_ADD | BPF_K, BPF_REG_AX,
1599 							0, 0, 1);
1600 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1601 							BPF_JGT | BPF_K, BPF_REG_AX,
1602 							0, 3, 1);
1603 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1604 							BPF_JEQ | BPF_K, BPF_REG_AX,
1605 							0, 3 + (is64 ? 0 : 1), 1);
1606 				*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
1607 				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1608 				*patch++ = *insn;
1609 
1610 				if (!is64) {
1611 					*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1612 					*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
1613 				}
1614 				cnt = patch - insn_buf;
1615 			} else if (isdiv) {
1616 				/* [R,W]x div 0 -> 0 */
1617 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1618 							BPF_JNE | BPF_K, insn->src_reg,
1619 							0, 2, 0);
1620 				*patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
1621 				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1622 				*patch++ = *insn;
1623 				cnt = patch - insn_buf;
1624 			} else {
1625 				/* [R,W]x mod 0 -> [R,W]x */
1626 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1627 							BPF_JEQ | BPF_K, insn->src_reg,
1628 							0, 1 + (is64 ? 0 : 1), 0);
1629 				*patch++ = *insn;
1630 
1631 				if (!is64) {
1632 					*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1633 					*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
1634 				}
1635 				cnt = patch - insn_buf;
1636 			}
1637 
1638 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1639 			if (!new_prog)
1640 				return -ENOMEM;
1641 
1642 			delta    += cnt - 1;
1643 			env->prog = prog = new_prog;
1644 			insn      = new_prog->insnsi + i + delta;
1645 			goto next_insn;
1646 		}
1647 
1648 		/* Make it impossible to de-reference a userspace address */
1649 		if (BPF_CLASS(insn->code) == BPF_LDX &&
1650 		    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
1651 		     BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
1652 			struct bpf_insn *patch = insn_buf;
1653 			u64 uaddress_limit = bpf_arch_uaddress_limit();
1654 
1655 			if (!uaddress_limit)
1656 				goto next_insn;
1657 
1658 			*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1659 			if (insn->off)
1660 				*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
1661 			*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
1662 			*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
1663 			*patch++ = *insn;
1664 			*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1665 			*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
1666 
1667 			cnt = patch - insn_buf;
1668 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1669 			if (!new_prog)
1670 				return -ENOMEM;
1671 
1672 			delta    += cnt - 1;
1673 			env->prog = prog = new_prog;
1674 			insn      = new_prog->insnsi + i + delta;
1675 			goto next_insn;
1676 		}
1677 
1678 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
1679 		if (BPF_CLASS(insn->code) == BPF_LD &&
1680 		    (BPF_MODE(insn->code) == BPF_ABS ||
1681 		     BPF_MODE(insn->code) == BPF_IND)) {
1682 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
1683 			if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
1684 				verifier_bug(env, "%d insns generated for ld_abs", cnt);
1685 				return -EFAULT;
1686 			}
1687 
1688 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1689 			if (!new_prog)
1690 				return -ENOMEM;
1691 
1692 			delta    += cnt - 1;
1693 			env->prog = prog = new_prog;
1694 			insn      = new_prog->insnsi + i + delta;
1695 			goto next_insn;
1696 		}
1697 
1698 		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
1699 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
1700 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
1701 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
1702 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
1703 			struct bpf_insn *patch = insn_buf;
1704 			bool issrc, isneg, isimm;
1705 			u32 off_reg;
1706 
1707 			aux = &env->insn_aux_data[i + delta];
1708 			if (!aux->alu_state ||
1709 			    aux->alu_state == BPF_ALU_NON_POINTER)
1710 				goto next_insn;
1711 
1712 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
1713 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
1714 				BPF_ALU_SANITIZE_SRC;
1715 			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
1716 
1717 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
1718 			if (isimm) {
1719 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
1720 			} else {
1721 				if (isneg)
1722 					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
1723 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
1724 				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
1725 				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
1726 				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
1727 				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
1728 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
1729 			}
1730 			if (!issrc)
1731 				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
1732 			insn->src_reg = BPF_REG_AX;
1733 			if (isneg)
1734 				insn->code = insn->code == code_add ?
1735 					     code_sub : code_add;
1736 			*patch++ = *insn;
1737 			if (issrc && isneg && !isimm)
1738 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
1739 			cnt = patch - insn_buf;
1740 
1741 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1742 			if (!new_prog)
1743 				return -ENOMEM;
1744 
1745 			delta    += cnt - 1;
1746 			env->prog = prog = new_prog;
1747 			insn      = new_prog->insnsi + i + delta;
1748 			goto next_insn;
1749 		}
1750 
1751 		if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
1752 			int stack_off_cnt = -stack_depth - 16;
1753 
1754 			/*
1755 			 * Two 8 byte slots, depth-16 stores the count, and
1756 			 * depth-8 stores the start timestamp of the loop.
1757 			 *
1758 			 * The starting value of count is BPF_MAX_TIMED_LOOPS
1759 			 * (0xffff).  Every iteration loads it and subs it by 1,
1760 			 * until the value becomes 0 in AX (thus, 1 in stack),
1761 			 * after which we call arch_bpf_timed_may_goto, which
1762 			 * either sets AX to 0xffff to keep looping, or to 0
1763 			 * upon timeout. AX is then stored into the stack. In
1764 			 * the next iteration, we either see 0 and break out, or
1765 			 * continue iterating until the next time value is 0
1766 			 * after subtraction, rinse and repeat.
1767 			 */
1768 			stack_depth_extra = 16;
1769 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
1770 			if (insn->off >= 0)
1771 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
1772 			else
1773 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
1774 			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
1775 			insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
1776 			/*
1777 			 * AX is used as an argument to pass in stack_off_cnt
1778 			 * (to add to r10/fp), and also as the return value of
1779 			 * the call to arch_bpf_timed_may_goto.
1780 			 */
1781 			insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
1782 			insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
1783 			insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
1784 			cnt = 7;
1785 
1786 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1787 			if (!new_prog)
1788 				return -ENOMEM;
1789 
1790 			delta += cnt - 1;
1791 			env->prog = prog = new_prog;
1792 			insn = new_prog->insnsi + i + delta;
1793 			goto next_insn;
1794 		} else if (bpf_is_may_goto_insn(insn)) {
1795 			int stack_off = -stack_depth - 8;
1796 
1797 			stack_depth_extra = 8;
1798 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
1799 			if (insn->off >= 0)
1800 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
1801 			else
1802 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
1803 			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
1804 			insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
1805 			cnt = 4;
1806 
1807 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1808 			if (!new_prog)
1809 				return -ENOMEM;
1810 
1811 			delta += cnt - 1;
1812 			env->prog = prog = new_prog;
1813 			insn = new_prog->insnsi + i + delta;
1814 			goto next_insn;
1815 		}
1816 
1817 		if (insn->code != (BPF_JMP | BPF_CALL))
1818 			goto next_insn;
1819 		if (insn->src_reg == BPF_PSEUDO_CALL)
1820 			goto next_insn;
1821 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1822 			ret = bpf_fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
1823 			if (ret)
1824 				return ret;
1825 			if (cnt == 0)
1826 				goto next_insn;
1827 
1828 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1829 			if (!new_prog)
1830 				return -ENOMEM;
1831 
1832 			delta	 += cnt - 1;
1833 			env->prog = prog = new_prog;
1834 			insn	  = new_prog->insnsi + i + delta;
1835 			goto next_insn;
1836 		}
1837 
1838 		/* Skip inlining the helper call if the JIT does it. */
1839 		if (bpf_jit_inlines_helper_call(insn->imm))
1840 			goto next_insn;
1841 
1842 		if (insn->imm == BPF_FUNC_get_route_realm)
1843 			prog->dst_needed = 1;
1844 		if (insn->imm == BPF_FUNC_get_prandom_u32)
1845 			bpf_user_rnd_init_once();
1846 		if (insn->imm == BPF_FUNC_override_return)
1847 			prog->kprobe_override = 1;
1848 		if (insn->imm == BPF_FUNC_tail_call) {
1849 			/* If we tail call into other programs, we
1850 			 * cannot make any assumptions since they can
1851 			 * be replaced dynamically during runtime in
1852 			 * the program array.
1853 			 */
1854 			prog->cb_access = 1;
1855 			if (!bpf_allow_tail_call_in_subprogs(env))
1856 				prog->aux->stack_depth = MAX_BPF_STACK;
1857 			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
1858 
1859 			/* mark bpf_tail_call as different opcode to avoid
1860 			 * conditional branch in the interpreter for every normal
1861 			 * call and to prevent accidental JITing by JIT compiler
1862 			 * that doesn't support bpf_tail_call yet
1863 			 */
1864 			insn->imm = 0;
1865 			insn->code = BPF_JMP | BPF_TAIL_CALL;
1866 
1867 			aux = &env->insn_aux_data[i + delta];
1868 			if (env->bpf_capable && !prog->blinding_requested &&
1869 			    prog->jit_requested &&
1870 			    !bpf_map_key_poisoned(aux) &&
1871 			    !bpf_map_ptr_poisoned(aux) &&
1872 			    !bpf_map_ptr_unpriv(aux)) {
1873 				struct bpf_jit_poke_descriptor desc = {
1874 					.reason = BPF_POKE_REASON_TAIL_CALL,
1875 					.tail_call.map = aux->map_ptr_state.map_ptr,
1876 					.tail_call.key = bpf_map_key_immediate(aux),
1877 					.insn_idx = i + delta,
1878 				};
1879 
1880 				ret = bpf_jit_add_poke_descriptor(prog, &desc);
1881 				if (ret < 0) {
1882 					verbose(env, "adding tail call poke descriptor failed\n");
1883 					return ret;
1884 				}
1885 
1886 				insn->imm = ret + 1;
1887 				goto next_insn;
1888 			}
1889 
1890 			if (!bpf_map_ptr_unpriv(aux))
1891 				goto next_insn;
1892 
1893 			/* instead of changing every JIT dealing with tail_call
1894 			 * emit two extra insns:
1895 			 * if (index >= max_entries) goto out;
1896 			 * index &= array->index_mask;
1897 			 * to avoid out-of-bounds cpu speculation
1898 			 */
1899 			if (bpf_map_ptr_poisoned(aux)) {
1900 				verbose(env, "tail_call abusing map_ptr\n");
1901 				return -EINVAL;
1902 			}
1903 
1904 			map_ptr = aux->map_ptr_state.map_ptr;
1905 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
1906 						  map_ptr->max_entries, 2);
1907 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
1908 						    container_of(map_ptr,
1909 								 struct bpf_array,
1910 								 map)->index_mask);
1911 			insn_buf[2] = *insn;
1912 			cnt = 3;
1913 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1914 			if (!new_prog)
1915 				return -ENOMEM;
1916 
1917 			delta    += cnt - 1;
1918 			env->prog = prog = new_prog;
1919 			insn      = new_prog->insnsi + i + delta;
1920 			goto next_insn;
1921 		}
1922 
1923 		if (insn->imm == BPF_FUNC_timer_set_callback) {
1924 			/* The verifier will process callback_fn as many times as necessary
1925 			 * with different maps and the register states prepared by
1926 			 * set_timer_callback_state will be accurate.
1927 			 *
1928 			 * The following use case is valid:
1929 			 *   map1 is shared by prog1, prog2, prog3.
1930 			 *   prog1 calls bpf_timer_init for some map1 elements
1931 			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
1932 			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
1933 			 *   prog3 calls bpf_timer_start for some map1 elements.
1934 			 *     Those that were not both bpf_timer_init-ed and
1935 			 *     bpf_timer_set_callback-ed will return -EINVAL.
1936 			 */
1937 			struct bpf_insn ld_addrs[2] = {
1938 				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
1939 			};
1940 
1941 			insn_buf[0] = ld_addrs[0];
1942 			insn_buf[1] = ld_addrs[1];
1943 			insn_buf[2] = *insn;
1944 			cnt = 3;
1945 
1946 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1947 			if (!new_prog)
1948 				return -ENOMEM;
1949 
1950 			delta    += cnt - 1;
1951 			env->prog = prog = new_prog;
1952 			insn      = new_prog->insnsi + i + delta;
1953 			goto patch_call_imm;
1954 		}
1955 
1956 		/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
1957 		if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
1958 			/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
1959 			 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
1960 			 */
1961 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
1962 			insn_buf[1] = *insn;
1963 			cnt = 2;
1964 
1965 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1966 			if (!new_prog)
1967 				return -ENOMEM;
1968 
1969 			delta += cnt - 1;
1970 			env->prog = prog = new_prog;
1971 			insn = new_prog->insnsi + i + delta;
1972 			goto patch_call_imm;
1973 		}
1974 
1975 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
1976 		 * and other inlining handlers are currently limited to 64 bit
1977 		 * only.
1978 		 */
1979 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
1980 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
1981 		     insn->imm == BPF_FUNC_map_update_elem ||
1982 		     insn->imm == BPF_FUNC_map_delete_elem ||
1983 		     insn->imm == BPF_FUNC_map_push_elem   ||
1984 		     insn->imm == BPF_FUNC_map_pop_elem    ||
1985 		     insn->imm == BPF_FUNC_map_peek_elem   ||
1986 		     insn->imm == BPF_FUNC_redirect_map    ||
1987 		     insn->imm == BPF_FUNC_for_each_map_elem ||
1988 		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
1989 			aux = &env->insn_aux_data[i + delta];
1990 			if (bpf_map_ptr_poisoned(aux))
1991 				goto patch_call_imm;
1992 
1993 			map_ptr = aux->map_ptr_state.map_ptr;
1994 			ops = map_ptr->ops;
1995 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
1996 			    ops->map_gen_lookup) {
1997 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
1998 				if (cnt == -EOPNOTSUPP)
1999 					goto patch_map_ops_generic;
2000 				if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
2001 					verifier_bug(env, "%d insns generated for map lookup", cnt);
2002 					return -EFAULT;
2003 				}
2004 
2005 				new_prog = bpf_patch_insn_data(env, i + delta,
2006 							       insn_buf, cnt);
2007 				if (!new_prog)
2008 					return -ENOMEM;
2009 
2010 				delta    += cnt - 1;
2011 				env->prog = prog = new_prog;
2012 				insn      = new_prog->insnsi + i + delta;
2013 				goto next_insn;
2014 			}
2015 
2016 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
2017 				     (void *(*)(struct bpf_map *map, void *key))NULL));
2018 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
2019 				     (long (*)(struct bpf_map *map, void *key))NULL));
2020 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
2021 				     (long (*)(struct bpf_map *map, void *key, void *value,
2022 					      u64 flags))NULL));
2023 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
2024 				     (long (*)(struct bpf_map *map, void *value,
2025 					      u64 flags))NULL));
2026 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
2027 				     (long (*)(struct bpf_map *map, void *value))NULL));
2028 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
2029 				     (long (*)(struct bpf_map *map, void *value))NULL));
2030 			BUILD_BUG_ON(!__same_type(ops->map_redirect,
2031 				     (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
2032 			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
2033 				     (long (*)(struct bpf_map *map,
2034 					      bpf_callback_t callback_fn,
2035 					      void *callback_ctx,
2036 					      u64 flags))NULL));
2037 			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
2038 				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
2039 
2040 patch_map_ops_generic:
2041 			switch (insn->imm) {
2042 			case BPF_FUNC_map_lookup_elem:
2043 				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
2044 				goto next_insn;
2045 			case BPF_FUNC_map_update_elem:
2046 				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
2047 				goto next_insn;
2048 			case BPF_FUNC_map_delete_elem:
2049 				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
2050 				goto next_insn;
2051 			case BPF_FUNC_map_push_elem:
2052 				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
2053 				goto next_insn;
2054 			case BPF_FUNC_map_pop_elem:
2055 				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
2056 				goto next_insn;
2057 			case BPF_FUNC_map_peek_elem:
2058 				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
2059 				goto next_insn;
2060 			case BPF_FUNC_redirect_map:
2061 				insn->imm = BPF_CALL_IMM(ops->map_redirect);
2062 				goto next_insn;
2063 			case BPF_FUNC_for_each_map_elem:
2064 				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
2065 				goto next_insn;
2066 			case BPF_FUNC_map_lookup_percpu_elem:
2067 				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
2068 				goto next_insn;
2069 			}
2070 
2071 			goto patch_call_imm;
2072 		}
2073 
2074 		/* Implement bpf_jiffies64 inline. */
2075 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
2076 		    insn->imm == BPF_FUNC_jiffies64) {
2077 			struct bpf_insn ld_jiffies_addr[2] = {
2078 				BPF_LD_IMM64(BPF_REG_0,
2079 					     (unsigned long)&jiffies),
2080 			};
2081 
2082 			insn_buf[0] = ld_jiffies_addr[0];
2083 			insn_buf[1] = ld_jiffies_addr[1];
2084 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
2085 						  BPF_REG_0, 0);
2086 			cnt = 3;
2087 
2088 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
2089 						       cnt);
2090 			if (!new_prog)
2091 				return -ENOMEM;
2092 
2093 			delta    += cnt - 1;
2094 			env->prog = prog = new_prog;
2095 			insn      = new_prog->insnsi + i + delta;
2096 			goto next_insn;
2097 		}
2098 
2099 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
2100 		/* Implement bpf_get_smp_processor_id() inline. */
2101 		if (insn->imm == BPF_FUNC_get_smp_processor_id &&
2102 		    bpf_verifier_inlines_helper_call(env, insn->imm)) {
2103 			/* BPF_FUNC_get_smp_processor_id inlining is an
2104 			 * optimization, so if cpu_number is ever
2105 			 * changed in some incompatible and hard to support
2106 			 * way, it's fine to back out this inlining logic
2107 			 */
2108 #ifdef CONFIG_SMP
2109 			insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
2110 			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
2111 			insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
2112 			cnt = 3;
2113 #else
2114 			insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
2115 			cnt = 1;
2116 #endif
2117 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2118 			if (!new_prog)
2119 				return -ENOMEM;
2120 
2121 			delta    += cnt - 1;
2122 			env->prog = prog = new_prog;
2123 			insn      = new_prog->insnsi + i + delta;
2124 			goto next_insn;
2125 		}
2126 
2127 		/* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
2128 		if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
2129 		    bpf_verifier_inlines_helper_call(env, insn->imm)) {
2130 			insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&current_task);
2131 			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
2132 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
2133 			cnt = 3;
2134 
2135 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2136 			if (!new_prog)
2137 				return -ENOMEM;
2138 
2139 			delta    += cnt - 1;
2140 			env->prog = prog = new_prog;
2141 			insn      = new_prog->insnsi + i + delta;
2142 			goto next_insn;
2143 		}
2144 #endif
2145 		/* Implement bpf_get_func_arg inline. */
2146 		if (prog_type == BPF_PROG_TYPE_TRACING &&
2147 		    insn->imm == BPF_FUNC_get_func_arg) {
2148 			if (eatype == BPF_TRACE_RAW_TP) {
2149 				int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
2150 
2151 				/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
2152 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
2153 				cnt = 1;
2154 			} else {
2155 				/* Load nr_args from ctx - 8 */
2156 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2157 				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2158 				cnt = 2;
2159 			}
2160 			insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
2161 			insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
2162 			insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
2163 			insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
2164 			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
2165 			insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
2166 			insn_buf[cnt++] = BPF_JMP_A(1);
2167 			insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
2168 
2169 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2170 			if (!new_prog)
2171 				return -ENOMEM;
2172 
2173 			delta    += cnt - 1;
2174 			env->prog = prog = new_prog;
2175 			insn      = new_prog->insnsi + i + delta;
2176 			goto next_insn;
2177 		}
2178 
2179 		/* Implement bpf_get_func_ret inline. */
2180 		if (prog_type == BPF_PROG_TYPE_TRACING &&
2181 		    insn->imm == BPF_FUNC_get_func_ret) {
2182 			if (eatype == BPF_TRACE_FEXIT ||
2183 			    eatype == BPF_TRACE_FSESSION ||
2184 			    eatype == BPF_MODIFY_RETURN) {
2185 				/* Load nr_args from ctx - 8 */
2186 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2187 				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2188 				insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
2189 				insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
2190 				insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
2191 				insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
2192 				insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
2193 				cnt = 7;
2194 			} else {
2195 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
2196 				cnt = 1;
2197 			}
2198 
2199 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2200 			if (!new_prog)
2201 				return -ENOMEM;
2202 
2203 			delta    += cnt - 1;
2204 			env->prog = prog = new_prog;
2205 			insn      = new_prog->insnsi + i + delta;
2206 			goto next_insn;
2207 		}
2208 
2209 		/* Implement get_func_arg_cnt inline. */
2210 		if (prog_type == BPF_PROG_TYPE_TRACING &&
2211 		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
2212 			if (eatype == BPF_TRACE_RAW_TP) {
2213 				int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
2214 
2215 				/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
2216 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
2217 				cnt = 1;
2218 			} else {
2219 				/* Load nr_args from ctx - 8 */
2220 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2221 				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2222 				cnt = 2;
2223 			}
2224 
2225 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2226 			if (!new_prog)
2227 				return -ENOMEM;
2228 
2229 			delta    += cnt - 1;
2230 			env->prog = prog = new_prog;
2231 			insn      = new_prog->insnsi + i + delta;
2232 			goto next_insn;
2233 		}
2234 
2235 		/* Implement bpf_get_func_ip inline. */
2236 		if (prog_type == BPF_PROG_TYPE_TRACING &&
2237 		    insn->imm == BPF_FUNC_get_func_ip) {
2238 			/* Load IP address from ctx - 16 */
2239 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
2240 
2241 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
2242 			if (!new_prog)
2243 				return -ENOMEM;
2244 
2245 			env->prog = prog = new_prog;
2246 			insn      = new_prog->insnsi + i + delta;
2247 			goto next_insn;
2248 		}
2249 
2250 		/* Implement bpf_get_branch_snapshot inline. */
2251 		if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
2252 		    prog->jit_requested && BITS_PER_LONG == 64 &&
2253 		    insn->imm == BPF_FUNC_get_branch_snapshot) {
2254 			/* We are dealing with the following func protos:
2255 			 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
2256 			 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
2257 			 */
2258 			const u32 br_entry_size = sizeof(struct perf_branch_entry);
2259 
2260 			/* struct perf_branch_entry is part of UAPI and is
2261 			 * used as an array element, so extremely unlikely to
2262 			 * ever grow or shrink
2263 			 */
2264 			BUILD_BUG_ON(br_entry_size != 24);
2265 
2266 			/* if (unlikely(flags)) return -EINVAL */
2267 			insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
2268 
2269 			/* Transform size (bytes) into number of entries (cnt = size / 24).
2270 			 * But to avoid expensive division instruction, we implement
2271 			 * divide-by-3 through multiplication, followed by further
2272 			 * division by 8 through 3-bit right shift.
2273 			 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
2274 			 * p. 227, chapter "Unsigned Division by 3" for details and proofs.
2275 			 *
2276 			 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
2277 			 */
2278 			insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
2279 			insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
2280 			insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
2281 
2282 			/* call perf_snapshot_branch_stack implementation */
2283 			insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
2284 			/* if (entry_cnt == 0) return -ENOENT */
2285 			insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
2286 			/* return entry_cnt * sizeof(struct perf_branch_entry) */
2287 			insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
2288 			insn_buf[7] = BPF_JMP_A(3);
2289 			/* return -EINVAL; */
2290 			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
2291 			insn_buf[9] = BPF_JMP_A(1);
2292 			/* return -ENOENT; */
2293 			insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
2294 			cnt = 11;
2295 
2296 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2297 			if (!new_prog)
2298 				return -ENOMEM;
2299 
2300 			delta    += cnt - 1;
2301 			env->prog = prog = new_prog;
2302 			insn      = new_prog->insnsi + i + delta;
2303 			goto next_insn;
2304 		}
2305 
2306 		/* Implement bpf_kptr_xchg inline */
2307 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
2308 		    insn->imm == BPF_FUNC_kptr_xchg &&
2309 		    bpf_jit_supports_ptr_xchg()) {
2310 			insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
2311 			insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
2312 			cnt = 2;
2313 
2314 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2315 			if (!new_prog)
2316 				return -ENOMEM;
2317 
2318 			delta    += cnt - 1;
2319 			env->prog = prog = new_prog;
2320 			insn      = new_prog->insnsi + i + delta;
2321 			goto next_insn;
2322 		}
2323 patch_call_imm:
2324 		fn = env->ops->get_func_proto(insn->imm, env->prog);
2325 		/* all functions that have prototype and verifier allowed
2326 		 * programs to call them, must be real in-kernel functions
2327 		 */
2328 		if (!fn->func) {
2329 			verifier_bug(env,
2330 				     "not inlined functions %s#%d is missing func",
2331 				     func_id_name(insn->imm), insn->imm);
2332 			return -EFAULT;
2333 		}
2334 		insn->imm = fn->func - __bpf_call_base;
2335 next_insn:
2336 		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
2337 			subprogs[cur_subprog].stack_depth += stack_depth_extra;
2338 			subprogs[cur_subprog].stack_extra = stack_depth_extra;
2339 
2340 			stack_depth = subprogs[cur_subprog].stack_depth;
2341 			if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
2342 				verbose(env, "stack size %d(extra %d) is too large\n",
2343 					stack_depth, stack_depth_extra);
2344 				return -EINVAL;
2345 			}
2346 			cur_subprog++;
2347 			stack_depth = subprogs[cur_subprog].stack_depth;
2348 			stack_depth_extra = 0;
2349 		}
2350 		i++;
2351 		insn++;
2352 	}
2353 
2354 	env->prog->aux->stack_depth = subprogs[0].stack_depth;
2355 	for (i = 0; i < env->subprog_cnt; i++) {
2356 		int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
2357 		int subprog_start = subprogs[i].start;
2358 		int stack_slots = subprogs[i].stack_extra / 8;
2359 		int slots = delta, cnt = 0;
2360 
2361 		if (!stack_slots)
2362 			continue;
2363 		/* We need two slots in case timed may_goto is supported. */
2364 		if (stack_slots > slots) {
2365 			verifier_bug(env, "stack_slots supports may_goto only");
2366 			return -EFAULT;
2367 		}
2368 
2369 		stack_depth = subprogs[i].stack_depth;
2370 		if (bpf_jit_supports_timed_may_goto()) {
2371 			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
2372 						     BPF_MAX_TIMED_LOOPS);
2373 			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
2374 		} else {
2375 			/* Add ST insn to subprog prologue to init extra stack */
2376 			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
2377 						     BPF_MAX_LOOPS);
2378 		}
2379 		/* Copy first actual insn to preserve it */
2380 		insn_buf[cnt++] = env->prog->insnsi[subprog_start];
2381 
2382 		new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
2383 		if (!new_prog)
2384 			return -ENOMEM;
2385 		env->prog = prog = new_prog;
2386 		/*
2387 		 * If may_goto is a first insn of a prog there could be a jmp
2388 		 * insn that points to it, hence adjust all such jmps to point
2389 		 * to insn after BPF_ST that inits may_goto count.
2390 		 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
2391 		 */
2392 		WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
2393 	}
2394 
2395 	/* Since poke tab is now finalized, publish aux to tracker. */
2396 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
2397 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
2398 		if (!map_ptr->ops->map_poke_track ||
2399 		    !map_ptr->ops->map_poke_untrack ||
2400 		    !map_ptr->ops->map_poke_run) {
2401 			verifier_bug(env, "poke tab is misconfigured");
2402 			return -EFAULT;
2403 		}
2404 
2405 		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
2406 		if (ret < 0) {
2407 			verbose(env, "tracking tail call prog failed\n");
2408 			return ret;
2409 		}
2410 	}
2411 
2412 	ret = sort_kfunc_descs_by_imm_off(env);
2413 	if (ret)
2414 		return ret;
2415 
2416 	return 0;
2417 }
2418 
2419 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
2420 					int position,
2421 					s32 stack_base,
2422 					u32 callback_subprogno,
2423 					u32 *total_cnt)
2424 {
2425 	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
2426 	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
2427 	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
2428 	int reg_loop_max = BPF_REG_6;
2429 	int reg_loop_cnt = BPF_REG_7;
2430 	int reg_loop_ctx = BPF_REG_8;
2431 
2432 	struct bpf_insn *insn_buf = env->insn_buf;
2433 	struct bpf_prog *new_prog;
2434 	u32 callback_start;
2435 	u32 call_insn_offset;
2436 	s32 callback_offset;
2437 	u32 cnt = 0;
2438 
2439 	/* This represents an inlined version of bpf_iter.c:bpf_loop,
2440 	 * be careful to modify this code in sync.
2441 	 */
2442 
2443 	/* Return error and jump to the end of the patch if
2444 	 * expected number of iterations is too big.
2445 	 */
2446 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
2447 	insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
2448 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
2449 	/* spill R6, R7, R8 to use these as loop vars */
2450 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
2451 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
2452 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
2453 	/* initialize loop vars */
2454 	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
2455 	insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
2456 	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
2457 	/* loop header,
2458 	 * if reg_loop_cnt >= reg_loop_max skip the loop body
2459 	 */
2460 	insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
2461 	/* callback call,
2462 	 * correct callback offset would be set after patching
2463 	 */
2464 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
2465 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
2466 	insn_buf[cnt++] = BPF_CALL_REL(0);
2467 	/* increment loop counter */
2468 	insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
2469 	/* jump to loop header if callback returned 0 */
2470 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
2471 	/* return value of bpf_loop,
2472 	 * set R0 to the number of iterations
2473 	 */
2474 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
2475 	/* restore original values of R6, R7, R8 */
2476 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
2477 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
2478 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
2479 
2480 	*total_cnt = cnt;
2481 	new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
2482 	if (!new_prog)
2483 		return new_prog;
2484 
2485 	/* callback start is known only after patching */
2486 	callback_start = env->subprog_info[callback_subprogno].start;
2487 	/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
2488 	call_insn_offset = position + 12;
2489 	callback_offset = callback_start - call_insn_offset - 1;
2490 	new_prog->insnsi[call_insn_offset].imm = callback_offset;
2491 
2492 	return new_prog;
2493 }
2494 
2495 static bool is_bpf_loop_call(struct bpf_insn *insn)
2496 {
2497 	return insn->code == (BPF_JMP | BPF_CALL) &&
2498 		insn->src_reg == 0 &&
2499 		insn->imm == BPF_FUNC_loop;
2500 }
2501 
2502 /* For all sub-programs in the program (including main) check
2503  * insn_aux_data to see if there are bpf_loop calls that require
2504  * inlining. If such calls are found the calls are replaced with a
2505  * sequence of instructions produced by `inline_bpf_loop` function and
2506  * subprog stack_depth is increased by the size of 3 registers.
2507  * This stack space is used to spill values of the R6, R7, R8.  These
2508  * registers are used to store the loop bound, counter and context
2509  * variables.
2510  */
2511 int bpf_optimize_bpf_loop(struct bpf_verifier_env *env)
2512 {
2513 	struct bpf_subprog_info *subprogs = env->subprog_info;
2514 	int i, cur_subprog = 0, cnt, delta = 0;
2515 	struct bpf_insn *insn = env->prog->insnsi;
2516 	int insn_cnt = env->prog->len;
2517 	u16 stack_depth = subprogs[cur_subprog].stack_depth;
2518 	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
2519 	u16 stack_depth_extra = 0;
2520 
2521 	for (i = 0; i < insn_cnt; i++, insn++) {
2522 		struct bpf_loop_inline_state *inline_state =
2523 			&env->insn_aux_data[i + delta].loop_inline_state;
2524 
2525 		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
2526 			struct bpf_prog *new_prog;
2527 
2528 			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
2529 			new_prog = inline_bpf_loop(env,
2530 						   i + delta,
2531 						   -(stack_depth + stack_depth_extra),
2532 						   inline_state->callback_subprogno,
2533 						   &cnt);
2534 			if (!new_prog)
2535 				return -ENOMEM;
2536 
2537 			delta     += cnt - 1;
2538 			env->prog  = new_prog;
2539 			insn       = new_prog->insnsi + i + delta;
2540 		}
2541 
2542 		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
2543 			subprogs[cur_subprog].stack_depth += stack_depth_extra;
2544 			cur_subprog++;
2545 			stack_depth = subprogs[cur_subprog].stack_depth;
2546 			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
2547 			stack_depth_extra = 0;
2548 		}
2549 	}
2550 
2551 	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
2552 
2553 	return 0;
2554 }
2555 
2556 /* Remove unnecessary spill/fill pairs, members of fastcall pattern,
2557  * adjust subprograms stack depth when possible.
2558  */
2559 int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env)
2560 {
2561 	struct bpf_subprog_info *subprog = env->subprog_info;
2562 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
2563 	struct bpf_insn *insn = env->prog->insnsi;
2564 	int insn_cnt = env->prog->len;
2565 	u32 spills_num;
2566 	bool modified = false;
2567 	int i, j;
2568 
2569 	for (i = 0; i < insn_cnt; i++, insn++) {
2570 		if (aux[i].fastcall_spills_num > 0) {
2571 			spills_num = aux[i].fastcall_spills_num;
2572 			/* NOPs would be removed by opt_remove_nops() */
2573 			for (j = 1; j <= spills_num; ++j) {
2574 				*(insn - j) = NOP;
2575 				*(insn + j) = NOP;
2576 			}
2577 			modified = true;
2578 		}
2579 		if ((subprog + 1)->start == i + 1) {
2580 			if (modified && !subprog->keep_fastcall_stack)
2581 				subprog->stack_depth = -subprog->fastcall_stack_off;
2582 			subprog++;
2583 			modified = false;
2584 		}
2585 	}
2586 
2587 	return 0;
2588 }
2589 
2590