xref: /linux/kernel/bpf/fixups.c (revision aec3202247b4ab41c5bf3b9f704a2d9a323a051b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
3 #include <linux/bpf.h>
4 #include <linux/btf.h>
5 #include <linux/bpf_verifier.h>
6 #include <linux/filter.h>
7 #include <linux/vmalloc.h>
8 #include <linux/bsearch.h>
9 #include <linux/sort.h>
10 #include <linux/perf_event.h>
11 #include <net/xdp.h>
12 #include "disasm.h"
13 
14 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
15 
16 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
17 {
18 	return BPF_CLASS(insn->code) == BPF_STX &&
19 	       BPF_MODE(insn->code) == BPF_ATOMIC &&
20 	       insn->imm == BPF_CMPXCHG;
21 }
22 
23 /* Return the regno defined by the insn, or -1. */
24 static int insn_def_regno(const struct bpf_insn *insn)
25 {
26 	switch (BPF_CLASS(insn->code)) {
27 	case BPF_JMP:
28 	case BPF_JMP32:
29 	case BPF_ST:
30 		return -1;
31 	case BPF_STX:
32 		if (BPF_MODE(insn->code) == BPF_ATOMIC ||
33 		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
34 			if (insn->imm == BPF_CMPXCHG)
35 				return BPF_REG_0;
36 			else if (insn->imm == BPF_LOAD_ACQ)
37 				return insn->dst_reg;
38 			else if (insn->imm & BPF_FETCH)
39 				return insn->src_reg;
40 		}
41 		return -1;
42 	default:
43 		return insn->dst_reg;
44 	}
45 }
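
/* Illustrative examples (editor's sketch, not part of this file), using the
 * usual verifier disassembly for atomic insns:
 *
 *   r0 = atomic64_cmpxchg((u64 *)(r2 + 0), r0, r1)   -> defines BPF_REG_0
 *   r1 = load_acquire((u64 *)(r2 + 0))               -> defines dst_reg (r1)
 *   r1 = atomic64_fetch_add((u64 *)(r2 + 0), r1)     -> defines src_reg (r1)
 *   lock *(u64 *)(r2 + 0) += r1  (no BPF_FETCH)      -> defines nothing (-1)
 */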
46 
47 /* Return true if INSN explicitly defines a 32-bit value. */
48 static bool insn_has_def32(struct bpf_insn *insn)
49 {
50 	int dst_reg = insn_def_regno(insn);
51 
52 	if (dst_reg == -1)
53 		return false;
54 
55 	return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
56 }
57 
58 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
59 {
60 	const struct bpf_kfunc_desc *d0 = a;
61 	const struct bpf_kfunc_desc *d1 = b;
62 
63 	if (d0->imm != d1->imm)
64 		return d0->imm < d1->imm ? -1 : 1;
65 	if (d0->offset != d1->offset)
66 		return d0->offset < d1->offset ? -1 : 1;
67 	return 0;
68 }
69 
70 const struct btf_func_model *
71 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
72 			 const struct bpf_insn *insn)
73 {
74 	const struct bpf_kfunc_desc desc = {
75 		.imm = insn->imm,
76 		.offset = insn->off,
77 	};
78 	const struct bpf_kfunc_desc *res;
79 	struct bpf_kfunc_desc_tab *tab;
80 
81 	tab = prog->aux->kfunc_tab;
82 	res = bsearch(&desc, tab->descs, tab->nr_descs,
83 		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
84 
85 	return res ? &res->func_model : NULL;
86 }
87 
88 static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
89 {
90 	unsigned long call_imm;
91 
92 	if (bpf_jit_supports_far_kfunc_call()) {
93 		call_imm = desc->func_id;
94 	} else {
95 		call_imm = BPF_CALL_IMM(desc->addr);
96 		/* Check whether the relative offset overflows desc->imm */
97 		if ((unsigned long)(s32)call_imm != call_imm) {
98 			verbose(env, "address of kernel func_id %u is out of range\n",
99 				desc->func_id);
100 			return -EINVAL;
101 		}
102 	}
103 	desc->imm = call_imm;
104 	return 0;
105 }
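
/* Editor's note (a sketch, assuming the generic BPF_CALL_IMM() definition):
 * without far kfunc call support, desc->imm holds the s32 distance that is
 * later added back to __bpf_call_base:
 *
 *   imm  = (long)desc->addr - (long)__bpf_call_base;   // BPF_CALL_IMM()
 *   addr = (long)__bpf_call_base + insn->imm;          // at JIT time
 *
 * With far kfunc calls the JIT resolves the address from the func_id
 * itself, so no range check is needed.
 */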
106 
107 static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
108 {
109 	struct bpf_kfunc_desc_tab *tab;
110 	int i, err;
111 
112 	tab = env->prog->aux->kfunc_tab;
113 	if (!tab)
114 		return 0;
115 
116 	for (i = 0; i < tab->nr_descs; i++) {
117 		err = set_kfunc_desc_imm(env, &tab->descs[i]);
118 		if (err)
119 			return err;
120 	}
121 
122 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
123 	     kfunc_desc_cmp_by_imm_off, NULL);
124 	return 0;
125 }
126 
127 static int add_kfunc_in_insns(struct bpf_verifier_env *env,
128 			      struct bpf_insn *insn, int cnt)
129 {
130 	int i, ret;
131 
132 	for (i = 0; i < cnt; i++, insn++) {
133 		if (bpf_pseudo_kfunc_call(insn)) {
134 			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
135 			if (ret < 0)
136 				return ret;
137 		}
138 	}
139 	return 0;
140 }
141 
142 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
143 static int get_callee_stack_depth(struct bpf_verifier_env *env,
144 				  const struct bpf_insn *insn, int idx)
145 {
146 	int start = idx + insn->imm + 1, subprog;
147 
148 	subprog = bpf_find_subprog(env, start);
149 	if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
150 		return -EFAULT;
151 	return env->subprog_info[subprog].stack_depth;
152 }
153 #endif
154 
155 /* A single env->prog->insnsi[off] instruction was replaced with the range
156  * insnsi[off, off + cnt). Adjust the corresponding insn_aux_data by moving
157  * the tail [off, end) up by cnt - 1 slots, so the patched range starts zeroed.
158  */
159 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
160 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
161 {
162 	struct bpf_insn_aux_data *data = env->insn_aux_data;
163 	struct bpf_insn *insn = new_prog->insnsi;
164 	u32 old_seen = data[off].seen;
165 	u32 prog_len;
166 	int i;
167 
168 	/* aux info at OFF always needs adjustment, no matter whether the fast
169 	 * path (cnt == 1) is taken or not: there is no guarantee that the insn
170 	 * at OFF is still the original insn from the old prog.
171 	 */
172 	data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
173 
174 	if (cnt == 1)
175 		return;
176 	prog_len = new_prog->len;
177 
178 	memmove(data + off + cnt - 1, data + off,
179 		sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
180 	memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
181 	for (i = off; i < off + cnt - 1; i++) {
182 		/* Propagate insnsi[off]'s original 'seen' mark to the patched range. */
183 		data[i].seen = old_seen;
184 		data[i].zext_dst = insn_has_def32(insn + i);
185 	}
186 }
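
/* Worked example (illustrative): patching insn 3 with a 4-insn sequence,
 * i.e. off == 3, cnt == 4, old prog len 6:
 *
 *   old aux:  [0][1][2][3][4][5]
 *   new aux:  [0][1][2][z][z][z][3'][4][5]
 *
 * The old tail starting at off moves up by cnt - 1 == 3 slots, the three
 * zeroed slots inherit insn 3's 'seen' mark, and zext_dst is recomputed
 * for every insn in the patched range.
 */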
187 
188 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
189 {
190 	int i;
191 
192 	if (len == 1)
193 		return;
194 	/* NOTE: fake 'exit' subprog should be updated as well. */
195 	for (i = 0; i <= env->subprog_cnt; i++) {
196 		if (env->subprog_info[i].start <= off)
197 			continue;
198 		env->subprog_info[i].start += len - 1;
199 	}
200 }
201 
202 static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
203 {
204 	int i;
205 
206 	if (len == 1)
207 		return;
208 
209 	for (i = 0; i < env->insn_array_map_cnt; i++)
210 		bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
211 }
212 
213 static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
214 {
215 	int i;
216 
217 	for (i = 0; i < env->insn_array_map_cnt; i++)
218 		bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
219 }
220 
221 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
222 {
223 	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
224 	int i, sz = prog->aux->size_poke_tab;
225 	struct bpf_jit_poke_descriptor *desc;
226 
227 	for (i = 0; i < sz; i++) {
228 		desc = &tab[i];
229 		if (desc->insn_idx <= off)
230 			continue;
231 		desc->insn_idx += len - 1;
232 	}
233 }
234 
235 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
236 					    const struct bpf_insn *patch, u32 len)
237 {
238 	struct bpf_prog *new_prog;
239 	struct bpf_insn_aux_data *new_data = NULL;
240 
241 	if (len > 1) {
242 		new_data = vrealloc(env->insn_aux_data,
243 				    array_size(env->prog->len + len - 1,
244 					       sizeof(struct bpf_insn_aux_data)),
245 				    GFP_KERNEL_ACCOUNT | __GFP_ZERO);
246 		if (!new_data)
247 			return NULL;
248 
249 		env->insn_aux_data = new_data;
250 	}
251 
252 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
253 	if (IS_ERR(new_prog)) {
254 		if (PTR_ERR(new_prog) == -ERANGE)
255 			verbose(env,
256 				"insn %d cannot be patched due to 16-bit range\n",
257 				env->insn_aux_data[off].orig_idx);
258 		return NULL;
259 	}
260 	adjust_insn_aux_data(env, new_prog, off, len);
261 	adjust_subprog_starts(env, off, len);
262 	adjust_insn_arrays(env, off, len);
263 	adjust_poke_descs(new_prog, off, len);
264 	return new_prog;
265 }
266 
267 /*
268  * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
269  * jump offset by 'delta'.
270  */
271 static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
272 {
273 	struct bpf_insn *insn = prog->insnsi;
274 	u32 insn_cnt = prog->len, i;
275 	s32 imm;
276 	s16 off;
277 
278 	for (i = 0; i < insn_cnt; i++, insn++) {
279 		u8 code = insn->code;
280 
281 		if (tgt_idx <= i && i < tgt_idx + delta)
282 			continue;
283 
284 		if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
285 		    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
286 			continue;
287 
288 		if (insn->code == (BPF_JMP32 | BPF_JA)) {
289 			if (i + 1 + insn->imm != tgt_idx)
290 				continue;
291 			if (check_add_overflow(insn->imm, delta, &imm))
292 				return -ERANGE;
293 			insn->imm = imm;
294 		} else {
295 			if (i + 1 + insn->off != tgt_idx)
296 				continue;
297 			if (check_add_overflow(insn->off, delta, &off))
298 				return -ERANGE;
299 			insn->off = off;
300 		}
301 	}
302 	return 0;
303 }
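
/* Example (illustrative): after inserting delta == 2 insns at insn 10,
 * a "goto +4" at insn 5 used to land on 5 + 1 + 4 == 10; that target now
 * sits at insn 12, so the offset becomes +6. Jump insns located inside
 * [tgt_idx, tgt_idx + delta) are skipped; at the prologue-patching call
 * site below those are the freshly inserted insns themselves.
 */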
304 
305 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
306 					      u32 off, u32 cnt)
307 {
308 	int i, j;
309 
310 	/* find first prog starting at or after off (first to remove) */
311 	for (i = 0; i < env->subprog_cnt; i++)
312 		if (env->subprog_info[i].start >= off)
313 			break;
314 	/* find first prog starting at or after off + cnt (first to stay) */
315 	for (j = i; j < env->subprog_cnt; j++)
316 		if (env->subprog_info[j].start >= off + cnt)
317 			break;
318 	/* if subprog j doesn't start exactly at off + cnt, we are only
319 	 * removing the front of the previous subprog
320 	 */
321 	if (env->subprog_info[j].start != off + cnt)
322 		j--;
323 
324 	if (j > i) {
325 		struct bpf_prog_aux *aux = env->prog->aux;
326 		int move;
327 
328 		/* move fake 'exit' subprog as well */
329 		move = env->subprog_cnt + 1 - j;
330 
331 		memmove(env->subprog_info + i,
332 			env->subprog_info + j,
333 			sizeof(*env->subprog_info) * move);
334 		env->subprog_cnt -= j - i;
335 
336 		/* remove func_info */
337 		if (aux->func_info) {
338 			move = aux->func_info_cnt - j;
339 
340 			memmove(aux->func_info + i,
341 				aux->func_info + j,
342 				sizeof(*aux->func_info) * move);
343 			aux->func_info_cnt -= j - i;
344 			/* func_info->insn_off is set after all code rewrites,
345 			 * in adjust_btf_func() - no need to adjust
346 			 */
347 		}
348 	} else {
349 		/* convert i from "first prog to remove" to "first to adjust" */
350 		if (env->subprog_info[i].start == off)
351 			i++;
352 	}
353 
354 	/* update fake 'exit' subprog as well */
355 	for (; i <= env->subprog_cnt; i++)
356 		env->subprog_info[i].start -= cnt;
357 
358 	return 0;
359 }
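
/* Example (illustrative): subprog starts {0, 10, 20, 30}, fake 'exit'
 * subprog at 40. Removing insns [10, 20) (off == 10, cnt == 10) drops the
 * subprog starting at 10 entirely and pulls the later starts in:
 * {0, 10, 20}, fake 'exit' at 30.
 */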
360 
361 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
362 				      u32 cnt)
363 {
364 	struct bpf_prog *prog = env->prog;
365 	u32 i, l_off, l_cnt, nr_linfo;
366 	struct bpf_line_info *linfo;
367 
368 	nr_linfo = prog->aux->nr_linfo;
369 	if (!nr_linfo)
370 		return 0;
371 
372 	linfo = prog->aux->linfo;
373 
374 	/* find first line info to remove, count lines to be removed */
375 	for (i = 0; i < nr_linfo; i++)
376 		if (linfo[i].insn_off >= off)
377 			break;
378 
379 	l_off = i;
380 	l_cnt = 0;
381 	for (; i < nr_linfo; i++)
382 		if (linfo[i].insn_off < off + cnt)
383 			l_cnt++;
384 		else
385 			break;
386 
387 	/* If the first live insn doesn't match the first live linfo, it must
388 	 * "inherit" the last removed linfo. prog is already modified, so
389 	 * prog->len == off means nothing live remains (the tail was removed).
390 	 */
391 	if (prog->len != off && l_cnt &&
392 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
393 		l_cnt--;
394 		linfo[--i].insn_off = off + cnt;
395 	}
396 
397 	/* remove the line info entries that refer to the removed instructions */
398 	if (l_cnt) {
399 		memmove(linfo + l_off, linfo + i,
400 			sizeof(*linfo) * (nr_linfo - i));
401 
402 		prog->aux->nr_linfo -= l_cnt;
403 		nr_linfo = prog->aux->nr_linfo;
404 	}
405 
406 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
407 	for (i = l_off; i < nr_linfo; i++)
408 		linfo[i].insn_off -= cnt;
409 
410 	/* fix up all subprogs (incl. 'exit') which start >= off */
411 	for (i = 0; i <= env->subprog_cnt; i++)
412 		if (env->subprog_info[i].linfo_idx > l_off) {
413 			/* program may have started in the removed region but
414 			 * may not be fully removed
415 			 */
416 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
417 				env->subprog_info[i].linfo_idx -= l_cnt;
418 			else
419 				env->subprog_info[i].linfo_idx = l_off;
420 		}
421 
422 	return 0;
423 }
424 
425 /*
426  * Clean up dynamically allocated fields of aux data for insns [start, start + len)
427  */
428 void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
429 {
430 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
431 	struct bpf_insn *insns = env->prog->insnsi;
432 	int end = start + len;
433 	int i;
434 
435 	for (i = start; i < end; i++) {
436 		if (aux_data[i].jt) {
437 			kvfree(aux_data[i].jt);
438 			aux_data[i].jt = NULL;
439 		}
440 
441 		if (bpf_is_ldimm64(&insns[i]))
442 			i++;
443 	}
444 }
445 
446 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
447 {
448 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
449 	unsigned int orig_prog_len = env->prog->len;
450 	int err;
451 
452 	if (bpf_prog_is_offloaded(env->prog->aux))
453 		bpf_prog_offload_remove_insns(env, off, cnt);
454 
455 	/* Should be called before bpf_remove_insns, as it uses prog->insnsi */
456 	bpf_clear_insn_aux_data(env, off, cnt);
457 
458 	err = bpf_remove_insns(env->prog, off, cnt);
459 	if (err)
460 		return err;
461 
462 	err = adjust_subprog_starts_after_remove(env, off, cnt);
463 	if (err)
464 		return err;
465 
466 	err = bpf_adj_linfo_after_remove(env, off, cnt);
467 	if (err)
468 		return err;
469 
470 	adjust_insn_arrays_after_remove(env, off, cnt);
471 
472 	memmove(aux_data + off,	aux_data + off + cnt,
473 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
474 
475 	return 0;
476 }
477 
478 static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
479 static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
480 
481 bool bpf_insn_is_cond_jump(u8 code)
482 {
483 	u8 op;
484 
485 	op = BPF_OP(code);
486 	if (BPF_CLASS(code) == BPF_JMP32)
487 		return op != BPF_JA;
488 
489 	if (BPF_CLASS(code) != BPF_JMP)
490 		return false;
491 
492 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
493 }
494 
495 void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
496 {
497 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
498 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
499 	struct bpf_insn *insn = env->prog->insnsi;
500 	const int insn_cnt = env->prog->len;
501 	int i;
502 
503 	for (i = 0; i < insn_cnt; i++, insn++) {
504 		if (!bpf_insn_is_cond_jump(insn->code))
505 			continue;
506 
507 		if (!aux_data[i + 1].seen)
508 			ja.off = insn->off;
509 		else if (!aux_data[i + 1 + insn->off].seen)
510 			ja.off = 0;
511 		else
512 			continue;
513 
514 		if (bpf_prog_is_offloaded(env->prog->aux))
515 			bpf_prog_offload_replace_insn(env, i, &ja);
516 
517 		memcpy(insn, &ja, sizeof(ja));
518 	}
519 }
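
/* Effect (illustrative): for a conditional jump whose fall-through was
 * never marked 'seen' by the verifier,
 *
 *   before:  if r1 != 0 goto +5     // insn i; insn i + 1 is dead
 *   after:   goto +5
 *
 * and conversely, if the jump target is dead, the insn becomes "goto +0",
 * a nop that bpf_opt_remove_nops() strips later.
 */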
520 
521 int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
522 {
523 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
524 	int insn_cnt = env->prog->len;
525 	int i, err;
526 
527 	for (i = 0; i < insn_cnt; i++) {
528 		int j;
529 
530 		j = 0;
531 		while (i + j < insn_cnt && !aux_data[i + j].seen)
532 			j++;
533 		if (!j)
534 			continue;
535 
536 		err = verifier_remove_insns(env, i, j);
537 		if (err)
538 			return err;
539 		insn_cnt = env->prog->len;
540 	}
541 
542 	return 0;
543 }
544 
545 int bpf_opt_remove_nops(struct bpf_verifier_env *env)
546 {
547 	struct bpf_insn *insn = env->prog->insnsi;
548 	int insn_cnt = env->prog->len;
549 	bool is_may_goto_0, is_ja;
550 	int i, err;
551 
552 	for (i = 0; i < insn_cnt; i++) {
553 		is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
554 		is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
555 
556 		if (!is_may_goto_0 && !is_ja)
557 			continue;
558 
559 		err = verifier_remove_insns(env, i, 1);
560 		if (err)
561 			return err;
562 		insn_cnt--;
563 		/* Go back one insn to catch may_goto +1; may_goto +0 sequence */
564 		i -= (is_may_goto_0 && i > 0) ? 2 : 1;
565 	}
566 
567 	return 0;
568 }
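
/* Example (illustrative): removing "may_goto +0" at insn i can turn a
 * "may_goto +1" at insn i - 1 into "may_goto +0", since its target
 * collapses onto the following insn. Stepping i back by one after such a
 * removal lets the scan catch the newly formed nop.
 */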
569 
570 int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
571 					 const union bpf_attr *attr)
572 {
573 	struct bpf_insn *patch;
574 	/* use env->insn_buf as two independent buffers */
575 	struct bpf_insn *zext_patch = env->insn_buf;
576 	struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
577 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
578 	int i, patch_len, delta = 0, len = env->prog->len;
579 	struct bpf_insn *insns = env->prog->insnsi;
580 	struct bpf_prog *new_prog;
581 	bool rnd_hi32;
582 
583 	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
584 	zext_patch[1] = BPF_ZEXT_REG(0);
585 	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
586 	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
587 	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
588 	for (i = 0; i < len; i++) {
589 		int adj_idx = i + delta;
590 		struct bpf_insn insn;
591 		int load_reg;
592 
593 		insn = insns[adj_idx];
594 		load_reg = insn_def_regno(&insn);
595 		if (!aux[adj_idx].zext_dst) {
596 			u8 code, class;
597 			u32 imm_rnd;
598 
599 			if (!rnd_hi32)
600 				continue;
601 
602 			code = insn.code;
603 			class = BPF_CLASS(code);
604 			if (load_reg == -1)
605 				continue;
606 
607 			/* NOTE: arg "reg" (the fourth one) is only used for
608 			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
609 			 *       here.
610 			 */
611 			if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
612 				if (class == BPF_LD &&
613 				    BPF_MODE(code) == BPF_IMM)
614 					i++;
615 				continue;
616 			}
617 
618 			/* ctx load could be transformed into wider load. */
619 			if (class == BPF_LDX &&
620 			    aux[adj_idx].ptr_type == PTR_TO_CTX)
621 				continue;
622 
623 			imm_rnd = get_random_u32();
624 			rnd_hi32_patch[0] = insn;
625 			rnd_hi32_patch[1].imm = imm_rnd;
626 			rnd_hi32_patch[3].dst_reg = load_reg;
627 			patch = rnd_hi32_patch;
628 			patch_len = 4;
629 			goto apply_patch_buffer;
630 		}
631 		/* Add in a zero-extend instruction if a) the JIT has requested
632 		/* Add in an zero-extend instruction if a) the JIT has requested
633 		 * it or b) it's a CMPXCHG.
634 		 *
635 		 * The latter is because: BPF_CMPXCHG always loads a value into
636 		 * R0, therefore always zero-extends. However some archs'
637 		 * equivalent instruction only does this load when the
638 		 * comparison is successful. This detail of CMPXCHG is
639 		 * orthogonal to the general zero-extension behaviour of the
640 		 * CPU, so it's treated independently of bpf_jit_needs_zext.
641 		 */
642 		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
643 			continue;
644 
645 		/* Zero-extension is done by the caller. */
646 		if (bpf_pseudo_kfunc_call(&insn))
647 			continue;
648 
649 		if (verifier_bug_if(load_reg == -1, env,
650 				    "zext_dst is set, but no reg is defined"))
651 			return -EFAULT;
652 
653 		zext_patch[0] = insn;
654 		zext_patch[1].dst_reg = load_reg;
655 		zext_patch[1].src_reg = load_reg;
656 		patch = zext_patch;
657 		patch_len = 2;
658 apply_patch_buffer:
659 		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
660 		if (!new_prog)
661 			return -ENOMEM;
662 		env->prog = new_prog;
663 		insns = new_prog->insnsi;
664 		aux = env->insn_aux_data;
665 		delta += patch_len - 1;
666 	}
667 
668 	return 0;
669 }
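
/* Patch shapes (illustrative; rAX denotes the hidden BPF_REG_AX):
 *
 *   zext:      w1 = w2     =>   w1 = w2
 *                               r1 = (u32)r1        // BPF_ZEXT_REG
 *
 *   rnd_hi32:  w1 = w2     =>   w1 = w2
 *                               rAX = <random u32>
 *                               rAX <<= 32
 *                               r1 |= rAX           // poison upper half
 */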
670 
671 /* convert load instructions that access fields of a context type into a
672  * sequence of instructions that access fields of the underlying structure:
673  *     struct __sk_buff    -> struct sk_buff
674  *     struct bpf_sock_ops -> struct sock
675  */
676 int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
677 {
678 	struct bpf_subprog_info *subprogs = env->subprog_info;
679 	const struct bpf_verifier_ops *ops = env->ops;
680 	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
681 	const int insn_cnt = env->prog->len;
682 	struct bpf_insn *epilogue_buf = env->epilogue_buf;
683 	struct bpf_insn *insn_buf = env->insn_buf;
684 	struct bpf_insn *insn;
685 	u32 target_size, size_default, off;
686 	struct bpf_prog *new_prog;
687 	enum bpf_access_type type;
688 	bool is_narrower_load;
689 	int epilogue_idx = 0;
690 
691 	if (ops->gen_epilogue) {
692 		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
693 						 -(subprogs[0].stack_depth + 8));
694 		if (epilogue_cnt >= INSN_BUF_SIZE) {
695 			verifier_bug(env, "epilogue is too long");
696 			return -EFAULT;
697 		} else if (epilogue_cnt) {
698 			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
699 			cnt = 0;
700 			subprogs[0].stack_depth += 8;
701 			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
702 						      -subprogs[0].stack_depth);
703 			insn_buf[cnt++] = env->prog->insnsi[0];
704 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
705 			if (!new_prog)
706 				return -ENOMEM;
707 			env->prog = new_prog;
708 			delta += cnt - 1;
709 
710 			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
711 			if (ret < 0)
712 				return ret;
713 		}
714 	}
715 
716 	if (ops->gen_prologue || env->seen_direct_write) {
717 		if (!ops->gen_prologue) {
718 			verifier_bug(env, "gen_prologue is null");
719 			return -EFAULT;
720 		}
721 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
722 					env->prog);
723 		if (cnt >= INSN_BUF_SIZE) {
724 			verifier_bug(env, "prologue is too long");
725 			return -EFAULT;
726 		} else if (cnt) {
727 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
728 			if (!new_prog)
729 				return -ENOMEM;
730 
731 			env->prog = new_prog;
732 			delta += cnt - 1;
733 
734 			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
735 			if (ret < 0)
736 				return ret;
737 		}
738 	}
739 
740 	if (delta)
741 		WARN_ON(adjust_jmp_off(env->prog, 0, delta));
742 
743 	if (bpf_prog_is_offloaded(env->prog->aux))
744 		return 0;
745 
746 	insn = env->prog->insnsi + delta;
747 
748 	for (i = 0; i < insn_cnt; i++, insn++) {
749 		bpf_convert_ctx_access_t convert_ctx_access;
750 		u8 mode;
751 
752 		if (env->insn_aux_data[i + delta].nospec) {
753 			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
754 			struct bpf_insn *patch = insn_buf;
755 
756 			*patch++ = BPF_ST_NOSPEC();
757 			*patch++ = *insn;
758 			cnt = patch - insn_buf;
759 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
760 			if (!new_prog)
761 				return -ENOMEM;
762 
763 			delta    += cnt - 1;
764 			env->prog = new_prog;
765 			insn      = new_prog->insnsi + i + delta;
766 			/* This can not be easily merged with the
767 			 * nospec_result-case, because an insn may require a
768 			 * nospec before and after itself. Therefore also do not
769 			 * 'continue' here but potentially apply further
770 			 * patching to insn. *insn should equal patch[1] now.
771 			 */
772 		}
773 
774 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
775 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
776 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
777 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
778 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
779 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
780 		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
781 			type = BPF_READ;
782 		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
783 			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
784 			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
785 			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
786 			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
787 			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
788 			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
789 			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
790 			type = BPF_WRITE;
791 		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
792 			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
793 			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
794 			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
795 			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
796 			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
797 			env->prog->aux->num_exentries++;
798 			continue;
799 		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
800 			   epilogue_cnt &&
801 			   i + delta < subprogs[1].start) {
802 			/* Generate epilogue for the main prog */
803 			if (epilogue_idx) {
804 				/* jump back to the earlier generated epilogue */
805 				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
806 				cnt = 1;
807 			} else {
808 				memcpy(insn_buf, epilogue_buf,
809 				       epilogue_cnt * sizeof(*epilogue_buf));
810 				cnt = epilogue_cnt;
811 				/* epilogue_idx cannot be 0: there is at
812 				 * least one ctx ptr saving insn before the
813 				 * epilogue.
814 				 */
815 				epilogue_idx = i + delta;
816 			}
817 			goto patch_insn_buf;
818 		} else {
819 			continue;
820 		}
821 
822 		if (type == BPF_WRITE &&
823 		    env->insn_aux_data[i + delta].nospec_result) {
824 			/* nospec_result is only used to mitigate Spectre v4 and
825 			 * to limit verification-time for Spectre v1.
826 			 */
827 			struct bpf_insn *patch = insn_buf;
828 
829 			*patch++ = *insn;
830 			*patch++ = BPF_ST_NOSPEC();
831 			cnt = patch - insn_buf;
832 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
833 			if (!new_prog)
834 				return -ENOMEM;
835 
836 			delta    += cnt - 1;
837 			env->prog = new_prog;
838 			insn      = new_prog->insnsi + i + delta;
839 			continue;
840 		}
841 
842 		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
843 		case PTR_TO_CTX:
844 			if (!ops->convert_ctx_access)
845 				continue;
846 			convert_ctx_access = ops->convert_ctx_access;
847 			break;
848 		case PTR_TO_SOCKET:
849 		case PTR_TO_SOCK_COMMON:
850 			convert_ctx_access = bpf_sock_convert_ctx_access;
851 			break;
852 		case PTR_TO_TCP_SOCK:
853 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
854 			break;
855 		case PTR_TO_XDP_SOCK:
856 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
857 			break;
858 		case PTR_TO_BTF_ID:
859 		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
860 		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime and an
861 		 * active ref_obj_id, unlike plain PTR_TO_BTF_ID; but the same cannot
862 		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
863 		 * any faults for loads into such types. BPF_WRITE is disallowed
864 		 * for this case.
865 		 */
866 		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
867 		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
868 			if (type == BPF_READ) {
869 				if (BPF_MODE(insn->code) == BPF_MEM)
870 					insn->code = BPF_LDX | BPF_PROBE_MEM |
871 						     BPF_SIZE((insn)->code);
872 				else
873 					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
874 						     BPF_SIZE((insn)->code);
875 				env->prog->aux->num_exentries++;
876 			}
877 			continue;
878 		case PTR_TO_ARENA:
879 			if (BPF_MODE(insn->code) == BPF_MEMSX) {
880 				if (!bpf_jit_supports_insn(insn, true)) {
881 					verbose(env, "sign extending loads from arena are not supported yet\n");
882 					return -EOPNOTSUPP;
883 				}
884 				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
885 			} else {
886 				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
887 			}
888 			env->prog->aux->num_exentries++;
889 			continue;
890 		default:
891 			continue;
892 		}
893 
894 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
895 		size = BPF_LDST_BYTES(insn);
896 		mode = BPF_MODE(insn->code);
897 
898 		/* If the read access is a narrower load of the field,
899 		 * convert it to a 4/8-byte load, to minimize program-type
900 		 * specific convert_ctx_access changes. If the conversion is
901 		 * successful, we will apply the proper mask to the result.
902 		 */
903 		is_narrower_load = size < ctx_field_size;
904 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
905 		off = insn->off;
906 		if (is_narrower_load) {
907 			u8 size_code;
908 
909 			if (type == BPF_WRITE) {
910 				verifier_bug(env, "narrow ctx access misconfigured");
911 				return -EFAULT;
912 			}
913 
914 			size_code = BPF_H;
915 			if (ctx_field_size == 4)
916 				size_code = BPF_W;
917 			else if (ctx_field_size == 8)
918 				size_code = BPF_DW;
919 
920 			insn->off = off & ~(size_default - 1);
921 			insn->code = BPF_LDX | BPF_MEM | size_code;
922 		}
923 
924 		target_size = 0;
925 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
926 					 &target_size);
927 		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
928 		    (ctx_field_size && !target_size)) {
929 			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
930 			return -EFAULT;
931 		}
932 
933 		if (is_narrower_load && size < target_size) {
934 			u8 shift = bpf_ctx_narrow_access_offset(
935 				off, size, size_default) * 8;
936 			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
937 				verifier_bug(env, "narrow ctx load misconfigured");
938 				return -EFAULT;
939 			}
940 			if (ctx_field_size <= 4) {
941 				if (shift)
942 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
943 									insn->dst_reg,
944 									shift);
945 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
946 								(1 << size * 8) - 1);
947 			} else {
948 				if (shift)
949 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
950 									insn->dst_reg,
951 									shift);
952 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
953 								(1ULL << size * 8) - 1);
954 			}
955 		}
956 		if (mode == BPF_MEMSX)
957 			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
958 						       insn->dst_reg, insn->dst_reg,
959 						       size * 8, 0);
960 
961 patch_insn_buf:
962 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
963 		if (!new_prog)
964 			return -ENOMEM;
965 
966 		delta += cnt - 1;
967 
968 		/* keep walking new program and skip insns we just inserted */
969 		env->prog = new_prog;
970 		insn      = new_prog->insnsi + i + delta;
971 	}
972 
973 	return 0;
974 }
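
/* Worked narrow-load example (illustrative; field offsets are program-type
 * specific, shift shown for little-endian): a 1-byte read at offset 1 of a
 * 4-byte ctx field becomes a 4-byte read of the field start, then the
 * wanted byte is shifted down and masked:
 *
 *   r1 = *(u8 *)(r2 + 1)   =>   r1 = *(u32 *)(r2 + 0)  // converted access
 *                               w1 >>= 8               // shift = off * 8
 *                               w1 &= 0xff             // (1 << size * 8) - 1
 */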
975 
976 int bpf_jit_subprogs(struct bpf_verifier_env *env)
977 {
978 	struct bpf_prog *prog = env->prog, **func, *tmp;
979 	int i, j, subprog_start, subprog_end = 0, len, subprog;
980 	struct bpf_map *map_ptr;
981 	struct bpf_insn *insn;
982 	void *old_bpf_func;
983 	int err, num_exentries;
984 	int old_len, subprog_start_adjustment = 0;
985 
986 	if (env->subprog_cnt <= 1)
987 		return 0;
988 
989 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
990 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
991 			continue;
992 
993 		/* Upon error here we cannot fall back to the interpreter but
994 		 * need a hard reject of the program. Thus -EFAULT is
995 		 * propagated in any case.
996 		 */
997 		subprog = bpf_find_subprog(env, i + insn->imm + 1);
998 		if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
999 				    i + insn->imm + 1))
1000 			return -EFAULT;
1001 		/* temporarily remember subprog id inside insn instead of
1002 		 * aux_data, since next loop will split up all insns into funcs
1003 		 */
1004 		insn->off = subprog;
1005 		/* remember original imm in case JIT fails and fallback
1006 		 * to interpreter will be needed
1007 		 */
1008 		env->insn_aux_data[i].call_imm = insn->imm;
1009 		/* point imm to __bpf_call_base+1 from JITs point of view */
1010 		insn->imm = 1;
1011 		if (bpf_pseudo_func(insn)) {
1012 #if defined(MODULES_VADDR)
1013 			u64 addr = MODULES_VADDR;
1014 #else
1015 			u64 addr = VMALLOC_START;
1016 #endif
1017 			/* The JIT (e.g. x86_64) may emit fewer instructions
1018 			 * if it learns a u32 imm is the same as a u64 imm.
1019 			 * Set it close enough to a possible prog address.
1020 			 */
1021 			insn[0].imm = (u32)addr;
1022 			insn[1].imm = addr >> 32;
1023 		}
1024 	}
1025 
1026 	err = bpf_prog_alloc_jited_linfo(prog);
1027 	if (err)
1028 		goto out_undo_insn;
1029 
1030 	err = -ENOMEM;
1031 	func = kzalloc_objs(prog, env->subprog_cnt);
1032 	if (!func)
1033 		goto out_undo_insn;
1034 
1035 	for (i = 0; i < env->subprog_cnt; i++) {
1036 		subprog_start = subprog_end;
1037 		subprog_end = env->subprog_info[i + 1].start;
1038 
1039 		len = subprog_end - subprog_start;
1040 		/* bpf_prog_run() doesn't call subprogs directly,
1041 		 * hence main prog stats include the runtime of subprogs.
1042 		 * Subprogs don't have IDs and are not reachable via
1043 		 * prog_get_next_id, so func[i]->stats is never accessed and stays NULL.
1044 		 */
1045 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1046 		if (!func[i])
1047 			goto out_free;
1048 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
1049 		       len * sizeof(struct bpf_insn));
1050 		func[i]->type = prog->type;
1051 		func[i]->len = len;
1052 		if (bpf_prog_calc_tag(func[i]))
1053 			goto out_free;
1054 		func[i]->is_func = 1;
1055 		func[i]->sleepable = prog->sleepable;
1056 		func[i]->aux->func_idx = i;
1057 		/* Below members will be freed only at prog->aux */
1058 		func[i]->aux->btf = prog->aux->btf;
1059 		func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
1060 		func[i]->aux->func_info = prog->aux->func_info;
1061 		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
1062 		func[i]->aux->poke_tab = prog->aux->poke_tab;
1063 		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
1064 		func[i]->aux->main_prog_aux = prog->aux;
1065 
1066 		for (j = 0; j < prog->aux->size_poke_tab; j++) {
1067 			struct bpf_jit_poke_descriptor *poke;
1068 
1069 			poke = &prog->aux->poke_tab[j];
1070 			if (poke->insn_idx < subprog_end &&
1071 			    poke->insn_idx >= subprog_start)
1072 				poke->aux = func[i]->aux;
1073 		}
1074 
1075 		func[i]->aux->name[0] = 'F';
1076 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1077 		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
1078 			func[i]->aux->jits_use_priv_stack = true;
1079 
1080 		func[i]->jit_requested = 1;
1081 		func[i]->blinding_requested = prog->blinding_requested;
1082 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
1083 		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
1084 		func[i]->aux->linfo = prog->aux->linfo;
1085 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
1086 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
1087 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1088 		func[i]->aux->arena = prog->aux->arena;
1089 		func[i]->aux->used_maps = env->used_maps;
1090 		func[i]->aux->used_map_cnt = env->used_map_cnt;
1091 		num_exentries = 0;
1092 		insn = func[i]->insnsi;
1093 		for (j = 0; j < func[i]->len; j++, insn++) {
1094 			if (BPF_CLASS(insn->code) == BPF_LDX &&
1095 			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
1096 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
1097 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
1098 			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
1099 				num_exentries++;
1100 			if ((BPF_CLASS(insn->code) == BPF_STX ||
1101 			     BPF_CLASS(insn->code) == BPF_ST) &&
1102 			     BPF_MODE(insn->code) == BPF_PROBE_MEM32)
1103 				num_exentries++;
1104 			if (BPF_CLASS(insn->code) == BPF_STX &&
1105 			     BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
1106 				num_exentries++;
1107 		}
1108 		func[i]->aux->num_exentries = num_exentries;
1109 		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
1110 		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
1111 		func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
1112 		func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
1113 		if (!i)
1114 			func[i]->aux->exception_boundary = env->seen_exception;
1115 
1116 		/*
1117 		 * To properly pass the absolute subprog start to the JIT,
1118 		 * all instruction adjustments must be accumulated
1119 		 */
1120 		old_len = func[i]->len;
1121 		func[i] = bpf_int_jit_compile(func[i]);
1122 		subprog_start_adjustment += func[i]->len - old_len;
1123 
1124 		if (!func[i]->jited) {
1125 			err = -ENOTSUPP;
1126 			goto out_free;
1127 		}
1128 		cond_resched();
1129 	}
1130 
1131 	/* at this point all bpf functions were successfully JITed
1132 	 * now populate all bpf_calls with correct addresses and
1133 	 * run last pass of JIT
1134 	 */
1135 	for (i = 0; i < env->subprog_cnt; i++) {
1136 		insn = func[i]->insnsi;
1137 		for (j = 0; j < func[i]->len; j++, insn++) {
1138 			if (bpf_pseudo_func(insn)) {
1139 				subprog = insn->off;
1140 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
1141 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
1142 				continue;
1143 			}
1144 			if (!bpf_pseudo_call(insn))
1145 				continue;
1146 			subprog = insn->off;
1147 			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
1148 		}
1149 
1150 		/* we use the aux data to keep a list of the start addresses
1151 		 * of the JITed images for each function in the program
1152 		 *
1153 		 * for some architectures, such as powerpc64, the imm field
1154 		 * might not be large enough to hold the offset of the start
1155 		 * address of the callee's JITed image from __bpf_call_base
1156 		 *
1157 		 * in such cases, we can lookup the start address of a callee
1158 		 * by using its subprog id, available from the off field of
1159 		 * the call instruction, as an index for this list
1160 		 */
1161 		func[i]->aux->func = func;
1162 		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
1163 		func[i]->aux->real_func_cnt = env->subprog_cnt;
1164 	}
1165 	for (i = 0; i < env->subprog_cnt; i++) {
1166 		old_bpf_func = func[i]->bpf_func;
1167 		tmp = bpf_int_jit_compile(func[i]);
1168 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
1169 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
1170 			err = -ENOTSUPP;
1171 			goto out_free;
1172 		}
1173 		cond_resched();
1174 	}
1175 
1176 	/*
1177 	 * Cleanup func[i]->aux fields which aren't required
1178 	 * or can become invalid in future
1179 	 */
1180 	for (i = 0; i < env->subprog_cnt; i++) {
1181 		func[i]->aux->used_maps = NULL;
1182 		func[i]->aux->used_map_cnt = 0;
1183 	}
1184 
1185 	/* finally lock prog and jit images for all functions and
1186 	 * populate kallsyms. Begin at the first subprogram, since
1187 	 * bpf_prog_load will add the kallsyms for the main program.
1188 	 */
1189 	for (i = 1; i < env->subprog_cnt; i++) {
1190 		err = bpf_prog_lock_ro(func[i]);
1191 		if (err)
1192 			goto out_free;
1193 	}
1194 
1195 	for (i = 1; i < env->subprog_cnt; i++)
1196 		bpf_prog_kallsyms_add(func[i]);
1197 
1198 	/* Last step: make the now unused interpreter insns from the main
1199 	 * prog consistent for later dump requests, so they look the
1200 	 * same as if the program had only ever been interpreted.
1201 	 */
1202 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1203 		if (bpf_pseudo_func(insn)) {
1204 			insn[0].imm = env->insn_aux_data[i].call_imm;
1205 			insn[1].imm = insn->off;
1206 			insn->off = 0;
1207 			continue;
1208 		}
1209 		if (!bpf_pseudo_call(insn))
1210 			continue;
1211 		insn->off = env->insn_aux_data[i].call_imm;
1212 		subprog = bpf_find_subprog(env, i + insn->off + 1);
1213 		insn->imm = subprog;
1214 	}
1215 
1216 	prog->jited = 1;
1217 	prog->bpf_func = func[0]->bpf_func;
1218 	prog->jited_len = func[0]->jited_len;
1219 	prog->aux->extable = func[0]->aux->extable;
1220 	prog->aux->num_exentries = func[0]->aux->num_exentries;
1221 	prog->aux->func = func;
1222 	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
1223 	prog->aux->real_func_cnt = env->subprog_cnt;
1224 	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
1225 	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
1226 	bpf_prog_jit_attempt_done(prog);
1227 	return 0;
1228 out_free:
1229 	/* We failed JIT'ing, so at this point we need to unregister poke
1230 	 * descriptors from subprogs, so that the kernel does not attempt to
1231 	 * patch them anymore as we're freeing the subprog JIT memory.
1232 	 */
1233 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
1234 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
1235 		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
1236 	}
1237 	/* At this point we're guaranteed that poke descriptors are not
1238 	 * live anymore. We can just unlink each subprog's descriptor table,
1239 	 * as it's released with the main prog.
1240 	 */
1241 	for (i = 0; i < env->subprog_cnt; i++) {
1242 		if (!func[i])
1243 			continue;
1244 		func[i]->aux->poke_tab = NULL;
1245 		bpf_jit_free(func[i]);
1246 	}
1247 	kfree(func);
1248 out_undo_insn:
1249 	/* cleanup main prog to be interpreted */
1250 	prog->jit_requested = 0;
1251 	prog->blinding_requested = 0;
1252 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1253 		if (!bpf_pseudo_call(insn))
1254 			continue;
1255 		insn->off = 0;
1256 		insn->imm = env->insn_aux_data[i].call_imm;
1257 	}
1258 	bpf_prog_jit_attempt_done(prog);
1259 	return err;
1260 }
1261 
1262 int bpf_fixup_call_args(struct bpf_verifier_env *env)
1263 {
1264 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1265 	struct bpf_prog *prog = env->prog;
1266 	struct bpf_insn *insn = prog->insnsi;
1267 	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
1268 	int i, depth;
1269 #endif
1270 	int err = 0;
1271 
1272 	if (env->prog->jit_requested &&
1273 	    !bpf_prog_is_offloaded(env->prog->aux)) {
1274 		err = bpf_jit_subprogs(env);
1275 		if (err == 0)
1276 			return 0;
1277 		if (err == -EFAULT)
1278 			return err;
1279 	}
1280 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1281 	if (has_kfunc_call) {
1282 		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
1283 		return -EINVAL;
1284 	}
1285 	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
1286 		/* When JIT fails the progs with bpf2bpf calls and tail_calls
1287 		 * have to be rejected, since interpreter doesn't support them yet.
1288 		 */
1289 		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
1290 		return -EINVAL;
1291 	}
1292 	for (i = 0; i < prog->len; i++, insn++) {
1293 		if (bpf_pseudo_func(insn)) {
1294 			/* When JIT fails the progs with callback calls
1295 			 * have to be rejected, since interpreter doesn't support them yet.
1296 			 */
1297 			verbose(env, "callbacks are not allowed in non-JITed programs\n");
1298 			return -EINVAL;
1299 		}
1300 
1301 		if (!bpf_pseudo_call(insn))
1302 			continue;
1303 		depth = get_callee_stack_depth(env, insn, i);
1304 		if (depth < 0)
1305 			return depth;
1306 		bpf_patch_call_args(insn, depth);
1307 	}
1308 	err = 0;
1309 #endif
1310 	return err;
1311 }
1312 
1313 
1314 /* The function requires that the first instruction in 'patch' is insnsi[prog->len - 1] */
1315 static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
1316 {
1317 	struct bpf_subprog_info *info = env->subprog_info;
1318 	int cnt = env->subprog_cnt;
1319 	struct bpf_prog *prog;
1320 
1321 	/* We only reserve one slot for hidden subprogs in subprog_info. */
1322 	if (env->hidden_subprog_cnt) {
1323 		verifier_bug(env, "only one hidden subprog supported");
1324 		return -EFAULT;
1325 	}
1326 	/* We're not patching any existing instruction, just appending the new
1327 	 * ones for the hidden subprog. Hence all of the adjustment operations
1328 	 * in bpf_patch_insn_data are no-ops.
1329 	 */
1330 	prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
1331 	if (!prog)
1332 		return -ENOMEM;
1333 	env->prog = prog;
1334 	info[cnt + 1].start = info[cnt].start;
1335 	info[cnt].start = prog->len - len + 1;
1336 	env->subprog_cnt++;
1337 	env->hidden_subprog_cnt++;
1338 	return 0;
1339 }
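
/* Sketch of the append (illustrative): with prog->len == N and
 * patch == { old insn N - 1, A, B }, patching at N - 1 yields
 *
 *   insns:    [0 .. N-2][N-1][A][B]
 *   subprogs: info[cnt].start == N     // first appended insn
 *
 * Nothing before N - 1 moves, so existing jump offsets stay valid.
 */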
1340 
1341 /* Do various post-verification rewrites in a single program pass.
1342  * These rewrites simplify JIT and interpreter implementations.
1343  */
1344 int bpf_do_misc_fixups(struct bpf_verifier_env *env)
1345 {
1346 	struct bpf_prog *prog = env->prog;
1347 	enum bpf_attach_type eatype = prog->expected_attach_type;
1348 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
1349 	struct bpf_insn *insn = prog->insnsi;
1350 	const struct bpf_func_proto *fn;
1351 	const int insn_cnt = prog->len;
1352 	const struct bpf_map_ops *ops;
1353 	struct bpf_insn_aux_data *aux;
1354 	struct bpf_insn *insn_buf = env->insn_buf;
1355 	struct bpf_prog *new_prog;
1356 	struct bpf_map *map_ptr;
1357 	int i, ret, cnt, delta = 0, cur_subprog = 0;
1358 	struct bpf_subprog_info *subprogs = env->subprog_info;
1359 	u16 stack_depth = subprogs[cur_subprog].stack_depth;
1360 	u16 stack_depth_extra = 0;
1361 
1362 	if (env->seen_exception && !env->exception_callback_subprog) {
1363 		struct bpf_insn *patch = insn_buf;
1364 
1365 		*patch++ = env->prog->insnsi[insn_cnt - 1];
1366 		*patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
1367 		*patch++ = BPF_EXIT_INSN();
1368 		ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
1369 		if (ret < 0)
1370 			return ret;
1371 		prog = env->prog;
1372 		insn = prog->insnsi;
1373 
1374 		env->exception_callback_subprog = env->subprog_cnt - 1;
1375 		/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
1376 		bpf_mark_subprog_exc_cb(env, env->exception_callback_subprog);
1377 	}
1378 
1379 	for (i = 0; i < insn_cnt;) {
1380 		if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
1381 			if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
1382 			    (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
1383 				/* convert to 32-bit mov that clears upper 32-bit */
1384 				insn->code = BPF_ALU | BPF_MOV | BPF_X;
1385 				/* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
1386 				insn->off = 0;
1387 				insn->imm = 0;
1388 			} /* cast from as(0) to as(1) should be handled by JIT */
1389 			goto next_insn;
1390 		}
1391 
1392 		if (env->insn_aux_data[i + delta].needs_zext)
1393 			/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
1394 			insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
1395 
1396 		/* Make sdiv/smod divide-by-minus-one exceptions impossible. */
1397 		if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
1398 		     insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
1399 		     insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
1400 		     insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
1401 		    insn->off == 1 && insn->imm == -1) {
1402 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1403 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
1404 			struct bpf_insn *patch = insn_buf;
1405 
1406 			if (isdiv)
1407 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1408 							BPF_NEG | BPF_K, insn->dst_reg,
1409 							0, 0, 0);
1410 			else
1411 				*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
1412 
1413 			cnt = patch - insn_buf;
1414 
1415 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1416 			if (!new_prog)
1417 				return -ENOMEM;
1418 
1419 			delta    += cnt - 1;
1420 			env->prog = prog = new_prog;
1421 			insn      = new_prog->insnsi + i + delta;
1422 			goto next_insn;
1423 		}
1424 
1425 		/* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
1426 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
1427 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
1428 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
1429 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
1430 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1431 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
1432 			bool is_sdiv = isdiv && insn->off == 1;
1433 			bool is_smod = !isdiv && insn->off == 1;
1434 			struct bpf_insn *patch = insn_buf;
1435 
1436 			if (is_sdiv) {
1437 				/* [R,W]x sdiv 0 -> 0
1438 				 * LLONG_MIN sdiv -1 -> LLONG_MIN
1439 				 * INT_MIN sdiv -1 -> INT_MIN
1440 				 */
1441 				*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1442 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1443 							BPF_ADD | BPF_K, BPF_REG_AX,
1444 							0, 0, 1);
1445 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1446 							BPF_JGT | BPF_K, BPF_REG_AX,
1447 							0, 4, 1);
1448 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1449 							BPF_JEQ | BPF_K, BPF_REG_AX,
1450 							0, 1, 0);
1451 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1452 							BPF_MOV | BPF_K, insn->dst_reg,
1453 							0, 0, 0);
1454 				/* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
1455 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1456 							BPF_NEG | BPF_K, insn->dst_reg,
1457 							0, 0, 0);
1458 				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1459 				*patch++ = *insn;
1460 				cnt = patch - insn_buf;
1461 			} else if (is_smod) {
1462 				/* [R,W]x mod 0 -> [R,W]x */
1463 				/* [R,W]x mod -1 -> 0 */
1464 				*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1465 				*patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1466 							BPF_ADD | BPF_K, BPF_REG_AX,
1467 							0, 0, 1);
1468 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1469 							BPF_JGT | BPF_K, BPF_REG_AX,
1470 							0, 3, 1);
1471 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1472 							BPF_JEQ | BPF_K, BPF_REG_AX,
1473 							0, 3 + (is64 ? 0 : 1), 1);
1474 				*patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
1475 				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1476 				*patch++ = *insn;
1477 
1478 				if (!is64) {
1479 					*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1480 					*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
1481 				}
1482 				cnt = patch - insn_buf;
1483 			} else if (isdiv) {
1484 				/* [R,W]x div 0 -> 0 */
1485 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1486 							BPF_JNE | BPF_K, insn->src_reg,
1487 							0, 2, 0);
1488 				*patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
1489 				*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1490 				*patch++ = *insn;
1491 				cnt = patch - insn_buf;
1492 			} else {
1493 				/* [R,W]x mod 0 -> [R,W]x */
1494 				*patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1495 							BPF_JEQ | BPF_K, insn->src_reg,
1496 							0, 1 + (is64 ? 0 : 1), 0);
1497 				*patch++ = *insn;
1498 
1499 				if (!is64) {
1500 					*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1501 					*patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
1502 				}
1503 				cnt = patch - insn_buf;
1504 			}
1505 
1506 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1507 			if (!new_prog)
1508 				return -ENOMEM;
1509 
1510 			delta    += cnt - 1;
1511 			env->prog = prog = new_prog;
1512 			insn      = new_prog->insnsi + i + delta;
1513 			goto next_insn;
1514 		}
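
		/* Expanded sdiv shape (illustrative, 64-bit case; rAX is the
		 * hidden BPF_REG_AX):
		 *
		 *   r1 s/= r2  =>  rAX = r2
		 *                  rAX += 1                // 0 -> 1, -1 -> 0
		 *                  if rAX > 1 goto do_div  // ordinary divisor
		 *                  if rAX == 0 goto neg    // divisor was -1
		 *                  r1 = 0                  // divisor was 0 ...
		 *           neg:   r1 = -r1                // ... and -0 == 0
		 *                  goto out
		 *        do_div:   r1 s/= r2
		 *           out:
		 */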
1515 
1516 		/* Make it impossible to dereference a userspace address */
1517 		if (BPF_CLASS(insn->code) == BPF_LDX &&
1518 		    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
1519 		     BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
1520 			struct bpf_insn *patch = insn_buf;
1521 			u64 uaddress_limit = bpf_arch_uaddress_limit();
1522 
1523 			if (!uaddress_limit)
1524 				goto next_insn;
1525 
1526 			*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1527 			if (insn->off)
1528 				*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
1529 			*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
1530 			*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
1531 			*patch++ = *insn;
1532 			*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1533 			*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
1534 
1535 			cnt = patch - insn_buf;
1536 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1537 			if (!new_prog)
1538 				return -ENOMEM;
1539 
1540 			delta    += cnt - 1;
1541 			env->prog = prog = new_prog;
1542 			insn      = new_prog->insnsi + i + delta;
1543 			goto next_insn;
1544 		}
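
		/* Guard shape (illustrative): with a nonzero uaddress_limit,
		 * "r1 = *(u64 *)(r2 + off)" under BPF_PROBE_MEM becomes
		 *
		 *   rAX = r2 + off
		 *   rAX >>= 32
		 *   if rAX <= (uaddress_limit >> 32) goto user
		 *   r1 = *(u64 *)(r2 + off)
		 *   goto out
		 *  user:
		 *   r1 = 0        // userspace address: load zero instead
		 *  out:
		 */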
1545 
1546 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
1547 		if (BPF_CLASS(insn->code) == BPF_LD &&
1548 		    (BPF_MODE(insn->code) == BPF_ABS ||
1549 		     BPF_MODE(insn->code) == BPF_IND)) {
1550 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
1551 			if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
1552 				verifier_bug(env, "%d insns generated for ld_abs", cnt);
1553 				return -EFAULT;
1554 			}
1555 
1556 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1557 			if (!new_prog)
1558 				return -ENOMEM;
1559 
1560 			delta    += cnt - 1;
1561 			env->prog = prog = new_prog;
1562 			insn      = new_prog->insnsi + i + delta;
1563 			goto next_insn;
1564 		}
1565 
1566 		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
1567 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
1568 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
1569 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
1570 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
1571 			struct bpf_insn *patch = insn_buf;
1572 			bool issrc, isneg, isimm;
1573 			u32 off_reg;
1574 
1575 			aux = &env->insn_aux_data[i + delta];
1576 			if (!aux->alu_state ||
1577 			    aux->alu_state == BPF_ALU_NON_POINTER)
1578 				goto next_insn;
1579 
1580 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
1581 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
1582 				BPF_ALU_SANITIZE_SRC;
1583 			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
1584 
1585 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
1586 			if (isimm) {
1587 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
1588 			} else {
1589 				if (isneg)
1590 					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
1591 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
1592 				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
1593 				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
1594 				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
1595 				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
1596 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
1597 			}
1598 			if (!issrc)
1599 				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
1600 			insn->src_reg = BPF_REG_AX;
1601 			if (isneg)
1602 				insn->code = insn->code == code_add ?
1603 					     code_sub : code_add;
1604 			*patch++ = *insn;
1605 			if (issrc && isneg && !isimm)
1606 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
1607 			cnt = patch - insn_buf;
1608 
1609 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1610 			if (!new_prog)
1611 				return -ENOMEM;
1612 
1613 			delta    += cnt - 1;
1614 			env->prog = prog = new_prog;
1615 			insn      = new_prog->insnsi + i + delta;
1616 			goto next_insn;
1617 		}
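
		/* Masking sketch (illustrative, non-immediate case): for
		 * "dst += off_reg" with a verifier-computed alu_limit,
		 *
		 *   rAX = alu_limit
		 *   rAX -= off_reg
		 *   rAX |= off_reg   // sign bit set iff off_reg is negative
		 *                    // or exceeds the limit
		 *   rAX = -rAX
		 *   rAX s>>= 63      // all-ones if in range, else zero
		 *   rAX &= off_reg   // off_reg, or 0 when out of range
		 *   dst += rAX
		 *
		 * so an out-of-bounds offset adds 0 even under misspeculation.
		 */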
1618 
1619 		if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
1620 			int stack_off_cnt = -stack_depth - 16;
1621 
1622 			/*
1623 			 * Two 8-byte slots: depth-16 stores the count, and
1624 			 * depth-8 stores the start timestamp of the loop.
1625 			 *
1626 			 * The starting value of count is BPF_MAX_TIMED_LOOPS
1627 			 * (0xffff). Every iteration loads it and subtracts 1 from it,
1628 			 * until the value becomes 0 in AX (thus, 1 in stack),
1629 			 * after which we call arch_bpf_timed_may_goto, which
1630 			 * either sets AX to 0xffff to keep looping, or to 0
1631 			 * upon timeout. AX is then stored into the stack. In
1632 			 * the next iteration, we either see 0 and break out, or
1633 			 * continue iterating until the next time the value hits 0
1634 			 * after subtraction, rinse and repeat.
1635 			 */
1636 			stack_depth_extra = 16;
1637 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
1638 			if (insn->off >= 0)
1639 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
1640 			else
1641 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
1642 			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
1643 			insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
1644 			/*
1645 			 * AX is used as an argument to pass in stack_off_cnt
1646 			 * (to add to r10/fp), and also as the return value of
1647 			 * the call to arch_bpf_timed_may_goto.
1648 			 */
1649 			insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
1650 			insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
1651 			insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
1652 			cnt = 7;
1653 
1654 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1655 			if (!new_prog)
1656 				return -ENOMEM;
1657 
1658 			delta += cnt - 1;
1659 			env->prog = prog = new_prog;
1660 			insn = new_prog->insnsi + i + delta;
1661 			goto next_insn;
1662 		} else if (bpf_is_may_goto_insn(insn)) {
1663 			int stack_off = -stack_depth - 8;
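			/* Non-timed fallback: a single 8-byte slot at
			 * depth-8 holds the remaining-iterations count
			 * (initialized to BPF_MAX_LOOPS in the subprog
			 * prologue patched in later); each may_goto loads
			 * it, takes the branch once it reaches 0, and
			 * otherwise decrements and stores it back.
			 */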
1664 
1665 			stack_depth_extra = 8;
1666 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
1667 			if (insn->off >= 0)
1668 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
1669 			else
1670 				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
1671 			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
1672 			insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
1673 			cnt = 4;
1674 
1675 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1676 			if (!new_prog)
1677 				return -ENOMEM;
1678 
1679 			delta += cnt - 1;
1680 			env->prog = prog = new_prog;
1681 			insn = new_prog->insnsi + i + delta;
1682 			goto next_insn;
1683 		}
1684 
1685 		if (insn->code != (BPF_JMP | BPF_CALL))
1686 			goto next_insn;
1687 		if (insn->src_reg == BPF_PSEUDO_CALL)
1688 			goto next_insn;
1689 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1690 			ret = bpf_fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
1691 			if (ret)
1692 				return ret;
1693 			if (cnt == 0)
1694 				goto next_insn;
1695 
1696 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1697 			if (!new_prog)
1698 				return -ENOMEM;
1699 
1700 			delta	 += cnt - 1;
1701 			env->prog = prog = new_prog;
1702 			insn	  = new_prog->insnsi + i + delta;
1703 			goto next_insn;
1704 		}
1705 
1706 		/* Skip inlining the helper call if the JIT does it. */
1707 		if (bpf_jit_inlines_helper_call(insn->imm))
1708 			goto next_insn;
1709 
1710 		if (insn->imm == BPF_FUNC_get_route_realm)
1711 			prog->dst_needed = 1;
1712 		if (insn->imm == BPF_FUNC_get_prandom_u32)
1713 			bpf_user_rnd_init_once();
1714 		if (insn->imm == BPF_FUNC_override_return)
1715 			prog->kprobe_override = 1;
1716 		if (insn->imm == BPF_FUNC_tail_call) {
1717 			/* If we tail call into other programs, we
1718 			 * cannot make any assumptions since they can
1719 			 * be replaced dynamically during runtime in
1720 			 * the program array.
1721 			 */
1722 			prog->cb_access = 1;
1723 			if (!bpf_allow_tail_call_in_subprogs(env))
1724 				prog->aux->stack_depth = MAX_BPF_STACK;
1725 			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
1726 
1727 			/* mark bpf_tail_call as a different opcode to avoid
1728 			 * a conditional branch in the interpreter for every
1729 			 * normal call, and to prevent accidental JITing by a
1730 			 * JIT compiler that doesn't support bpf_tail_call yet
1731 			 */
1732 			insn->imm = 0;
1733 			insn->code = BPF_JMP | BPF_TAIL_CALL;
1734 
1735 			aux = &env->insn_aux_data[i + delta];
1736 			if (env->bpf_capable && !prog->blinding_requested &&
1737 			    prog->jit_requested &&
1738 			    !bpf_map_key_poisoned(aux) &&
1739 			    !bpf_map_ptr_poisoned(aux) &&
1740 			    !bpf_map_ptr_unpriv(aux)) {
1741 				struct bpf_jit_poke_descriptor desc = {
1742 					.reason = BPF_POKE_REASON_TAIL_CALL,
1743 					.tail_call.map = aux->map_ptr_state.map_ptr,
1744 					.tail_call.key = bpf_map_key_immediate(aux),
1745 					.insn_idx = i + delta,
1746 				};
1747 
1748 				ret = bpf_jit_add_poke_descriptor(prog, &desc);
1749 				if (ret < 0) {
1750 					verbose(env, "adding tail call poke descriptor failed\n");
1751 					return ret;
1752 				}
1753 
1754 				insn->imm = ret + 1;
1755 				goto next_insn;
1756 			}
1757 
1758 			if (!bpf_map_ptr_unpriv(aux))
1759 				goto next_insn;
1760 
1761 			/* Instead of changing every JIT dealing with tail_call,
1762 			 * emit two extra insns:
1763 			 * if (index >= max_entries) goto out;
1764 			 * index &= array->index_mask;
1765 			 * to avoid out-of-bounds CPU speculation.
1766 			 */
1767 			if (bpf_map_ptr_poisoned(aux)) {
1768 				verbose(env, "tail_call abusing map_ptr\n");
1769 				return -EINVAL;
1770 			}
1771 
1772 			map_ptr = aux->map_ptr_state.map_ptr;
1773 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
1774 						  map_ptr->max_entries, 2);
1775 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
1776 						    container_of(map_ptr,
1777 								 struct bpf_array,
1778 								 map)->index_mask);
1779 			insn_buf[2] = *insn;
1780 			cnt = 3;
1781 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1782 			if (!new_prog)
1783 				return -ENOMEM;
1784 
1785 			delta    += cnt - 1;
1786 			env->prog = prog = new_prog;
1787 			insn      = new_prog->insnsi + i + delta;
1788 			goto next_insn;
1789 		}
1790 
1791 		if (insn->imm == BPF_FUNC_timer_set_callback) {
1792 			/* The verifier will process callback_fn as many times as necessary,
1793 			 * with different maps, and the register states prepared by
1794 			 * set_timer_callback_state will be accurate.
1795 			 *
1796 			 * The following use case is valid:
1797 			 *   map1 is shared by prog1, prog2, prog3.
1798 			 *   prog1 calls bpf_timer_init for some map1 elements
1799 			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
1800 			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
1801 			 *   prog3 calls bpf_timer_start for some map1 elements.
1802 			 *     Those that were not both bpf_timer_init-ed and
1803 			 *     bpf_timer_set_callback-ed will return -EINVAL.
1804 			 */
1805 			struct bpf_insn ld_addrs[2] = {
1806 				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
1807 			};
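			/* BPF_LD_IMM64() expands to a pair of insns, so the
			 * single initializer above fills both ld_addrs[]
			 * slots; it loads prog->aux into R3, the third
			 * argument of the patched helper call.
			 */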
1808 
1809 			insn_buf[0] = ld_addrs[0];
1810 			insn_buf[1] = ld_addrs[1];
1811 			insn_buf[2] = *insn;
1812 			cnt = 3;
1813 
1814 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1815 			if (!new_prog)
1816 				return -ENOMEM;
1817 
1818 			delta    += cnt - 1;
1819 			env->prog = prog = new_prog;
1820 			insn      = new_prog->insnsi + i + delta;
1821 			goto patch_call_imm;
1822 		}
1823 
1824 		/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
1825 		if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
1826 			/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
1827 			 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
1828 			 */
1829 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
1830 			insn_buf[1] = *insn;
1831 			cnt = 2;
1832 
1833 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1834 			if (!new_prog)
1835 				return -ENOMEM;
1836 
1837 			delta += cnt - 1;
1838 			env->prog = prog = new_prog;
1839 			insn = new_prog->insnsi + i + delta;
1840 			goto patch_call_imm;
1841 		}
1842 
1843 		/* The BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
1844 		 * and other inlining handlers currently hold for 64-bit
1845 		 * architectures only.
1846 		 */
1847 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
1848 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
1849 		     insn->imm == BPF_FUNC_map_update_elem ||
1850 		     insn->imm == BPF_FUNC_map_delete_elem ||
1851 		     insn->imm == BPF_FUNC_map_push_elem   ||
1852 		     insn->imm == BPF_FUNC_map_pop_elem    ||
1853 		     insn->imm == BPF_FUNC_map_peek_elem   ||
1854 		     insn->imm == BPF_FUNC_redirect_map    ||
1855 		     insn->imm == BPF_FUNC_for_each_map_elem ||
1856 		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
1857 			aux = &env->insn_aux_data[i + delta];
1858 			if (bpf_map_ptr_poisoned(aux))
1859 				goto patch_call_imm;
1860 
1861 			map_ptr = aux->map_ptr_state.map_ptr;
1862 			ops = map_ptr->ops;
1863 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
1864 			    ops->map_gen_lookup) {
1865 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
1866 				if (cnt == -EOPNOTSUPP)
1867 					goto patch_map_ops_generic;
1868 				if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
1869 					verifier_bug(env, "%d insns generated for map lookup", cnt);
1870 					return -EFAULT;
1871 				}
1872 
1873 				new_prog = bpf_patch_insn_data(env, i + delta,
1874 							       insn_buf, cnt);
1875 				if (!new_prog)
1876 					return -ENOMEM;
1877 
1878 				delta    += cnt - 1;
1879 				env->prog = prog = new_prog;
1880 				insn      = new_prog->insnsi + i + delta;
1881 				goto next_insn;
1882 			}
1883 
1884 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
1885 				     (void *(*)(struct bpf_map *map, void *key))NULL));
1886 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
1887 				     (long (*)(struct bpf_map *map, void *key))NULL));
1888 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
1889 				     (long (*)(struct bpf_map *map, void *key, void *value,
1890 					      u64 flags))NULL));
1891 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
1892 				     (long (*)(struct bpf_map *map, void *value,
1893 					      u64 flags))NULL));
1894 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
1895 				     (long (*)(struct bpf_map *map, void *value))NULL));
1896 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
1897 				     (long (*)(struct bpf_map *map, void *value))NULL));
1898 			BUILD_BUG_ON(!__same_type(ops->map_redirect,
1899 				     (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
1900 			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
1901 				     (long (*)(struct bpf_map *map,
1902 					      bpf_callback_t callback_fn,
1903 					      void *callback_ctx,
1904 					      u64 flags))NULL));
1905 			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
1906 				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
1907 
1908 patch_map_ops_generic:
1909 			switch (insn->imm) {
1910 			case BPF_FUNC_map_lookup_elem:
1911 				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
1912 				goto next_insn;
1913 			case BPF_FUNC_map_update_elem:
1914 				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
1915 				goto next_insn;
1916 			case BPF_FUNC_map_delete_elem:
1917 				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
1918 				goto next_insn;
1919 			case BPF_FUNC_map_push_elem:
1920 				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
1921 				goto next_insn;
1922 			case BPF_FUNC_map_pop_elem:
1923 				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
1924 				goto next_insn;
1925 			case BPF_FUNC_map_peek_elem:
1926 				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
1927 				goto next_insn;
1928 			case BPF_FUNC_redirect_map:
1929 				insn->imm = BPF_CALL_IMM(ops->map_redirect);
1930 				goto next_insn;
1931 			case BPF_FUNC_for_each_map_elem:
1932 				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
1933 				goto next_insn;
1934 			case BPF_FUNC_map_lookup_percpu_elem:
1935 				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
1936 				goto next_insn;
1937 			}
1938 
1939 			goto patch_call_imm;
1940 		}
1941 
1942 		/* Implement bpf_jiffies64 inline. */
1943 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
1944 		    insn->imm == BPF_FUNC_jiffies64) {
1945 			struct bpf_insn ld_jiffies_addr[2] = {
1946 				BPF_LD_IMM64(BPF_REG_0,
1947 					     (unsigned long)&jiffies),
1948 			};
1949 
1950 			insn_buf[0] = ld_jiffies_addr[0];
1951 			insn_buf[1] = ld_jiffies_addr[1];
1952 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
1953 						  BPF_REG_0, 0);
1954 			cnt = 3;
1955 
1956 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
1957 						       cnt);
1958 			if (!new_prog)
1959 				return -ENOMEM;
1960 
1961 			delta    += cnt - 1;
1962 			env->prog = prog = new_prog;
1963 			insn      = new_prog->insnsi + i + delta;
1964 			goto next_insn;
1965 		}
1966 
1967 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
1968 		/* Implement bpf_get_smp_processor_id() inline. */
1969 		if (insn->imm == BPF_FUNC_get_smp_processor_id &&
1970 		    bpf_verifier_inlines_helper_call(env, insn->imm)) {
1971 			/* BPF_FUNC_get_smp_processor_id inlining is an
1972 			 * optimization, so if cpu_number is ever changed in
1973 			 * some incompatible and hard-to-support way, it's
1974 			 * fine to back out this inlining logic.
1975 			 */
1976 #ifdef CONFIG_SMP
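			/* The three insns below amount to
			 * r0 = *(u32 *)this_cpu_ptr(&cpu_number), i.e. the
			 * current CPU id, which is what the helper returns.
			 */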
1977 			insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
1978 			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
1979 			insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
1980 			cnt = 3;
1981 #else
1982 			insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
1983 			cnt = 1;
1984 #endif
1985 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1986 			if (!new_prog)
1987 				return -ENOMEM;
1988 
1989 			delta    += cnt - 1;
1990 			env->prog = prog = new_prog;
1991 			insn      = new_prog->insnsi + i + delta;
1992 			goto next_insn;
1993 		}
1994 
1995 		/* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
1996 		if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
1997 		    bpf_verifier_inlines_helper_call(env, insn->imm)) {
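			/* Same per-CPU pattern as above:
			 * r0 = *(u64 *)this_cpu_ptr(&current_task), i.e. the
			 * current task_struct pointer.
			 */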
1998 			insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&current_task);
1999 			insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
2000 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
2001 			cnt = 3;
2002 
2003 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2004 			if (!new_prog)
2005 				return -ENOMEM;
2006 
2007 			delta    += cnt - 1;
2008 			env->prog = prog = new_prog;
2009 			insn      = new_prog->insnsi + i + delta;
2010 			goto next_insn;
2011 		}
2012 #endif
2013 		/* Implement bpf_get_func_arg inline. */
2014 		if (prog_type == BPF_PROG_TYPE_TRACING &&
2015 		    insn->imm == BPF_FUNC_get_func_arg) {
2016 			if (eatype == BPF_TRACE_RAW_TP) {
2017 				int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
2018 
2019 				/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
2020 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
2021 				cnt = 1;
2022 			} else {
2023 				/* Load nr_args from ctx - 8 */
2024 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2025 				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2026 				cnt = 2;
2027 			}
2028 			insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
2029 			insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
2030 			insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
2031 			insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
2032 			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
2033 			insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
2034 			insn_buf[cnt++] = BPF_JMP_A(1);
2035 			insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
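			/* The sequence above is roughly:
			 *
			 *   if (n >= nr_args) return -EINVAL;
			 *   *value = ((u64 *)ctx)[n];
			 *   return 0;
			 *
			 * with n in R2, value in R3 and ctx in R1.
			 */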
2036 
2037 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2038 			if (!new_prog)
2039 				return -ENOMEM;
2040 
2041 			delta    += cnt - 1;
2042 			env->prog = prog = new_prog;
2043 			insn      = new_prog->insnsi + i + delta;
2044 			goto next_insn;
2045 		}
2046 
2047 		/* Implement bpf_get_func_ret inline. */
2048 		if (prog_type == BPF_PROG_TYPE_TRACING &&
2049 		    insn->imm == BPF_FUNC_get_func_ret) {
2050 			if (eatype == BPF_TRACE_FEXIT ||
2051 			    eatype == BPF_TRACE_FSESSION ||
2052 			    eatype == BPF_MODIFY_RETURN) {
2053 				/* Load nr_args from ctx - 8 */
2054 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2055 				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2056 				insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
2057 				insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
2058 				insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
2059 				insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
2060 				insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
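				/* i.e. *value = ((u64 *)ctx)[nr_args];
				 * return 0; -- the trampoline stores the
				 * return value right after the arguments.
				 */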
2061 				cnt = 7;
2062 			} else {
2063 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
2064 				cnt = 1;
2065 			}
2066 
2067 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2068 			if (!new_prog)
2069 				return -ENOMEM;
2070 
2071 			delta    += cnt - 1;
2072 			env->prog = prog = new_prog;
2073 			insn      = new_prog->insnsi + i + delta;
2074 			goto next_insn;
2075 		}
2076 
2077 		/* Implement get_func_arg_cnt inline. */
2078 		if (prog_type == BPF_PROG_TYPE_TRACING &&
2079 		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
2080 			if (eatype == BPF_TRACE_RAW_TP) {
2081 				int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
2082 
2083 				/* skip 'void *__data' in btf_trace_##name() and save to reg0 */
2084 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
2085 				cnt = 1;
2086 			} else {
2087 				/* Load nr_args from ctx - 8 */
2088 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2089 				insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2090 				cnt = 2;
2091 			}
2092 
2093 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2094 			if (!new_prog)
2095 				return -ENOMEM;
2096 
2097 			delta    += cnt - 1;
2098 			env->prog = prog = new_prog;
2099 			insn      = new_prog->insnsi + i + delta;
2100 			goto next_insn;
2101 		}
2102 
2103 		/* Implement bpf_get_func_ip inline. */
2104 		if (prog_type == BPF_PROG_TYPE_TRACING &&
2105 		    insn->imm == BPF_FUNC_get_func_ip) {
2106 			/* Load IP address from ctx - 16 */
2107 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
2108 
2109 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
2110 			if (!new_prog)
2111 				return -ENOMEM;
2112 
2113 			env->prog = prog = new_prog;
2114 			insn      = new_prog->insnsi + i + delta;
2115 			goto next_insn;
2116 		}
2117 
2118 		/* Implement bpf_get_branch_snapshot inline. */
2119 		if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
2120 		    prog->jit_requested && BITS_PER_LONG == 64 &&
2121 		    insn->imm == BPF_FUNC_get_branch_snapshot) {
2122 			/* We are dealing with the following func protos:
2123 			 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
2124 			 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
2125 			 */
2126 			const u32 br_entry_size = sizeof(struct perf_branch_entry);
2127 
2128 			/* struct perf_branch_entry is part of UAPI and is
2129 			 * used as an array element, so it is extremely
2130 			 * unlikely to ever grow or shrink.
2131 			 */
2132 			BUILD_BUG_ON(br_entry_size != 24);
2133 
2134 			/* if (unlikely(flags)) return -EINVAL */
2135 			insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
2136 
2137 			/* Transform size (bytes) into number of entries (cnt = size / 24).
2138 			 * But to avoid an expensive division instruction, we implement
2139 			 * divide-by-3 through multiplication, followed by a further
2140 			 * division by 8 through a 3-bit right shift.
2141 			 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
2142 			 * p. 227, chapter "Unsigned Division by 3" for details and proofs.
2143 			 *
2144 			 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
2145 			 */
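			/* A worked example: for size = 72 (three entries),
			 * 72 * 0xaaaaaaab = 0x3000000018, and
			 * 0x3000000018 >> 36 == 3 == 72 / 24.
			 */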
2146 			insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
2147 			insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
2148 			insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
2149 
2150 			/* call perf_snapshot_branch_stack implementation */
2151 			insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
2152 			/* if (entry_cnt == 0) return -ENOENT */
2153 			insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
2154 			/* return entry_cnt * sizeof(struct perf_branch_entry) */
2155 			insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
2156 			insn_buf[7] = BPF_JMP_A(3);
2157 			/* return -EINVAL; */
2158 			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
2159 			insn_buf[9] = BPF_JMP_A(1);
2160 			/* return -ENOENT; */
2161 			insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
2162 			cnt = 11;
2163 
2164 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2165 			if (!new_prog)
2166 				return -ENOMEM;
2167 
2168 			delta    += cnt - 1;
2169 			env->prog = prog = new_prog;
2170 			insn      = new_prog->insnsi + i + delta;
2171 			goto next_insn;
2172 		}
2173 
2174 		/* Implement bpf_kptr_xchg inline */
2175 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
2176 		    insn->imm == BPF_FUNC_kptr_xchg &&
2177 		    bpf_jit_supports_ptr_xchg()) {
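			/* Two insns: r0 = r2, then an atomic
			 * r0 = xchg((u64 *)(r1 + 0), r0), i.e. swap the new
			 * pointer into the kptr slot and return the old one,
			 * which is exactly bpf_kptr_xchg()'s contract.
			 */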
2178 			insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
2179 			insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
2180 			cnt = 2;
2181 
2182 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2183 			if (!new_prog)
2184 				return -ENOMEM;
2185 
2186 			delta    += cnt - 1;
2187 			env->prog = prog = new_prog;
2188 			insn      = new_prog->insnsi + i + delta;
2189 			goto next_insn;
2190 		}
2191 patch_call_imm:
2192 		fn = env->ops->get_func_proto(insn->imm, env->prog);
2193 		/* All functions that have a prototype, and that the verifier
2194 		 * allowed programs to call, must be real in-kernel functions.
2195 		 */
2196 		if (!fn->func) {
2197 			verifier_bug(env,
2198 				     "not inlined function %s#%d is missing func",
2199 				     func_id_name(insn->imm), insn->imm);
2200 			return -EFAULT;
2201 		}
2202 		insn->imm = fn->func - __bpf_call_base;
2203 next_insn:
2204 		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
2205 			subprogs[cur_subprog].stack_depth += stack_depth_extra;
2206 			subprogs[cur_subprog].stack_extra = stack_depth_extra;
2207 
2208 			stack_depth = subprogs[cur_subprog].stack_depth;
2209 			if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
2210 				verbose(env, "stack size %d(extra %d) is too large\n",
2211 					stack_depth, stack_depth_extra);
2212 				return -EINVAL;
2213 			}
2214 			cur_subprog++;
2215 			stack_depth = subprogs[cur_subprog].stack_depth;
2216 			stack_depth_extra = 0;
2217 		}
2218 		i++;
2219 		insn++;
2220 	}
2221 
2222 	env->prog->aux->stack_depth = subprogs[0].stack_depth;
2223 	for (i = 0; i < env->subprog_cnt; i++) {
2224 		int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
2225 		int subprog_start = subprogs[i].start;
2226 		int stack_slots = subprogs[i].stack_extra / 8;
2227 		int slots = delta, cnt = 0;
2228 
2229 		if (!stack_slots)
2230 			continue;
2231 		/* We need two slots in case timed may_goto is supported. */
2232 		if (stack_slots > slots) {
2233 			verifier_bug(env, "stack_slots supports may_goto only");
2234 			return -EFAULT;
2235 		}
2236 
2237 		stack_depth = subprogs[i].stack_depth;
2238 		if (bpf_jit_supports_timed_may_goto()) {
2239 			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
2240 						     BPF_MAX_TIMED_LOOPS);
2241 			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
2242 		} else {
2243 			/* Add ST insn to subprog prologue to init extra stack */
2244 			insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
2245 						     BPF_MAX_LOOPS);
2246 		}
2247 		/* Copy first actual insn to preserve it */
2248 		insn_buf[cnt++] = env->prog->insnsi[subprog_start];
2249 
2250 		new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
2251 		if (!new_prog)
2252 			return -ENOMEM;
2253 		env->prog = prog = new_prog;
2254 		/*
2255 		 * If may_goto is the first insn of a prog, there could be a jmp
2256 		 * insn that points to it, hence adjust all such jmps to point
2257 		 * to the insn after the BPF_ST that inits the may_goto count.
2258 		 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
2259 		 */
2260 		WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
2261 	}
2262 
2263 	/* Since poke tab is now finalized, publish aux to tracker. */
2264 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
2265 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
2266 		if (!map_ptr->ops->map_poke_track ||
2267 		    !map_ptr->ops->map_poke_untrack ||
2268 		    !map_ptr->ops->map_poke_run) {
2269 			verifier_bug(env, "poke tab is misconfigured");
2270 			return -EFAULT;
2271 		}
2272 
2273 		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
2274 		if (ret < 0) {
2275 			verbose(env, "tracking tail call prog failed\n");
2276 			return ret;
2277 		}
2278 	}
2279 
2280 	ret = sort_kfunc_descs_by_imm_off(env);
2281 	if (ret)
2282 		return ret;
2283 
2284 	return 0;
2285 }
2286 
2287 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
2288 					int position,
2289 					s32 stack_base,
2290 					u32 callback_subprogno,
2291 					u32 *total_cnt)
2292 {
2293 	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
2294 	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
2295 	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
2296 	int reg_loop_max = BPF_REG_6;
2297 	int reg_loop_cnt = BPF_REG_7;
2298 	int reg_loop_ctx = BPF_REG_8;
2299 
2300 	struct bpf_insn *insn_buf = env->insn_buf;
2301 	struct bpf_prog *new_prog;
2302 	u32 callback_start;
2303 	u32 call_insn_offset;
2304 	s32 callback_offset;
2305 	u32 cnt = 0;
2306 
2307 	/* This represents an inlined version of bpf_iter.c:bpf_loop;
2308 	 * take care to modify the two in sync.
2309 	 */
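	/* In C terms, the patch below behaves like this sketch:
	 *
	 *   if (nr_loops > BPF_MAX_LOOPS)
	 *           return -E2BIG;
	 *   for (i = 0; i < nr_loops; i++)
	 *           if (callback_fn(i, callback_ctx))
	 *                   return i + 1;
	 *   return i;
	 *
	 * with R6-R8 spilled around it to hold the loop vars.
	 */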
2310 
2311 	/* Return an error and jump to the end of the patch if the
2312 	 * expected number of iterations is too big.
2313 	 */
2314 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
2315 	insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
2316 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
2317 	/* spill R6, R7, R8 to use these as loop vars */
2318 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
2319 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
2320 	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
2321 	/* initialize loop vars */
2322 	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
2323 	insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
2324 	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
2325 	/* loop header,
2326 	 * if reg_loop_cnt >= reg_loop_max skip the loop body
2327 	 */
2328 	insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
2329 	/* callback call,
2330 	 * the correct callback offset will be set after patching
2331 	 */
2332 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
2333 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
2334 	insn_buf[cnt++] = BPF_CALL_REL(0);
2335 	/* increment loop counter */
2336 	insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
2337 	/* jump to loop header if callback returned 0 */
2338 	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
2339 	/* return value of bpf_loop,
2340 	 * set R0 to the number of iterations
2341 	 */
2342 	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
2343 	/* restore original values of R6, R7, R8 */
2344 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
2345 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
2346 	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
2347 
2348 	*total_cnt = cnt;
2349 	new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
2350 	if (!new_prog)
2351 		return new_prog;
2352 
2353 	/* callback start is known only after patching */
2354 	callback_start = env->subprog_info[callback_subprogno].start;
2355 	/* Note: insn_buf[12] is the offset of the BPF_CALL_REL instruction */
2356 	call_insn_offset = position + 12;
2357 	callback_offset = callback_start - call_insn_offset - 1;
2358 	new_prog->insnsi[call_insn_offset].imm = callback_offset;
2359 
2360 	return new_prog;
2361 }
2362 
2363 static bool is_bpf_loop_call(struct bpf_insn *insn)
2364 {
2365 	return insn->code == (BPF_JMP | BPF_CALL) &&
2366 		insn->src_reg == 0 &&
2367 		insn->imm == BPF_FUNC_loop;
2368 }
2369 
2370 /* For all sub-programs in the program (including main), check
2371  * insn_aux_data to see if there are bpf_loop calls that require
2372  * inlining. If such calls are found, they are replaced with the
2373  * sequence of instructions produced by the `inline_bpf_loop`
2374  * function, and the subprog stack_depth is increased by the size
2375  * of 3 registers. This stack space is used to spill the values
2376  * of R6, R7 and R8, which hold the loop bound, counter and
2377  * context variables.
2378  */
2379 int bpf_optimize_bpf_loop(struct bpf_verifier_env *env)
2380 {
2381 	struct bpf_subprog_info *subprogs = env->subprog_info;
2382 	int i, cur_subprog = 0, cnt, delta = 0;
2383 	struct bpf_insn *insn = env->prog->insnsi;
2384 	int insn_cnt = env->prog->len;
2385 	u16 stack_depth = subprogs[cur_subprog].stack_depth;
2386 	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
2387 	u16 stack_depth_extra = 0;
2388 
2389 	for (i = 0; i < insn_cnt; i++, insn++) {
2390 		struct bpf_loop_inline_state *inline_state =
2391 			&env->insn_aux_data[i + delta].loop_inline_state;
2392 
2393 		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
2394 			struct bpf_prog *new_prog;
2395 
2396 			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
2397 			new_prog = inline_bpf_loop(env,
2398 						   i + delta,
2399 						   -(stack_depth + stack_depth_extra),
2400 						   inline_state->callback_subprogno,
2401 						   &cnt);
2402 			if (!new_prog)
2403 				return -ENOMEM;
2404 
2405 			delta     += cnt - 1;
2406 			env->prog  = new_prog;
2407 			insn       = new_prog->insnsi + i + delta;
2408 		}
2409 
2410 		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
2411 			subprogs[cur_subprog].stack_depth += stack_depth_extra;
2412 			cur_subprog++;
2413 			stack_depth = subprogs[cur_subprog].stack_depth;
2414 			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
2415 			stack_depth_extra = 0;
2416 		}
2417 	}
2418 
2419 	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
2420 
2421 	return 0;
2422 }
2423 
2424 /* Remove unnecessary spill/fill pairs that are members of a fastcall
2425  * pattern, and adjust subprogram stack depth where possible.
2426  */
2427 int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env)
2428 {
2429 	struct bpf_subprog_info *subprog = env->subprog_info;
2430 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
2431 	struct bpf_insn *insn = env->prog->insnsi;
2432 	int insn_cnt = env->prog->len;
2433 	u32 spills_num;
2434 	bool modified = false;
2435 	int i, j;
2436 
2437 	for (i = 0; i < insn_cnt; i++, insn++) {
2438 		if (aux[i].fastcall_spills_num > 0) {
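			/* insn is the call at the center of the fastcall
			 * pattern; the j-th spill before it and the matching
			 * j-th fill after it are rewritten as NOPs.
			 */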
2439 			spills_num = aux[i].fastcall_spills_num;
2440 			/* NOPs would be removed by opt_remove_nops() */
2441 			for (j = 1; j <= spills_num; ++j) {
2442 				*(insn - j) = NOP;
2443 				*(insn + j) = NOP;
2444 			}
2445 			modified = true;
2446 		}
2447 		if ((subprog + 1)->start == i + 1) {
2448 			if (modified && !subprog->keep_fastcall_stack)
2449 				subprog->stack_depth = -subprog->fastcall_stack_off;
2450 			subprog++;
2451 			modified = false;
2452 		}
2453 	}
2454 
2455 	return 0;
2456 }
2457 
2458