1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
3 #include <linux/bpf.h>
4 #include <linux/btf.h>
5 #include <linux/bpf_verifier.h>
6 #include <linux/filter.h>
7 #include <linux/vmalloc.h>
8 #include <linux/bsearch.h>
9 #include <linux/sort.h>
10 #include <linux/perf_event.h>
11 #include <net/xdp.h>
12 #include "disasm.h"
13
/* Shorthand that forwards its arguments unchanged to bpf_verifier_log_write(). */
#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
15
is_cmpxchg_insn(const struct bpf_insn * insn)16 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
17 {
18 return BPF_CLASS(insn->code) == BPF_STX &&
19 BPF_MODE(insn->code) == BPF_ATOMIC &&
20 insn->imm == BPF_CMPXCHG;
21 }
22
23 /* Return the regno defined by the insn, or -1. */
insn_def_regno(const struct bpf_insn * insn)24 static int insn_def_regno(const struct bpf_insn *insn)
25 {
26 switch (BPF_CLASS(insn->code)) {
27 case BPF_JMP:
28 case BPF_JMP32:
29 case BPF_ST:
30 return -1;
31 case BPF_STX:
32 if (BPF_MODE(insn->code) == BPF_ATOMIC ||
33 BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
34 if (insn->imm == BPF_CMPXCHG)
35 return BPF_REG_0;
36 else if (insn->imm == BPF_LOAD_ACQ)
37 return insn->dst_reg;
38 else if (insn->imm & BPF_FETCH)
39 return insn->src_reg;
40 }
41 return -1;
42 default:
43 return insn->dst_reg;
44 }
45 }
46
47 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
insn_has_def32(struct bpf_insn * insn)48 static bool insn_has_def32(struct bpf_insn *insn)
49 {
50 int dst_reg = insn_def_regno(insn);
51
52 if (dst_reg == -1)
53 return false;
54
55 return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
56 }
57
kfunc_desc_cmp_by_imm_off(const void * a,const void * b)58 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
59 {
60 const struct bpf_kfunc_desc *d0 = a;
61 const struct bpf_kfunc_desc *d1 = b;
62
63 if (d0->imm != d1->imm)
64 return d0->imm < d1->imm ? -1 : 1;
65 if (d0->offset != d1->offset)
66 return d0->offset < d1->offset ? -1 : 1;
67 return 0;
68 }
69
70 const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog * prog,const struct bpf_insn * insn)71 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
72 const struct bpf_insn *insn)
73 {
74 const struct bpf_kfunc_desc desc = {
75 .imm = insn->imm,
76 .offset = insn->off,
77 };
78 const struct bpf_kfunc_desc *res;
79 struct bpf_kfunc_desc_tab *tab;
80
81 tab = prog->aux->kfunc_tab;
82 res = bsearch(&desc, tab->descs, tab->nr_descs,
83 sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
84
85 return res ? &res->func_model : NULL;
86 }
87
set_kfunc_desc_imm(struct bpf_verifier_env * env,struct bpf_kfunc_desc * desc)88 static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
89 {
90 unsigned long call_imm;
91
92 if (bpf_jit_supports_far_kfunc_call()) {
93 call_imm = desc->func_id;
94 } else {
95 call_imm = BPF_CALL_IMM(desc->addr);
96 /* Check whether the relative offset overflows desc->imm */
97 if ((unsigned long)(s32)call_imm != call_imm) {
98 verbose(env, "address of kernel func_id %u is out of range\n",
99 desc->func_id);
100 return -EINVAL;
101 }
102 }
103 desc->imm = call_imm;
104 return 0;
105 }
106
sort_kfunc_descs_by_imm_off(struct bpf_verifier_env * env)107 static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
108 {
109 struct bpf_kfunc_desc_tab *tab;
110 int i, err;
111
112 tab = env->prog->aux->kfunc_tab;
113 if (!tab)
114 return 0;
115
116 for (i = 0; i < tab->nr_descs; i++) {
117 err = set_kfunc_desc_imm(env, &tab->descs[i]);
118 if (err)
119 return err;
120 }
121
122 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
123 kfunc_desc_cmp_by_imm_off, NULL);
124 return 0;
125 }
126
add_kfunc_in_insns(struct bpf_verifier_env * env,struct bpf_insn * insn,int cnt)127 static int add_kfunc_in_insns(struct bpf_verifier_env *env,
128 struct bpf_insn *insn, int cnt)
129 {
130 int i, ret;
131
132 for (i = 0; i < cnt; i++, insn++) {
133 if (bpf_pseudo_kfunc_call(insn)) {
134 ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
135 if (ret < 0)
136 return ret;
137 }
138 }
139 return 0;
140 }
141
142 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
get_callee_stack_depth(struct bpf_verifier_env * env,const struct bpf_insn * insn,int idx)143 static int get_callee_stack_depth(struct bpf_verifier_env *env,
144 const struct bpf_insn *insn, int idx)
145 {
146 int start = idx + insn->imm + 1, subprog;
147
148 subprog = bpf_find_subprog(env, start);
149 if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
150 return -EFAULT;
151 return env->subprog_info[subprog].stack_depth;
152 }
153 #endif
154
155 /* single env->prog->insni[off] instruction was replaced with the range
156 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
157 * [0, off) and [off, end) to new locations, so the patched range stays zero
158 */
adjust_insn_aux_data(struct bpf_verifier_env * env,struct bpf_prog * new_prog,u32 off,u32 cnt)159 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
160 struct bpf_prog *new_prog, u32 off, u32 cnt)
161 {
162 struct bpf_insn_aux_data *data = env->insn_aux_data;
163 struct bpf_insn *insn = new_prog->insnsi;
164 u32 old_seen = data[off].seen;
165 u32 prog_len;
166 int i;
167
168 /* aux info at OFF always needs adjustment, no matter fast path
169 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
170 * original insn at old prog.
171 */
172 data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
173
174 if (cnt == 1)
175 return;
176 prog_len = new_prog->len;
177
178 memmove(data + off + cnt - 1, data + off,
179 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
180 memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
181 for (i = off; i < off + cnt - 1; i++) {
182 /* Expand insni[off]'s seen count to the patched range. */
183 data[i].seen = old_seen;
184 data[i].zext_dst = insn_has_def32(insn + i);
185 }
186
187 /*
188 * The indirect_target flag of the original instruction was moved to the last of the
189 * new instructions by the above memmove and memset, but the indirect jump target is
190 * actually the first instruction, so move it back. This also matches with the behavior
191 * of bpf_insn_array_adjust(), which preserves xlated_off to point to the first new
192 * instruction.
193 */
194 if (data[off + cnt - 1].indirect_target) {
195 data[off].indirect_target = 1;
196 data[off + cnt - 1].indirect_target = 0;
197 }
198 }
199
/*
 * After the instruction at @off was replaced by @len instructions, move the
 * start of every subprog that begins after @off forward by len - 1.
 */
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
{
	struct bpf_subprog_info *sub;
	int idx;

	if (len == 1)
		return;

	/* '<=' on purpose: the fake 'exit' subprog must be updated too. */
	for (idx = 0; idx <= env->subprog_cnt; idx++) {
		sub = &env->subprog_info[idx];
		if (sub->start > off)
			sub->start += len - 1;
	}
}
213
/*
 * Call bpf_insn_array_adjust() with (@off, @len) on each of the
 * env->insn_array_map_cnt maps in env->insn_array_maps. Nothing is done when
 * @len is 1.
 */
static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int idx = 0;

	if (len == 1)
		return;

	while (idx < env->insn_array_map_cnt) {
		bpf_insn_array_adjust(env->insn_array_maps[idx], off, len);
		idx++;
	}
}
224
/*
 * Call bpf_insn_array_adjust_after_remove() with (@off, @len) on each of the
 * env->insn_array_map_cnt maps in env->insn_array_maps.
 */
static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int idx = 0;

	while (idx < env->insn_array_map_cnt) {
		bpf_insn_array_adjust_after_remove(env->insn_array_maps[idx], off, len);
		idx++;
	}
}
232
/*
 * After the instruction at @off was replaced by @len instructions, add
 * len - 1 to the insn_idx of every poke descriptor in prog->aux->poke_tab
 * whose insn_idx is greater than @off.
 */
static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
{
	struct bpf_jit_poke_descriptor *desc = prog->aux->poke_tab;
	int remaining = prog->aux->size_poke_tab;

	for (; remaining > 0; remaining--, desc++) {
		if (desc->insn_idx > off)
			desc->insn_idx += len - 1;
	}
}
246
/*
 * Replace the instruction at @off in env->prog with the @len instructions in
 * @patch and keep the verifier's per-instruction bookkeeping in step
 * (insn_aux_data, subprog starts, insn array maps, poke descriptors).
 *
 * Returns the new program, or NULL if either growing insn_aux_data or
 * patching the program fails; a message is logged when the patch fails with
 * -ERANGE. The caller is responsible for storing the result in env->prog.
 */
struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
				     const struct bpf_insn *patch, u32 len)
{
	struct bpf_insn_aux_data *grown;
	struct bpf_prog *patched;

	/* The aux array only has to grow when instructions are added. */
	if (len > 1) {
		size_t bytes = array_size(env->prog->len + len - 1,
					  sizeof(struct bpf_insn_aux_data));

		grown = vrealloc(env->insn_aux_data, bytes,
				 GFP_KERNEL_ACCOUNT | __GFP_ZERO);
		if (!grown)
			return NULL;

		env->insn_aux_data = grown;
	}

	patched = bpf_patch_insn_single(env->prog, off, patch, len);
	if (!IS_ERR(patched)) {
		adjust_insn_aux_data(env, patched, off, len);
		adjust_subprog_starts(env, off, len);
		adjust_insn_arrays(env, off, len);
		adjust_poke_descs(patched, off, len);
		return patched;
	}

	if (PTR_ERR(patched) == -ERANGE)
		verbose(env,
			"insn %d cannot be patched due to 16-bit range\n",
			env->insn_aux_data[off].orig_idx);

	return NULL;
}
278
279 /*
280 * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
281 * jump offset by 'delta'.
282 */
adjust_jmp_off(struct bpf_prog * prog,u32 tgt_idx,u32 delta)283 static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
284 {
285 struct bpf_insn *insn = prog->insnsi;
286 u32 insn_cnt = prog->len, i;
287 s32 imm;
288 s16 off;
289
290 for (i = 0; i < insn_cnt; i++, insn++) {
291 u8 code = insn->code;
292
293 if (tgt_idx <= i && i < tgt_idx + delta)
294 continue;
295
296 if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
297 BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
298 continue;
299
300 if (insn->code == (BPF_JMP32 | BPF_JA)) {
301 if (i + 1 + insn->imm != tgt_idx)
302 continue;
303 if (check_add_overflow(insn->imm, delta, &imm))
304 return -ERANGE;
305 insn->imm = imm;
306 } else {
307 if (i + 1 + insn->off != tgt_idx)
308 continue;
309 if (check_add_overflow(insn->off, delta, &off))
310 return -ERANGE;
311 insn->off = off;
312 }
313 }
314 return 0;
315 }
316
/*
 * Fix up env->subprog_info[] after the instructions [off, off + cnt) were
 * removed from the program.
 *
 * Subprog entries [i, j) that fall entirely inside the removed range are
 * dropped from subprog_info (and, when aux->func_info is set, from func_info
 * as well). The start of every entry after the removed range, including the
 * fake 'exit' entry at index subprog_cnt, is then lowered by cnt.
 *
 * Always returns 0.
 */
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
					      u32 off, u32 cnt)
{
	int i, j;

	/* find first prog starting at or after off (first to remove) */
	for (i = 0; i < env->subprog_cnt; i++)
		if (env->subprog_info[i].start >= off)
			break;
	/* find first prog starting at or after off + cnt (first to stay) */
	for (j = i; j < env->subprog_cnt; j++)
		if (env->subprog_info[j].start >= off + cnt)
			break;
	/* if j doesn't start exactly at off + cnt, we are just removing
	 * the front of previous prog
	 */
	if (env->subprog_info[j].start != off + cnt)
		j--;

	if (j > i) {
		struct bpf_prog_aux *aux = env->prog->aux;
		int move;

		/* move fake 'exit' subprog as well */
		move = env->subprog_cnt + 1 - j;

		/* overwrite entries [i, j) with the entries from j onwards */
		memmove(env->subprog_info + i,
			env->subprog_info + j,
			sizeof(*env->subprog_info) * move);
		env->subprog_cnt -= j - i;

		/* remove func_info */
		if (aux->func_info) {
			move = aux->func_info_cnt - j;

			memmove(aux->func_info + i,
				aux->func_info + j,
				sizeof(*aux->func_info) * move);
			aux->func_info_cnt -= j - i;
			/* func_info->insn_off is set after all code rewrites,
			 * in adjust_btf_func() - no need to adjust
			 */
		}
	} else {
		/* convert i from "first prog to remove" to "first to adjust" */
		if (env->subprog_info[i].start == off)
			i++;
	}

	/* update fake 'exit' subprog as well */
	for (; i <= env->subprog_cnt; i++)
		env->subprog_info[i].start -= cnt;

	return 0;
}
372
/*
 * Fix up prog->aux->linfo[] and the per-subprog linfo_idx values after the
 * instructions [off, off + cnt) were removed from the program.
 *
 * Line info entries whose insn_off lies in the removed range are deleted,
 * except that the last of them is kept and re-pointed at the first surviving
 * instruction when that instruction has no line info of its own. The
 * insn_off of every remaining entry from the removal point onwards is
 * lowered by cnt, and each subprog's linfo_idx is adjusted to account for
 * the deleted entries.
 *
 * Returns 0 (also when the program carries no line info).
 */
static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
				      u32 cnt)
{
	struct bpf_prog *prog = env->prog;
	u32 i, l_off, l_cnt, nr_linfo;
	struct bpf_line_info *linfo;

	nr_linfo = prog->aux->nr_linfo;
	if (!nr_linfo)
		return 0;

	linfo = prog->aux->linfo;

	/* find first line info to remove, count lines to be removed */
	for (i = 0; i < nr_linfo; i++)
		if (linfo[i].insn_off >= off)
			break;

	/* l_off: index of the first entry to remove, l_cnt: how many follow */
	l_off = i;
	l_cnt = 0;
	for (; i < nr_linfo; i++)
		if (linfo[i].insn_off < off + cnt)
			l_cnt++;
		else
			break;

	/* First live insn doesn't match first live linfo, it needs to "inherit"
	 * last removed linfo.  prog is already modified, so prog->len == off
	 * means no live instructions after (tail of the program was removed).
	 */
	if (prog->len != off && l_cnt &&
	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
		l_cnt--;
		linfo[--i].insn_off = off + cnt;
	}

	/* remove the line info which refer to the removed instructions */
	if (l_cnt) {
		memmove(linfo + l_off, linfo + i,
			sizeof(*linfo) * (nr_linfo - i));

		prog->aux->nr_linfo -= l_cnt;
		nr_linfo = prog->aux->nr_linfo;
	}

	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
	for (i = l_off; i < nr_linfo; i++)
		linfo[i].insn_off -= cnt;

	/* fix up all subprogs (incl. 'exit') which start >= off */
	for (i = 0; i <= env->subprog_cnt; i++)
		if (env->subprog_info[i].linfo_idx > l_off) {
			/* program may have started in the removed region but
			 * may not be fully removed
			 */
			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
				env->subprog_info[i].linfo_idx -= l_cnt;
			else
				env->subprog_info[i].linfo_idx = l_off;
		}

	return 0;
}
436
437 /*
438 * Clean up dynamically allocated fields of aux data for instructions [start, ...]
439 */
bpf_clear_insn_aux_data(struct bpf_verifier_env * env,int start,int len)440 void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
441 {
442 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
443 struct bpf_insn *insns = env->prog->insnsi;
444 int end = start + len;
445 int i;
446
447 for (i = start; i < end; i++) {
448 if (aux_data[i].jt) {
449 kvfree(aux_data[i].jt);
450 aux_data[i].jt = NULL;
451 }
452
453 if (bpf_is_ldimm64(&insns[i]))
454 i++;
455 }
456 }
457
/*
 * Remove @cnt instructions starting at @off from env->prog and bring the
 * verifier's side tables back in sync: offload state, per-insn aux data,
 * subprog starts, line info and insn array maps.
 *
 * Returns 0 on success or the first error reported by one of the helpers.
 */
static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	const unsigned int len_before = env->prog->len;
	int ret;

	if (bpf_prog_is_offloaded(env->prog->aux))
		bpf_prog_offload_remove_insns(env, off, cnt);

	/* Must run before bpf_remove_insns() because it reads prog->insnsi. */
	bpf_clear_insn_aux_data(env, off, cnt);

	ret = bpf_remove_insns(env->prog, off, cnt);
	if (!ret)
		ret = adjust_subprog_starts_after_remove(env, off, cnt);
	if (!ret)
		ret = bpf_adj_linfo_after_remove(env, off, cnt);
	if (ret)
		return ret;

	adjust_insn_arrays_after_remove(env, off, cnt);

	/* Close the gap in the aux array, sized by the pre-removal length. */
	memmove(aux + off, aux + off + cnt,
		sizeof(*aux) * (len_before - off - cnt));

	return 0;
}
489
/*
 * Instruction patterns that bpf_opt_remove_nops() compares against with
 * memcmp(): a BPF_JA jump and a BPF_JCOND instruction, both with every
 * operand field set to zero.
 */
static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
492
bpf_insn_is_cond_jump(u8 code)493 bool bpf_insn_is_cond_jump(u8 code)
494 {
495 u8 op;
496
497 op = BPF_OP(code);
498 if (BPF_CLASS(code) == BPF_JMP32)
499 return op != BPF_JA;
500
501 if (BPF_CLASS(code) != BPF_JMP)
502 return false;
503
504 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
505 }
506
bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env * env)507 void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
508 {
509 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
510 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
511 struct bpf_insn *insn = env->prog->insnsi;
512 const int insn_cnt = env->prog->len;
513 int i;
514
515 for (i = 0; i < insn_cnt; i++, insn++) {
516 if (!bpf_insn_is_cond_jump(insn->code))
517 continue;
518
519 if (!aux_data[i + 1].seen)
520 ja.off = insn->off;
521 else if (!aux_data[i + 1 + insn->off].seen)
522 ja.off = 0;
523 else
524 continue;
525
526 if (bpf_prog_is_offloaded(env->prog->aux))
527 bpf_prog_offload_replace_insn(env, i, &ja);
528
529 memcpy(insn, &ja, sizeof(ja));
530 }
531 }
532
bpf_opt_remove_dead_code(struct bpf_verifier_env * env)533 int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
534 {
535 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
536 int insn_cnt = env->prog->len;
537 int i, err;
538
539 for (i = 0; i < insn_cnt; i++) {
540 int j;
541
542 j = 0;
543 while (i + j < insn_cnt && !aux_data[i + j].seen)
544 j++;
545 if (!j)
546 continue;
547
548 err = verifier_remove_insns(env, i, j);
549 if (err)
550 return err;
551 insn_cnt = env->prog->len;
552 }
553
554 return 0;
555 }
556
bpf_opt_remove_nops(struct bpf_verifier_env * env)557 int bpf_opt_remove_nops(struct bpf_verifier_env *env)
558 {
559 struct bpf_insn *insn = env->prog->insnsi;
560 int insn_cnt = env->prog->len;
561 bool is_may_goto_0, is_ja;
562 int i, err;
563
564 for (i = 0; i < insn_cnt; i++) {
565 is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
566 is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
567
568 if (!is_may_goto_0 && !is_ja)
569 continue;
570
571 err = verifier_remove_insns(env, i, 1);
572 if (err)
573 return err;
574 insn_cnt--;
575 /* Go back one insn to catch may_goto +1; may_goto +0 sequence */
576 i -= (is_may_goto_0 && i > 0) ? 2 : 1;
577 }
578
579 return 0;
580 }
581
/*
 * Walk the program and patch instructions that define a 32-bit sub-register:
 *  - when the instruction's aux zext_dst flag is set and either
 *    bpf_jit_needs_zext() is true or the instruction is a CMPXCHG, append an
 *    explicit BPF_ZEXT_REG() of the defined register (kfunc calls are
 *    skipped);
 *  - when zext_dst is clear, BPF_F_TEST_RND_HI32 is set in attr->prog_flags
 *    and the instruction defines a non-64-bit register, append a
 *    three-instruction sequence that ORs a random value into the upper 32
 *    bits of that register (loads through PTR_TO_CTX are skipped).
 *
 * Returns 0 on success, -ENOMEM if bpf_patch_insn_data() fails and -EFAULT
 * if zext_dst is set on an instruction that defines no register.
 */
int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
				      const union bpf_attr *attr)
{
	struct bpf_insn *patch;
	/* use env->insn_buf as two independent buffers */
	struct bpf_insn *zext_patch = env->insn_buf;
	struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	int i, patch_len, delta = 0, len = env->prog->len;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_prog *new_prog;
	bool rnd_hi32;

	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
	/* Slot 0 of each patch is filled with the original insn per iteration. */
	zext_patch[1] = BPF_ZEXT_REG(0);
	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
	for (i = 0; i < len; i++) {
		/* i indexes the original program, adj_idx the patched one */
		int adj_idx = i + delta;
		struct bpf_insn insn;
		int load_reg;

		insn = insns[adj_idx];
		load_reg = insn_def_regno(&insn);
		if (!aux[adj_idx].zext_dst) {
			u8 code, class;
			u32 imm_rnd;

			if (!rnd_hi32)
				continue;

			code = insn.code;
			class = BPF_CLASS(code);
			if (load_reg == -1)
				continue;

			/* NOTE: arg "reg" (the fourth one) is only used for
			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
			 *       here.
			 */
			if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
				/* step over the extra slot of a BPF_LD | BPF_IMM insn */
				if (class == BPF_LD &&
				    BPF_MODE(code) == BPF_IMM)
					i++;
				continue;
			}

			/* ctx load could be transformed into wider load. */
			if (class == BPF_LDX &&
			    aux[adj_idx].ptr_type == PTR_TO_CTX)
				continue;

			imm_rnd = get_random_u32();
			rnd_hi32_patch[0] = insn;
			rnd_hi32_patch[1].imm = imm_rnd;
			rnd_hi32_patch[3].dst_reg = load_reg;
			patch = rnd_hi32_patch;
			patch_len = 4;
			goto apply_patch_buffer;
		}

		/* Add in an zero-extend instruction if a) the JIT has requested
		 * it or b) it's a CMPXCHG.
		 *
		 * The latter is because: BPF_CMPXCHG always loads a value into
		 * R0, therefore always zero-extends. However some archs'
		 * equivalent instruction only does this load when the
		 * comparison is successful. This detail of CMPXCHG is
		 * orthogonal to the general zero-extension behaviour of the
		 * CPU, so it's treated independently of bpf_jit_needs_zext.
		 */
		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
			continue;

		/* Zero-extension is done by the caller. */
		if (bpf_pseudo_kfunc_call(&insn))
			continue;

		if (verifier_bug_if(load_reg == -1, env,
				    "zext_dst is set, but no reg is defined"))
			return -EFAULT;

		zext_patch[0] = insn;
		zext_patch[1].dst_reg = load_reg;
		zext_patch[1].src_reg = load_reg;
		patch = zext_patch;
		patch_len = 2;
apply_patch_buffer:
		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
		if (!new_prog)
			return -ENOMEM;
		/* patching may have replaced the program and the aux array */
		env->prog = new_prog;
		insns = new_prog->insnsi;
		aux = env->insn_aux_data;
		delta += patch_len - 1;
	}

	return 0;
}
682
/* convert load instructions that access fields of a context type into a
 * sequence of instructions that access fields of the underlying structure:
 *     struct __sk_buff    -> struct sk_buff
 *     struct bpf_sock_ops -> struct sock
 *
 * In the same pass this function also:
 *  - inserts the program-type prologue and epilogue generated by
 *    ops->gen_prologue / ops->gen_epilogue;
 *  - inserts BPF_ST_NOSPEC() before instructions flagged 'nospec' and after
 *    writes flagged 'nospec_result';
 *  - rewrites the opcode of accesses through PTR_TO_BTF_ID-style and
 *    PTR_TO_ARENA pointers to their BPF_PROBE_* variants and counts them in
 *    prog->aux->num_exentries.
 *
 * Returns 0 on success, -ENOMEM if patching fails, -EFAULT on an internal
 * inconsistency, -EOPNOTSUPP for an unsupported sign-extending arena load,
 * or a negative error from add_kfunc_in_insns().
 */
int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprogs = env->subprog_info;
	const struct bpf_verifier_ops *ops = env->ops;
	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
	const int insn_cnt = env->prog->len;
	struct bpf_insn *epilogue_buf = env->epilogue_buf;
	struct bpf_insn *insn_buf = env->insn_buf;
	struct bpf_insn *insn;
	u32 target_size, size_default, off;
	struct bpf_prog *new_prog;
	enum bpf_access_type type;
	bool is_narrower_load;
	int epilogue_idx = 0;

	if (ops->gen_epilogue) {
		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
						 -(subprogs[0].stack_depth + 8));
		if (epilogue_cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "epilogue is too long");
			return -EFAULT;
		} else if (epilogue_cnt) {
			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
			cnt = 0;
			subprogs[0].stack_depth += 8;
			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
						      -subprogs[0].stack_depth);
			insn_buf[cnt++] = env->prog->insnsi[0];
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;
			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	if (ops->gen_prologue || env->seen_direct_write) {
		if (!ops->gen_prologue) {
			verifier_bug(env, "gen_prologue is null");
			return -EFAULT;
		}
		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
					env->prog);
		if (cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "prologue is too long");
			return -EFAULT;
		} else if (cnt) {
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	/* Retarget jumps to insn 0 past the 'delta' insns inserted in front. */
	if (delta)
		WARN_ON(adjust_jmp_off(env->prog, 0, delta));

	if (bpf_prog_is_offloaded(env->prog->aux))
		return 0;

	insn = env->prog->insnsi + delta;

	/* i walks the original instructions; i + delta is the current index. */
	for (i = 0; i < insn_cnt; i++, insn++) {
		bpf_convert_ctx_access_t convert_ctx_access;
		u8 mode;

		if (env->insn_aux_data[i + delta].nospec) {
			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
			struct bpf_insn *patch = insn_buf;

			*patch++ = BPF_ST_NOSPEC();
			*patch++ = *insn;
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			/* This can not be easily merged with the
			 * nospec_result-case, because an insn may require a
			 * nospec before and after itself. Therefore also do not
			 * 'continue' here but potentially apply further
			 * patching to insn. *insn should equal patch[1] now.
			 */
		}

		/* Classify the access; anything unhandled is skipped. */
		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
			type = BPF_READ;
		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
			type = BPF_WRITE;
		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
			env->prog->aux->num_exentries++;
			continue;
		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
			   epilogue_cnt &&
			   i + delta < subprogs[1].start) {
			/* Generate epilogue for the main prog */
			if (epilogue_idx) {
				/* jump back to the earlier generated epilogue */
				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
				cnt = 1;
			} else {
				memcpy(insn_buf, epilogue_buf,
				       epilogue_cnt * sizeof(*epilogue_buf));
				cnt = epilogue_cnt;
				/* epilogue_idx cannot be 0. It must have at
				 * least one ctx ptr saving insn before the
				 * epilogue.
				 */
				epilogue_idx = i + delta;
			}
			goto patch_insn_buf;
		} else {
			continue;
		}

		if (type == BPF_WRITE &&
		    env->insn_aux_data[i + delta].nospec_result) {
			/* nospec_result is only used to mitigate Spectre v4 and
			 * to limit verification-time for Spectre v1.
			 */
			struct bpf_insn *patch = insn_buf;

			*patch++ = *insn;
			*patch++ = BPF_ST_NOSPEC();
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		/* Pick the conversion callback from the recorded pointer type. */
		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
		case PTR_TO_CTX:
			if (!ops->convert_ctx_access)
				continue;
			convert_ctx_access = ops->convert_ctx_access;
			break;
		case PTR_TO_SOCKET:
		case PTR_TO_SOCK_COMMON:
			convert_ctx_access = bpf_sock_convert_ctx_access;
			break;
		case PTR_TO_TCP_SOCK:
			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
			break;
		case PTR_TO_XDP_SOCK:
			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
			break;
		case PTR_TO_BTF_ID:
		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
		 * any faults for loads into such types. BPF_WRITE is disallowed
		 * for this case.
		 */
		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
			if (type == BPF_READ) {
				if (BPF_MODE(insn->code) == BPF_MEM)
					insn->code = BPF_LDX | BPF_PROBE_MEM |
						     BPF_SIZE((insn)->code);
				else
					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
						     BPF_SIZE((insn)->code);
				env->prog->aux->num_exentries++;
			}
			continue;
		case PTR_TO_ARENA:
			if (BPF_MODE(insn->code) == BPF_MEMSX) {
				if (!bpf_jit_supports_insn(insn, true)) {
					verbose(env, "sign extending loads from arena are not supported yet\n");
					return -EOPNOTSUPP;
				}
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
			} else {
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
			}
			env->prog->aux->num_exentries++;
			continue;
		default:
			continue;
		}

		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
		size = BPF_LDST_BYTES(insn);
		/* remember the mode now: the narrow-load path rewrites insn->code */
		mode = BPF_MODE(insn->code);

		/* If the read access is a narrower load of the field,
		 * convert to a 4/8-byte load, to minimum program type specific
		 * convert_ctx_access changes. If conversion is successful,
		 * we will apply proper mask to the result.
		 */
		is_narrower_load = size < ctx_field_size;
		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
		off = insn->off;
		if (is_narrower_load) {
			u8 size_code;

			if (type == BPF_WRITE) {
				verifier_bug(env, "narrow ctx access misconfigured");
				return -EFAULT;
			}

			size_code = BPF_H;
			if (ctx_field_size == 4)
				size_code = BPF_W;
			else if (ctx_field_size == 8)
				size_code = BPF_DW;

			insn->off = off & ~(size_default - 1);
			insn->code = BPF_LDX | BPF_MEM | size_code;
		}

		target_size = 0;
		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
					 &target_size);
		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
		    (ctx_field_size && !target_size)) {
			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
			return -EFAULT;
		}

		/* Shift and mask the widened load back down to the requested bytes. */
		if (is_narrower_load && size < target_size) {
			u8 shift = bpf_ctx_narrow_access_offset(
				off, size, size_default) * 8;
			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
				verifier_bug(env, "narrow ctx load misconfigured");
				return -EFAULT;
			}
			if (ctx_field_size <= 4) {
				if (shift)
					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1 << size * 8) - 1);
			} else {
				if (shift)
					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1ULL << size * 8) - 1);
			}
		}
		/* For an originally sign-extending load, append a MOV with off = size * 8. */
		if (mode == BPF_MEMSX)
			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
						       insn->dst_reg, insn->dst_reg,
						       size * 8, 0);

patch_insn_buf:
		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
		if (!new_prog)
			return -ENOMEM;

		delta += cnt - 1;

		/* keep walking new program and skip insns we just inserted */
		env->prog = new_prog;
		insn      = new_prog->insnsi + i + delta;
	}

	return 0;
}
987
bpf_dup_subprog_starts(struct bpf_verifier_env * env)988 static u32 *bpf_dup_subprog_starts(struct bpf_verifier_env *env)
989 {
990 u32 *starts = NULL;
991
992 starts = kvmalloc_objs(u32, env->subprog_cnt, GFP_KERNEL_ACCOUNT);
993 if (starts) {
994 for (int i = 0; i < env->subprog_cnt; i++)
995 starts[i] = env->subprog_info[i].start;
996 }
997 return starts;
998 }
999
/*
 * Copy the subprog starts saved in @orig_starts back into env->subprog_info
 * and set the start of the fake 'exit' subprog (index subprog_cnt) to the
 * current program length.
 */
static void bpf_restore_subprog_starts(struct bpf_verifier_env *env, u32 *orig_starts)
{
	int idx = 0;

	while (idx < env->subprog_cnt) {
		env->subprog_info[idx].start = orig_starts[idx];
		idx++;
	}

	/* The fake 'exit' subprog always starts at the end of the program. */
	env->subprog_info[env->subprog_cnt].start = env->prog->len;
}
1007
bpf_dup_insn_aux_data(struct bpf_verifier_env * env)1008 struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env)
1009 {
1010 size_t size;
1011 void *new_aux;
1012
1013 size = array_size(sizeof(struct bpf_insn_aux_data), env->prog->len);
1014 new_aux = __vmalloc(size, GFP_KERNEL_ACCOUNT);
1015 if (new_aux)
1016 memcpy(new_aux, env->insn_aux_data, size);
1017 return new_aux;
1018 }
1019
bpf_restore_insn_aux_data(struct bpf_verifier_env * env,struct bpf_insn_aux_data * orig_insn_aux)1020 void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
1021 struct bpf_insn_aux_data *orig_insn_aux)
1022 {
1023 /* the expanded elements are zero-filled, so no special handling is required */
1024 vfree(env->insn_aux_data);
1025 env->insn_aux_data = orig_insn_aux;
1026 }
1027
jit_subprogs(struct bpf_verifier_env * env)1028 static int jit_subprogs(struct bpf_verifier_env *env)
1029 {
1030 struct bpf_prog *prog = env->prog, **func, *tmp;
1031 int i, j, subprog_start, subprog_end = 0, len, subprog;
1032 struct bpf_map *map_ptr;
1033 struct bpf_insn *insn;
1034 void *old_bpf_func;
1035 int err, num_exentries;
1036
1037 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1038 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
1039 continue;
1040
1041 /* Upon error here we cannot fall back to interpreter but
1042 * need a hard reject of the program. Thus -EFAULT is
1043 * propagated in any case.
1044 */
1045 subprog = bpf_find_subprog(env, i + insn->imm + 1);
1046 if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
1047 i + insn->imm + 1))
1048 return -EFAULT;
1049 /* temporarily remember subprog id inside insn instead of
1050 * aux_data, since next loop will split up all insns into funcs
1051 */
1052 insn->off = subprog;
1053 /* remember original imm in case JIT fails and fallback
1054 * to interpreter will be needed
1055 */
1056 env->insn_aux_data[i].call_imm = insn->imm;
1057 /* point imm to __bpf_call_base+1 from JITs point of view */
1058 insn->imm = 1;
1059 if (bpf_pseudo_func(insn)) {
1060 #if defined(MODULES_VADDR)
1061 u64 addr = MODULES_VADDR;
1062 #else
1063 u64 addr = VMALLOC_START;
1064 #endif
1065 /* jit (e.g. x86_64) may emit fewer instructions
1066 * if it learns a u32 imm is the same as a u64 imm.
1067 * Set close enough to possible prog address.
1068 */
1069 insn[0].imm = (u32)addr;
1070 insn[1].imm = addr >> 32;
1071 }
1072 }
1073
1074 err = bpf_prog_alloc_jited_linfo(prog);
1075 if (err)
1076 goto out_undo_insn;
1077
1078 err = -ENOMEM;
1079 func = kzalloc_objs(prog, env->subprog_cnt);
1080 if (!func)
1081 goto out_undo_insn;
1082
1083 for (i = 0; i < env->subprog_cnt; i++) {
1084 subprog_start = subprog_end;
1085 subprog_end = env->subprog_info[i + 1].start;
1086
1087 len = subprog_end - subprog_start;
1088 /* bpf_prog_run() doesn't call subprogs directly,
1089 * hence main prog stats include the runtime of subprogs.
1090 * subprogs don't have IDs and not reachable via prog_get_next_id
1091 * func[i]->stats will never be accessed and stays NULL
1092 */
1093 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1094 if (!func[i])
1095 goto out_free;
1096 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
1097 len * sizeof(struct bpf_insn));
1098 func[i]->type = prog->type;
1099 func[i]->len = len;
1100 if (bpf_prog_calc_tag(func[i]))
1101 goto out_free;
1102 func[i]->is_func = 1;
1103 func[i]->sleepable = prog->sleepable;
1104 func[i]->blinded = prog->blinded;
1105 func[i]->aux->func_idx = i;
1106 /* Below members will be freed only at prog->aux */
1107 func[i]->aux->btf = prog->aux->btf;
1108 func[i]->aux->subprog_start = subprog_start;
1109 func[i]->aux->func_info = prog->aux->func_info;
1110 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
1111 func[i]->aux->poke_tab = prog->aux->poke_tab;
1112 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
1113 func[i]->aux->main_prog_aux = prog->aux;
1114
1115 for (j = 0; j < prog->aux->size_poke_tab; j++) {
1116 struct bpf_jit_poke_descriptor *poke;
1117
1118 poke = &prog->aux->poke_tab[j];
1119 if (poke->insn_idx < subprog_end &&
1120 poke->insn_idx >= subprog_start)
1121 poke->aux = func[i]->aux;
1122 }
1123
1124 func[i]->aux->name[0] = 'F';
1125 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1126 if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
1127 func[i]->aux->jits_use_priv_stack = true;
1128
1129 func[i]->jit_requested = 1;
1130 func[i]->blinding_requested = prog->blinding_requested;
1131 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
1132 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
1133 func[i]->aux->linfo = prog->aux->linfo;
1134 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
1135 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
1136 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1137 func[i]->aux->arena = prog->aux->arena;
1138 func[i]->aux->used_maps = env->used_maps;
1139 func[i]->aux->used_map_cnt = env->used_map_cnt;
1140 num_exentries = 0;
1141 insn = func[i]->insnsi;
1142 for (j = 0; j < func[i]->len; j++, insn++) {
1143 if (BPF_CLASS(insn->code) == BPF_LDX &&
1144 (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
1145 BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
1146 BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
1147 BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
1148 num_exentries++;
1149 if ((BPF_CLASS(insn->code) == BPF_STX ||
1150 BPF_CLASS(insn->code) == BPF_ST) &&
1151 BPF_MODE(insn->code) == BPF_PROBE_MEM32)
1152 num_exentries++;
1153 if (BPF_CLASS(insn->code) == BPF_STX &&
1154 BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
1155 num_exentries++;
1156 }
1157 func[i]->aux->num_exentries = num_exentries;
1158 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
1159 func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
1160 func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
1161 func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
1162 func[i]->aux->token = prog->aux->token;
1163 if (!i)
1164 func[i]->aux->exception_boundary = env->seen_exception;
1165 func[i] = bpf_int_jit_compile(env, func[i]);
1166 if (!func[i]->jited) {
1167 err = -ENOTSUPP;
1168 goto out_free;
1169 }
1170 cond_resched();
1171 }
1172
1173 /* at this point all bpf functions were successfully JITed
1174 * now populate all bpf_calls with correct addresses and
1175 * run last pass of JIT
1176 */
1177 for (i = 0; i < env->subprog_cnt; i++) {
1178 insn = func[i]->insnsi;
1179 for (j = 0; j < func[i]->len; j++, insn++) {
1180 if (bpf_pseudo_func(insn)) {
1181 subprog = insn->off;
1182 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
1183 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
1184 continue;
1185 }
1186 if (!bpf_pseudo_call(insn))
1187 continue;
1188 subprog = insn->off;
1189 insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
1190 }
1191
1192 /* we use the aux data to keep a list of the start addresses
1193 * of the JITed images for each function in the program
1194 *
1195 * for some architectures, such as powerpc64, the imm field
1196 * might not be large enough to hold the offset of the start
1197 * address of the callee's JITed image from __bpf_call_base
1198 *
1199 * in such cases, we can lookup the start address of a callee
1200 * by using its subprog id, available from the off field of
1201 * the call instruction, as an index for this list
1202 */
1203 func[i]->aux->func = func;
1204 func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
1205 func[i]->aux->real_func_cnt = env->subprog_cnt;
1206 }
1207 for (i = 0; i < env->subprog_cnt; i++) {
1208 old_bpf_func = func[i]->bpf_func;
1209 tmp = bpf_int_jit_compile(env, func[i]);
1210 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
1211 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
1212 err = -ENOTSUPP;
1213 goto out_free;
1214 }
1215 cond_resched();
1216 }
1217
1218 /*
1219 * Cleanup func[i]->aux fields which aren't required
1220 * or can become invalid in future
1221 */
1222 for (i = 0; i < env->subprog_cnt; i++) {
1223 func[i]->aux->used_maps = NULL;
1224 func[i]->aux->used_map_cnt = 0;
1225 }
1226
1227 /* finally lock prog and jit images for all functions and
1228 * populate kallsysm. Begin at the first subprogram, since
1229 * bpf_prog_load will add the kallsyms for the main program.
1230 */
1231 for (i = 1; i < env->subprog_cnt; i++) {
1232 err = bpf_prog_lock_ro(func[i]);
1233 if (err)
1234 goto out_free;
1235 }
1236
1237 for (i = 1; i < env->subprog_cnt; i++)
1238 bpf_prog_kallsyms_add(func[i]);
1239
1240 /* Last step: make now unused interpreter insns from main
1241 * prog consistent for later dump requests, so they can
1242 * later look the same as if they were interpreted only.
1243 */
1244 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1245 if (bpf_pseudo_func(insn)) {
1246 insn[0].imm = env->insn_aux_data[i].call_imm;
1247 insn[1].imm = insn->off;
1248 insn->off = 0;
1249 continue;
1250 }
1251 if (!bpf_pseudo_call(insn))
1252 continue;
1253 insn->imm = env->insn_aux_data[i].call_imm;
1254 subprog = bpf_find_subprog(env, i + insn->imm + 1);
1255 insn->off = subprog;
1256 }
1257
1258 prog->jited = 1;
1259 prog->bpf_func = func[0]->bpf_func;
1260 prog->jited_len = func[0]->jited_len;
1261 prog->aux->extable = func[0]->aux->extable;
1262 prog->aux->num_exentries = func[0]->aux->num_exentries;
1263 prog->aux->func = func;
1264 prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
1265 prog->aux->real_func_cnt = env->subprog_cnt;
1266 prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
1267 prog->aux->exception_boundary = func[0]->aux->exception_boundary;
1268 bpf_prog_jit_attempt_done(prog);
1269 return 0;
1270 out_free:
1271 /* We failed JIT'ing, so at this point we need to unregister poke
1272 * descriptors from subprogs, so that kernel is not attempting to
1273 * patch it anymore as we're freeing the subprog JIT memory.
1274 */
1275 for (i = 0; i < prog->aux->size_poke_tab; i++) {
1276 map_ptr = prog->aux->poke_tab[i].tail_call.map;
1277 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
1278 }
1279 /* At this point we're guaranteed that poke descriptors are not
1280 * live anymore. We can just unlink its descriptor table as it's
1281 * released with the main prog.
1282 */
1283 for (i = 0; i < env->subprog_cnt; i++) {
1284 if (!func[i])
1285 continue;
1286 func[i]->aux->poke_tab = NULL;
1287 bpf_jit_free(func[i]);
1288 }
1289 kfree(func);
1290 out_undo_insn:
1291 bpf_prog_jit_attempt_done(prog);
1292 return err;
1293 }
1294
bpf_jit_subprogs(struct bpf_verifier_env * env)1295 int bpf_jit_subprogs(struct bpf_verifier_env *env)
1296 {
1297 int err, i;
1298 bool blinded = false;
1299 struct bpf_insn *insn;
1300 struct bpf_prog *prog, *orig_prog;
1301 struct bpf_insn_aux_data *orig_insn_aux;
1302 u32 *orig_subprog_starts;
1303
1304 if (env->subprog_cnt <= 1)
1305 return 0;
1306
1307 prog = orig_prog = env->prog;
1308 if (bpf_prog_need_blind(prog)) {
1309 orig_insn_aux = bpf_dup_insn_aux_data(env);
1310 if (!orig_insn_aux) {
1311 err = -ENOMEM;
1312 goto out_cleanup;
1313 }
1314 orig_subprog_starts = bpf_dup_subprog_starts(env);
1315 if (!orig_subprog_starts) {
1316 vfree(orig_insn_aux);
1317 err = -ENOMEM;
1318 goto out_cleanup;
1319 }
1320 prog = bpf_jit_blind_constants(env, prog);
1321 if (IS_ERR(prog)) {
1322 err = -ENOMEM;
1323 prog = orig_prog;
1324 goto out_restore;
1325 }
1326 blinded = true;
1327 }
1328
1329 err = jit_subprogs(env);
1330 if (err)
1331 goto out_jit_err;
1332
1333 if (blinded) {
1334 bpf_jit_prog_release_other(prog, orig_prog);
1335 kvfree(orig_subprog_starts);
1336 vfree(orig_insn_aux);
1337 }
1338
1339 return 0;
1340
1341 out_jit_err:
1342 if (blinded) {
1343 bpf_jit_prog_release_other(orig_prog, prog);
1344 /* roll back to the clean original prog */
1345 prog = env->prog = orig_prog;
1346 goto out_restore;
1347 } else {
1348 if (err != -EFAULT) {
1349 /*
1350 * We will fall back to interpreter mode when err is not -EFAULT, before
1351 * that, insn->off and insn->imm should be restored to their original
1352 * values since they were modified by jit_subprogs.
1353 */
1354 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1355 if (!bpf_pseudo_call(insn))
1356 continue;
1357 insn->off = 0;
1358 insn->imm = env->insn_aux_data[i].call_imm;
1359 }
1360 }
1361 goto out_cleanup;
1362 }
1363
1364 out_restore:
1365 bpf_restore_subprog_starts(env, orig_subprog_starts);
1366 bpf_restore_insn_aux_data(env, orig_insn_aux);
1367 kvfree(orig_subprog_starts);
1368 out_cleanup:
1369 /* cleanup main prog to be interpreted */
1370 prog->jit_requested = 0;
1371 prog->blinding_requested = 0;
1372 return err;
1373 }
1374
bpf_fixup_call_args(struct bpf_verifier_env * env)1375 int bpf_fixup_call_args(struct bpf_verifier_env *env)
1376 {
1377 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1378 struct bpf_prog *prog = env->prog;
1379 struct bpf_insn *insn = prog->insnsi;
1380 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
1381 int i, depth;
1382 #endif
1383 int err = 0;
1384
1385 if (env->prog->jit_requested &&
1386 !bpf_prog_is_offloaded(env->prog->aux)) {
1387 err = bpf_jit_subprogs(env);
1388 if (err == 0)
1389 return 0;
1390 if (err == -EFAULT)
1391 return err;
1392 }
1393 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1394 if (has_kfunc_call) {
1395 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
1396 return -EINVAL;
1397 }
1398 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
1399 /* When JIT fails the progs with bpf2bpf calls and tail_calls
1400 * have to be rejected, since interpreter doesn't support them yet.
1401 */
1402 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
1403 return -EINVAL;
1404 }
1405 for (i = 0; i < prog->len; i++, insn++) {
1406 if (bpf_pseudo_func(insn)) {
1407 /* When JIT fails the progs with callback calls
1408 * have to be rejected, since interpreter doesn't support them yet.
1409 */
1410 verbose(env, "callbacks are not allowed in non-JITed programs\n");
1411 return -EINVAL;
1412 }
1413
1414 if (!bpf_pseudo_call(insn))
1415 continue;
1416 depth = get_callee_stack_depth(env, insn, i);
1417 if (depth < 0)
1418 return depth;
1419 err = bpf_patch_call_args(insn, depth);
1420 if (err) {
1421 verbose(env, "stack depth %d exceeds interpreter stack depth limit\n",
1422 depth);
1423 return err;
1424 }
1425 }
1426 err = 0;
1427 #endif
1428 return err;
1429 }
1430
1431
1432 /* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
add_hidden_subprog(struct bpf_verifier_env * env,struct bpf_insn * patch,int len)1433 static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
1434 {
1435 struct bpf_subprog_info *info = env->subprog_info;
1436 int cnt = env->subprog_cnt;
1437 struct bpf_prog *prog;
1438
1439 /* We only reserve one slot for hidden subprogs in subprog_info. */
1440 if (env->hidden_subprog_cnt) {
1441 verifier_bug(env, "only one hidden subprog supported");
1442 return -EFAULT;
1443 }
1444 /* We're not patching any existing instruction, just appending the new
1445 * ones for the hidden subprog. Hence all of the adjustment operations
1446 * in bpf_patch_insn_data are no-ops.
1447 */
1448 prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
1449 if (!prog)
1450 return -ENOMEM;
1451 env->prog = prog;
1452 info[cnt + 1].start = info[cnt].start;
1453 info[cnt].start = prog->len - len + 1;
1454 env->subprog_cnt++;
1455 env->hidden_subprog_cnt++;
1456 return 0;
1457 }
1458
1459 /* Do various post-verification rewrites in a single program pass.
1460 * These rewrites simplify JIT and interpreter implementations.
1461 */
bpf_do_misc_fixups(struct bpf_verifier_env * env)1462 int bpf_do_misc_fixups(struct bpf_verifier_env *env)
1463 {
1464 struct bpf_prog *prog = env->prog;
1465 enum bpf_attach_type eatype = prog->expected_attach_type;
1466 enum bpf_prog_type prog_type = resolve_prog_type(prog);
1467 struct bpf_insn *insn = prog->insnsi;
1468 const struct bpf_func_proto *fn;
1469 const int insn_cnt = prog->len;
1470 const struct bpf_map_ops *ops;
1471 struct bpf_insn_aux_data *aux;
1472 struct bpf_insn *insn_buf = env->insn_buf;
1473 struct bpf_prog *new_prog;
1474 struct bpf_map *map_ptr;
1475 int i, ret, cnt, delta = 0, cur_subprog = 0;
1476 struct bpf_subprog_info *subprogs = env->subprog_info;
1477 u16 stack_depth = subprogs[cur_subprog].stack_depth;
1478 u16 stack_depth_extra = 0;
1479
1480 if (env->seen_exception && !env->exception_callback_subprog) {
1481 struct bpf_insn *patch = insn_buf;
1482
1483 *patch++ = env->prog->insnsi[insn_cnt - 1];
1484 *patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
1485 *patch++ = BPF_EXIT_INSN();
1486 ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
1487 if (ret < 0)
1488 return ret;
1489 prog = env->prog;
1490 insn = prog->insnsi;
1491
1492 env->exception_callback_subprog = env->subprog_cnt - 1;
1493 /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
1494 bpf_mark_subprog_exc_cb(env, env->exception_callback_subprog);
1495 }
1496
1497 for (i = 0; i < insn_cnt;) {
1498 if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
1499 if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
1500 (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
1501 /* convert to 32-bit mov that clears upper 32-bit */
1502 insn->code = BPF_ALU | BPF_MOV | BPF_X;
1503 /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
1504 insn->off = 0;
1505 insn->imm = 0;
1506 } /* cast from as(0) to as(1) should be handled by JIT */
1507 goto next_insn;
1508 }
1509
1510 if (env->insn_aux_data[i + delta].needs_zext)
1511 /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
1512 insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
1513
1514 /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
1515 if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
1516 insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
1517 insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
1518 insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
1519 insn->off == 1 && insn->imm == -1) {
1520 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1521 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
1522 struct bpf_insn *patch = insn_buf;
1523
1524 if (isdiv)
1525 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1526 BPF_NEG | BPF_K, insn->dst_reg,
1527 0, 0, 0);
1528 else
1529 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
1530
1531 cnt = patch - insn_buf;
1532
1533 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1534 if (!new_prog)
1535 return -ENOMEM;
1536
1537 delta += cnt - 1;
1538 env->prog = prog = new_prog;
1539 insn = new_prog->insnsi + i + delta;
1540 goto next_insn;
1541 }
1542
1543 /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
1544 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
1545 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
1546 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
1547 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
1548 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1549 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
1550 bool is_sdiv = isdiv && insn->off == 1;
1551 bool is_smod = !isdiv && insn->off == 1;
1552 struct bpf_insn *patch = insn_buf;
1553
1554 if (is_sdiv) {
1555 /* [R,W]x sdiv 0 -> 0
1556 * LLONG_MIN sdiv -1 -> LLONG_MIN
1557 * INT_MIN sdiv -1 -> INT_MIN
1558 */
1559 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1560 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1561 BPF_ADD | BPF_K, BPF_REG_AX,
1562 0, 0, 1);
1563 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1564 BPF_JGT | BPF_K, BPF_REG_AX,
1565 0, 4, 1);
1566 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1567 BPF_JEQ | BPF_K, BPF_REG_AX,
1568 0, 1, 0);
1569 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1570 BPF_MOV | BPF_K, insn->dst_reg,
1571 0, 0, 0);
1572 /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
1573 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1574 BPF_NEG | BPF_K, insn->dst_reg,
1575 0, 0, 0);
1576 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1577 *patch++ = *insn;
1578 cnt = patch - insn_buf;
1579 } else if (is_smod) {
1580 /* [R,W]x mod 0 -> [R,W]x */
1581 /* [R,W]x mod -1 -> 0 */
1582 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1583 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
1584 BPF_ADD | BPF_K, BPF_REG_AX,
1585 0, 0, 1);
1586 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1587 BPF_JGT | BPF_K, BPF_REG_AX,
1588 0, 3, 1);
1589 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1590 BPF_JEQ | BPF_K, BPF_REG_AX,
1591 0, 3 + (is64 ? 0 : 1), 1);
1592 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
1593 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1594 *patch++ = *insn;
1595
1596 if (!is64) {
1597 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1598 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
1599 }
1600 cnt = patch - insn_buf;
1601 } else if (isdiv) {
1602 /* [R,W]x div 0 -> 0 */
1603 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1604 BPF_JNE | BPF_K, insn->src_reg,
1605 0, 2, 0);
1606 *patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
1607 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1608 *patch++ = *insn;
1609 cnt = patch - insn_buf;
1610 } else {
1611 /* [R,W]x mod 0 -> [R,W]x */
1612 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
1613 BPF_JEQ | BPF_K, insn->src_reg,
1614 0, 1 + (is64 ? 0 : 1), 0);
1615 *patch++ = *insn;
1616
1617 if (!is64) {
1618 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1619 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
1620 }
1621 cnt = patch - insn_buf;
1622 }
1623
1624 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1625 if (!new_prog)
1626 return -ENOMEM;
1627
1628 delta += cnt - 1;
1629 env->prog = prog = new_prog;
1630 insn = new_prog->insnsi + i + delta;
1631 goto next_insn;
1632 }
1633
1634 /* Make it impossible to de-reference a userspace address */
1635 if (BPF_CLASS(insn->code) == BPF_LDX &&
1636 (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
1637 BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
1638 struct bpf_insn *patch = insn_buf;
1639 u64 uaddress_limit = bpf_arch_uaddress_limit();
1640
1641 if (!uaddress_limit)
1642 goto next_insn;
1643
1644 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
1645 if (insn->off)
1646 *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
1647 *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
1648 *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
1649 *patch++ = *insn;
1650 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1651 *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
1652
1653 cnt = patch - insn_buf;
1654 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1655 if (!new_prog)
1656 return -ENOMEM;
1657
1658 delta += cnt - 1;
1659 env->prog = prog = new_prog;
1660 insn = new_prog->insnsi + i + delta;
1661 goto next_insn;
1662 }
1663
1664 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
1665 if (BPF_CLASS(insn->code) == BPF_LD &&
1666 (BPF_MODE(insn->code) == BPF_ABS ||
1667 BPF_MODE(insn->code) == BPF_IND)) {
1668 cnt = env->ops->gen_ld_abs(insn, insn_buf);
1669 if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
1670 verifier_bug(env, "%d insns generated for ld_abs", cnt);
1671 return -EFAULT;
1672 }
1673
1674 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1675 if (!new_prog)
1676 return -ENOMEM;
1677
1678 delta += cnt - 1;
1679 env->prog = prog = new_prog;
1680 insn = new_prog->insnsi + i + delta;
1681 goto next_insn;
1682 }
1683
1684 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
1685 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
1686 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
1687 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
1688 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
1689 struct bpf_insn *patch = insn_buf;
1690 bool issrc, isneg, isimm;
1691 u32 off_reg;
1692
1693 aux = &env->insn_aux_data[i + delta];
1694 if (!aux->alu_state ||
1695 aux->alu_state == BPF_ALU_NON_POINTER)
1696 goto next_insn;
1697
1698 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
1699 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
1700 BPF_ALU_SANITIZE_SRC;
1701 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
1702
1703 off_reg = issrc ? insn->src_reg : insn->dst_reg;
1704 if (isimm) {
1705 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
1706 } else {
1707 if (isneg)
1708 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
1709 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
1710 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
1711 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
1712 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
1713 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
1714 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
1715 }
1716 if (!issrc)
1717 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
1718 insn->src_reg = BPF_REG_AX;
1719 if (isneg)
1720 insn->code = insn->code == code_add ?
1721 code_sub : code_add;
1722 *patch++ = *insn;
1723 if (issrc && isneg && !isimm)
1724 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
1725 cnt = patch - insn_buf;
1726
1727 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1728 if (!new_prog)
1729 return -ENOMEM;
1730
1731 delta += cnt - 1;
1732 env->prog = prog = new_prog;
1733 insn = new_prog->insnsi + i + delta;
1734 goto next_insn;
1735 }
1736
1737 if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
1738 int stack_off_cnt = -stack_depth - 16;
1739
1740 /*
1741 * Two 8 byte slots, depth-16 stores the count, and
1742 * depth-8 stores the start timestamp of the loop.
1743 *
1744 * The starting value of count is BPF_MAX_TIMED_LOOPS
1745 * (0xffff). Every iteration loads it and subs it by 1,
1746 * until the value becomes 0 in AX (thus, 1 in stack),
1747 * after which we call arch_bpf_timed_may_goto, which
1748 * either sets AX to 0xffff to keep looping, or to 0
1749 * upon timeout. AX is then stored into the stack. In
1750 * the next iteration, we either see 0 and break out, or
1751 * continue iterating until the next time value is 0
1752 * after subtraction, rinse and repeat.
1753 */
1754 stack_depth_extra = 16;
1755 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
1756 if (insn->off >= 0)
1757 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
1758 else
1759 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
1760 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
1761 insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
1762 /*
1763 * AX is used as an argument to pass in stack_off_cnt
1764 * (to add to r10/fp), and also as the return value of
1765 * the call to arch_bpf_timed_may_goto.
1766 */
1767 insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
1768 insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
1769 insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
1770 cnt = 7;
1771
1772 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1773 if (!new_prog)
1774 return -ENOMEM;
1775
1776 delta += cnt - 1;
1777 env->prog = prog = new_prog;
1778 insn = new_prog->insnsi + i + delta;
1779 goto next_insn;
1780 } else if (bpf_is_may_goto_insn(insn)) {
1781 int stack_off = -stack_depth - 8;
1782
1783 stack_depth_extra = 8;
1784 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
1785 if (insn->off >= 0)
1786 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
1787 else
1788 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
1789 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
1790 insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
1791 cnt = 4;
1792
1793 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1794 if (!new_prog)
1795 return -ENOMEM;
1796
1797 delta += cnt - 1;
1798 env->prog = prog = new_prog;
1799 insn = new_prog->insnsi + i + delta;
1800 goto next_insn;
1801 }
1802
1803 if (insn->code != (BPF_JMP | BPF_CALL))
1804 goto next_insn;
1805 if (insn->src_reg == BPF_PSEUDO_CALL)
1806 goto next_insn;
1807 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1808 ret = bpf_fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
1809 if (ret)
1810 return ret;
1811 if (cnt == 0)
1812 goto next_insn;
1813
1814 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1815 if (!new_prog)
1816 return -ENOMEM;
1817
1818 delta += cnt - 1;
1819 env->prog = prog = new_prog;
1820 insn = new_prog->insnsi + i + delta;
1821 goto next_insn;
1822 }
1823
1824 /* Skip inlining the helper call if the JIT does it. */
1825 if (bpf_jit_inlines_helper_call(insn->imm))
1826 goto next_insn;
1827
1828 if (insn->imm == BPF_FUNC_get_route_realm)
1829 prog->dst_needed = 1;
1830 if (insn->imm == BPF_FUNC_get_prandom_u32)
1831 bpf_user_rnd_init_once();
1832 if (insn->imm == BPF_FUNC_override_return)
1833 prog->kprobe_override = 1;
1834 if (insn->imm == BPF_FUNC_tail_call) {
1835 /* If we tail call into other programs, we
1836 * cannot make any assumptions since they can
1837 * be replaced dynamically during runtime in
1838 * the program array.
1839 */
1840 prog->cb_access = 1;
1841 if (!bpf_allow_tail_call_in_subprogs(env))
1842 prog->aux->stack_depth = MAX_BPF_STACK;
1843 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
1844
1845 /* mark bpf_tail_call as different opcode to avoid
1846 * conditional branch in the interpreter for every normal
1847 * call and to prevent accidental JITing by JIT compiler
1848 * that doesn't support bpf_tail_call yet
1849 */
1850 insn->imm = 0;
1851 insn->code = BPF_JMP | BPF_TAIL_CALL;
1852
1853 aux = &env->insn_aux_data[i + delta];
1854 if (env->bpf_capable && !prog->blinding_requested &&
1855 prog->jit_requested &&
1856 !bpf_map_key_poisoned(aux) &&
1857 !bpf_map_ptr_poisoned(aux) &&
1858 !bpf_map_ptr_unpriv(aux)) {
1859 struct bpf_jit_poke_descriptor desc = {
1860 .reason = BPF_POKE_REASON_TAIL_CALL,
1861 .tail_call.map = aux->map_ptr_state.map_ptr,
1862 .tail_call.key = bpf_map_key_immediate(aux),
1863 .insn_idx = i + delta,
1864 };
1865
1866 ret = bpf_jit_add_poke_descriptor(prog, &desc);
1867 if (ret < 0) {
1868 verbose(env, "adding tail call poke descriptor failed\n");
1869 return ret;
1870 }
1871
1872 insn->imm = ret + 1;
1873 goto next_insn;
1874 }
1875
1876 if (!bpf_map_ptr_unpriv(aux))
1877 goto next_insn;
1878
1879 /* instead of changing every JIT dealing with tail_call
1880 * emit two extra insns:
1881 * if (index >= max_entries) goto out;
1882 * index &= array->index_mask;
1883 * to avoid out-of-bounds cpu speculation
1884 */
1885 if (bpf_map_ptr_poisoned(aux)) {
1886 verbose(env, "tail_call abusing map_ptr\n");
1887 return -EINVAL;
1888 }
1889
1890 map_ptr = aux->map_ptr_state.map_ptr;
1891 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
1892 map_ptr->max_entries, 2);
1893 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
1894 container_of(map_ptr,
1895 struct bpf_array,
1896 map)->index_mask);
1897 insn_buf[2] = *insn;
1898 cnt = 3;
1899 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1900 if (!new_prog)
1901 return -ENOMEM;
1902
1903 delta += cnt - 1;
1904 env->prog = prog = new_prog;
1905 insn = new_prog->insnsi + i + delta;
1906 goto next_insn;
1907 }
1908
1909 if (insn->imm == BPF_FUNC_timer_set_callback) {
1910 /* The verifier will process callback_fn as many times as necessary
1911 * with different maps and the register states prepared by
1912 * set_timer_callback_state will be accurate.
1913 *
1914 * The following use case is valid:
1915 * map1 is shared by prog1, prog2, prog3.
1916 * prog1 calls bpf_timer_init for some map1 elements
1917 * prog2 calls bpf_timer_set_callback for some map1 elements.
1918 * Those that were not bpf_timer_init-ed will return -EINVAL.
1919 * prog3 calls bpf_timer_start for some map1 elements.
1920 * Those that were not both bpf_timer_init-ed and
1921 * bpf_timer_set_callback-ed will return -EINVAL.
1922 */
1923 struct bpf_insn ld_addrs[2] = {
1924 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
1925 };
1926
1927 insn_buf[0] = ld_addrs[0];
1928 insn_buf[1] = ld_addrs[1];
1929 insn_buf[2] = *insn;
1930 cnt = 3;
1931
1932 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1933 if (!new_prog)
1934 return -ENOMEM;
1935
1936 delta += cnt - 1;
1937 env->prog = prog = new_prog;
1938 insn = new_prog->insnsi + i + delta;
1939 goto patch_call_imm;
1940 }
1941
1942 /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
1943 if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
1944 /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
1945 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
1946 */
1947 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
1948 insn_buf[1] = *insn;
1949 cnt = 2;
1950
1951 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1952 if (!new_prog)
1953 return -ENOMEM;
1954
1955 delta += cnt - 1;
1956 env->prog = prog = new_prog;
1957 insn = new_prog->insnsi + i + delta;
1958 goto patch_call_imm;
1959 }
1960
1961 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
1962 * and other inlining handlers are currently limited to 64 bit
1963 * only.
1964 */
1965 if (prog->jit_requested && BITS_PER_LONG == 64 &&
1966 (insn->imm == BPF_FUNC_map_lookup_elem ||
1967 insn->imm == BPF_FUNC_map_update_elem ||
1968 insn->imm == BPF_FUNC_map_delete_elem ||
1969 insn->imm == BPF_FUNC_map_push_elem ||
1970 insn->imm == BPF_FUNC_map_pop_elem ||
1971 insn->imm == BPF_FUNC_map_peek_elem ||
1972 insn->imm == BPF_FUNC_redirect_map ||
1973 insn->imm == BPF_FUNC_for_each_map_elem ||
1974 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
1975 aux = &env->insn_aux_data[i + delta];
1976 if (bpf_map_ptr_poisoned(aux))
1977 goto patch_call_imm;
1978
1979 map_ptr = aux->map_ptr_state.map_ptr;
1980 ops = map_ptr->ops;
1981 if (insn->imm == BPF_FUNC_map_lookup_elem &&
1982 ops->map_gen_lookup) {
1983 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
1984 if (cnt == -EOPNOTSUPP)
1985 goto patch_map_ops_generic;
1986 if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
1987 verifier_bug(env, "%d insns generated for map lookup", cnt);
1988 return -EFAULT;
1989 }
1990
1991 new_prog = bpf_patch_insn_data(env, i + delta,
1992 insn_buf, cnt);
1993 if (!new_prog)
1994 return -ENOMEM;
1995
1996 delta += cnt - 1;
1997 env->prog = prog = new_prog;
1998 insn = new_prog->insnsi + i + delta;
1999 goto next_insn;
2000 }
2001
2002 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
2003 (void *(*)(struct bpf_map *map, void *key))NULL));
2004 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
2005 (long (*)(struct bpf_map *map, void *key))NULL));
2006 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
2007 (long (*)(struct bpf_map *map, void *key, void *value,
2008 u64 flags))NULL));
2009 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
2010 (long (*)(struct bpf_map *map, void *value,
2011 u64 flags))NULL));
2012 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
2013 (long (*)(struct bpf_map *map, void *value))NULL));
2014 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
2015 (long (*)(struct bpf_map *map, void *value))NULL));
2016 BUILD_BUG_ON(!__same_type(ops->map_redirect,
2017 (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
2018 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
2019 (long (*)(struct bpf_map *map,
2020 bpf_callback_t callback_fn,
2021 void *callback_ctx,
2022 u64 flags))NULL));
2023 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
2024 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
2025
2026 patch_map_ops_generic:
2027 switch (insn->imm) {
2028 case BPF_FUNC_map_lookup_elem:
2029 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
2030 goto next_insn;
2031 case BPF_FUNC_map_update_elem:
2032 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
2033 goto next_insn;
2034 case BPF_FUNC_map_delete_elem:
2035 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
2036 goto next_insn;
2037 case BPF_FUNC_map_push_elem:
2038 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
2039 goto next_insn;
2040 case BPF_FUNC_map_pop_elem:
2041 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
2042 goto next_insn;
2043 case BPF_FUNC_map_peek_elem:
2044 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
2045 goto next_insn;
2046 case BPF_FUNC_redirect_map:
2047 insn->imm = BPF_CALL_IMM(ops->map_redirect);
2048 goto next_insn;
2049 case BPF_FUNC_for_each_map_elem:
2050 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
2051 goto next_insn;
2052 case BPF_FUNC_map_lookup_percpu_elem:
2053 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
2054 goto next_insn;
2055 }
2056
2057 goto patch_call_imm;
2058 }
2059
2060 /* Implement bpf_jiffies64 inline. */
2061 if (prog->jit_requested && BITS_PER_LONG == 64 &&
2062 insn->imm == BPF_FUNC_jiffies64) {
2063 struct bpf_insn ld_jiffies_addr[2] = {
2064 BPF_LD_IMM64(BPF_REG_0,
2065 (unsigned long)&jiffies),
2066 };
2067
2068 insn_buf[0] = ld_jiffies_addr[0];
2069 insn_buf[1] = ld_jiffies_addr[1];
2070 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
2071 BPF_REG_0, 0);
2072 cnt = 3;
2073
2074 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
2075 cnt);
2076 if (!new_prog)
2077 return -ENOMEM;
2078
2079 delta += cnt - 1;
2080 env->prog = prog = new_prog;
2081 insn = new_prog->insnsi + i + delta;
2082 goto next_insn;
2083 }
2084
2085 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
2086 /* Implement bpf_get_smp_processor_id() inline. */
2087 if (insn->imm == BPF_FUNC_get_smp_processor_id &&
2088 bpf_verifier_inlines_helper_call(env, insn->imm)) {
2089 /* BPF_FUNC_get_smp_processor_id inlining is an
2090 * optimization, so if cpu_number is ever
2091 * changed in some incompatible and hard to support
2092 * way, it's fine to back out this inlining logic
2093 */
2094 #ifdef CONFIG_SMP
2095 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
2096 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
2097 insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
2098 cnt = 3;
2099 #else
2100 insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
2101 cnt = 1;
2102 #endif
2103 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2104 if (!new_prog)
2105 return -ENOMEM;
2106
2107 delta += cnt - 1;
2108 env->prog = prog = new_prog;
2109 insn = new_prog->insnsi + i + delta;
2110 goto next_insn;
2111 }
2112
2113 /* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
2114 if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
2115 bpf_verifier_inlines_helper_call(env, insn->imm)) {
2116 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)¤t_task);
2117 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
2118 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
2119 cnt = 3;
2120
2121 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2122 if (!new_prog)
2123 return -ENOMEM;
2124
2125 delta += cnt - 1;
2126 env->prog = prog = new_prog;
2127 insn = new_prog->insnsi + i + delta;
2128 goto next_insn;
2129 }
2130 #endif
2131 /* Implement bpf_get_func_arg inline. */
2132 if (prog_type == BPF_PROG_TYPE_TRACING &&
2133 insn->imm == BPF_FUNC_get_func_arg) {
2134 if (eatype == BPF_TRACE_RAW_TP) {
2135 int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
2136
2137 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */
2138 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
2139 cnt = 1;
2140 } else {
2141 /* Load nr_args from ctx - 8 */
2142 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2143 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2144 cnt = 2;
2145 }
2146 insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
2147 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
2148 insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
2149 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
2150 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
2151 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
2152 insn_buf[cnt++] = BPF_JMP_A(1);
2153 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
2154
2155 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2156 if (!new_prog)
2157 return -ENOMEM;
2158
2159 delta += cnt - 1;
2160 env->prog = prog = new_prog;
2161 insn = new_prog->insnsi + i + delta;
2162 goto next_insn;
2163 }
2164
2165 /* Implement bpf_get_func_ret inline. */
2166 if (prog_type == BPF_PROG_TYPE_TRACING &&
2167 insn->imm == BPF_FUNC_get_func_ret) {
2168 if (eatype == BPF_TRACE_FEXIT ||
2169 eatype == BPF_TRACE_FSESSION ||
2170 eatype == BPF_MODIFY_RETURN) {
2171 /* Load nr_args from ctx - 8 */
2172 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2173 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2174 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
2175 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
2176 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
2177 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
2178 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
2179 cnt = 7;
2180 } else {
2181 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
2182 cnt = 1;
2183 }
2184
2185 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2186 if (!new_prog)
2187 return -ENOMEM;
2188
2189 delta += cnt - 1;
2190 env->prog = prog = new_prog;
2191 insn = new_prog->insnsi + i + delta;
2192 goto next_insn;
2193 }
2194
2195 /* Implement get_func_arg_cnt inline. */
2196 if (prog_type == BPF_PROG_TYPE_TRACING &&
2197 insn->imm == BPF_FUNC_get_func_arg_cnt) {
2198 if (eatype == BPF_TRACE_RAW_TP) {
2199 int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
2200
2201 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */
2202 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
2203 cnt = 1;
2204 } else {
2205 /* Load nr_args from ctx - 8 */
2206 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
2207 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
2208 cnt = 2;
2209 }
2210
2211 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2212 if (!new_prog)
2213 return -ENOMEM;
2214
2215 delta += cnt - 1;
2216 env->prog = prog = new_prog;
2217 insn = new_prog->insnsi + i + delta;
2218 goto next_insn;
2219 }
2220
2221 /* Implement bpf_get_func_ip inline. */
2222 if (prog_type == BPF_PROG_TYPE_TRACING &&
2223 insn->imm == BPF_FUNC_get_func_ip) {
2224 /* Load IP address from ctx - 16 */
2225 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
2226
2227 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
2228 if (!new_prog)
2229 return -ENOMEM;
2230
2231 env->prog = prog = new_prog;
2232 insn = new_prog->insnsi + i + delta;
2233 goto next_insn;
2234 }
2235
2236 /* Implement bpf_get_branch_snapshot inline. */
2237 if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
2238 prog->jit_requested && BITS_PER_LONG == 64 &&
2239 insn->imm == BPF_FUNC_get_branch_snapshot) {
2240 /* We are dealing with the following func protos:
2241 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
2242 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
2243 */
2244 const u32 br_entry_size = sizeof(struct perf_branch_entry);
2245
2246 /* struct perf_branch_entry is part of UAPI and is
2247 * used as an array element, so extremely unlikely to
2248 * ever grow or shrink
2249 */
2250 BUILD_BUG_ON(br_entry_size != 24);
2251
2252 /* if (unlikely(flags)) return -EINVAL */
2253 insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
2254
2255 /* Transform size (bytes) into number of entries (cnt = size / 24).
2256 * But to avoid expensive division instruction, we implement
2257 * divide-by-3 through multiplication, followed by further
2258 * division by 8 through 3-bit right shift.
2259 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
2260 * p. 227, chapter "Unsigned Division by 3" for details and proofs.
2261 *
2262 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
2263 */
2264 insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
2265 insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
2266 insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
2267
2268 /* call perf_snapshot_branch_stack implementation */
2269 insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
2270 /* if (entry_cnt == 0) return -ENOENT */
2271 insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
2272 /* return entry_cnt * sizeof(struct perf_branch_entry) */
2273 insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
2274 insn_buf[7] = BPF_JMP_A(3);
2275 /* return -EINVAL; */
2276 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
2277 insn_buf[9] = BPF_JMP_A(1);
2278 /* return -ENOENT; */
2279 insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
2280 cnt = 11;
2281
2282 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2283 if (!new_prog)
2284 return -ENOMEM;
2285
2286 delta += cnt - 1;
2287 env->prog = prog = new_prog;
2288 insn = new_prog->insnsi + i + delta;
2289 goto next_insn;
2290 }
2291
2292 /* Implement bpf_kptr_xchg inline */
2293 if (prog->jit_requested && BITS_PER_LONG == 64 &&
2294 insn->imm == BPF_FUNC_kptr_xchg &&
2295 bpf_jit_supports_ptr_xchg()) {
2296 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
2297 insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
2298 cnt = 2;
2299
2300 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
2301 if (!new_prog)
2302 return -ENOMEM;
2303
2304 delta += cnt - 1;
2305 env->prog = prog = new_prog;
2306 insn = new_prog->insnsi + i + delta;
2307 goto next_insn;
2308 }
2309 patch_call_imm:
2310 fn = env->ops->get_func_proto(insn->imm, env->prog);
2311 /* all functions that have prototype and verifier allowed
2312 * programs to call them, must be real in-kernel functions
2313 */
2314 if (!fn->func) {
2315 verifier_bug(env,
2316 "not inlined functions %s#%d is missing func",
2317 func_id_name(insn->imm), insn->imm);
2318 return -EFAULT;
2319 }
2320 insn->imm = fn->func - __bpf_call_base;
2321 next_insn:
2322 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
2323 subprogs[cur_subprog].stack_depth += stack_depth_extra;
2324 subprogs[cur_subprog].stack_extra = stack_depth_extra;
2325
2326 stack_depth = subprogs[cur_subprog].stack_depth;
2327 if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
2328 verbose(env, "stack size %d(extra %d) is too large\n",
2329 stack_depth, stack_depth_extra);
2330 return -EINVAL;
2331 }
2332 cur_subprog++;
2333 stack_depth = subprogs[cur_subprog].stack_depth;
2334 stack_depth_extra = 0;
2335 }
2336 i++;
2337 insn++;
2338 }
2339
2340 env->prog->aux->stack_depth = subprogs[0].stack_depth;
2341 for (i = 0; i < env->subprog_cnt; i++) {
2342 int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
2343 int subprog_start = subprogs[i].start;
2344 int stack_slots = subprogs[i].stack_extra / 8;
2345 int slots = delta, cnt = 0;
2346
2347 if (!stack_slots)
2348 continue;
2349 /* We need two slots in case timed may_goto is supported. */
2350 if (stack_slots > slots) {
2351 verifier_bug(env, "stack_slots supports may_goto only");
2352 return -EFAULT;
2353 }
2354
2355 stack_depth = subprogs[i].stack_depth;
2356 if (bpf_jit_supports_timed_may_goto()) {
2357 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
2358 BPF_MAX_TIMED_LOOPS);
2359 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
2360 } else {
2361 /* Add ST insn to subprog prologue to init extra stack */
2362 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
2363 BPF_MAX_LOOPS);
2364 }
2365 /* Copy first actual insn to preserve it */
2366 insn_buf[cnt++] = env->prog->insnsi[subprog_start];
2367
2368 new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
2369 if (!new_prog)
2370 return -ENOMEM;
2371 env->prog = prog = new_prog;
2372 /*
2373 * If may_goto is a first insn of a prog there could be a jmp
2374 * insn that points to it, hence adjust all such jmps to point
2375 * to insn after BPF_ST that inits may_goto count.
2376 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
2377 */
2378 WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
2379 }
2380
2381 /* Since poke tab is now finalized, publish aux to tracker. */
2382 for (i = 0; i < prog->aux->size_poke_tab; i++) {
2383 map_ptr = prog->aux->poke_tab[i].tail_call.map;
2384 if (!map_ptr->ops->map_poke_track ||
2385 !map_ptr->ops->map_poke_untrack ||
2386 !map_ptr->ops->map_poke_run) {
2387 verifier_bug(env, "poke tab is misconfigured");
2388 return -EFAULT;
2389 }
2390
2391 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
2392 if (ret < 0) {
2393 verbose(env, "tracking tail call prog failed\n");
2394 return ret;
2395 }
2396 }
2397
2398 ret = sort_kfunc_descs_by_imm_off(env);
2399 if (ret)
2400 return ret;
2401
2402 return 0;
2403 }
2404
/* Replace the instruction at @position with an open-coded bpf_loop().
 *
 * @env:                verifier environment; env->insn_buf is the scratch
 *                      buffer the replacement sequence is built in
 * @position:           index of the instruction being replaced
 * @stack_base:         offset (relative to R10) of the first of three
 *                      consecutive BPF_REG_SIZE slots used to save R6-R8
 * @callback_subprogno: index into env->subprog_info of the loop callback
 * @total_cnt:          out: length of the emitted sequence; written before
 *                      patching, so it is set even when patching fails
 *
 * Returns the patched program, or NULL when bpf_patch_insn_data() fails.
 *
 * NOTE: this mirrors bpf_iter.c:bpf_loop, keep the two in sync.
 */
static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
					int position,
					s32 stack_base,
					u32 callback_subprogno,
					u32 *total_cnt)
{
	/* Stack slots where the caller's R6, R7, R8 are parked */
	const s32 save_r6 = stack_base + 0 * BPF_REG_SIZE;
	const s32 save_r7 = stack_base + 1 * BPF_REG_SIZE;
	const s32 save_r8 = stack_base + 2 * BPF_REG_SIZE;
	/* Registers borrowed for the loop bound, counter and context */
	const int max_reg = BPF_REG_6;
	const int idx_reg = BPF_REG_7;
	const int ctx_reg = BPF_REG_8;
	struct bpf_insn *buf = env->insn_buf;
	struct bpf_prog *patched;
	u32 call_idx, call_pos, cb_start;
	s32 cb_rel;
	u32 n = 0;

	/* Too many iterations requested: R0 = -E2BIG, then jump past the
	 * whole patch.
	 */
	buf[n++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
	buf[n++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
	buf[n++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);

	/* Save R6, R7, R8 so they can serve as loop variables */
	buf[n++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, save_r6);
	buf[n++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, save_r7);
	buf[n++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, save_r8);

	/* Loop variables: bound from R1, counter zeroed, context from R3 */
	buf[n++] = BPF_MOV64_REG(max_reg, BPF_REG_1);
	buf[n++] = BPF_MOV32_IMM(idx_reg, 0);
	buf[n++] = BPF_MOV64_REG(ctx_reg, BPF_REG_3);

	/* Loop header: leave the body once counter >= bound */
	buf[n++] = BPF_JMP_REG(BPF_JGE, idx_reg, max_reg, 5);

	/* Invoke callback(counter, ctx); the relative call target is a
	 * placeholder that gets fixed up once the patch is applied.
	 */
	buf[n++] = BPF_MOV64_REG(BPF_REG_1, idx_reg);
	buf[n++] = BPF_MOV64_REG(BPF_REG_2, ctx_reg);
	call_idx = n;
	buf[n++] = BPF_CALL_REL(0);

	/* Bump the counter */
	buf[n++] = BPF_ALU64_IMM(BPF_ADD, idx_reg, 1);
	/* Callback returned 0: go back to the loop header */
	buf[n++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);

	/* bpf_loop's result: R0 = number of iterations performed */
	buf[n++] = BPF_MOV64_REG(BPF_REG_0, idx_reg);

	/* Put the caller's R6, R7, R8 back */
	buf[n++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, save_r6);
	buf[n++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, save_r7);
	buf[n++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, save_r8);

	*total_cnt = n;
	patched = bpf_patch_insn_data(env, position, buf, n);
	if (!patched)
		return NULL;

	/* The callback's start index is read only after patching; the call
	 * immediate is relative to the instruction following the call.
	 */
	cb_start = env->subprog_info[callback_subprogno].start;
	call_pos = position + call_idx;
	cb_rel = cb_start - call_pos - 1;
	patched->insnsi[call_pos].imm = cb_rel;

	return patched;
}
2480
is_bpf_loop_call(struct bpf_insn * insn)2481 static bool is_bpf_loop_call(struct bpf_insn *insn)
2482 {
2483 return insn->code == (BPF_JMP | BPF_CALL) &&
2484 insn->src_reg == 0 &&
2485 insn->imm == BPF_FUNC_loop;
2486 }
2487
2488 /* For all sub-programs in the program (including main) check
2489 * insn_aux_data to see if there are bpf_loop calls that require
2490 * inlining. If such calls are found the calls are replaced with a
2491 * sequence of instructions produced by `inline_bpf_loop` function and
2492 * subprog stack_depth is increased by the size of 3 registers.
2493 * This stack space is used to spill values of the R6, R7, R8. These
2494 * registers are used to store the loop bound, counter and context
2495 * variables.
2496 */
bpf_optimize_bpf_loop(struct bpf_verifier_env * env)2497 int bpf_optimize_bpf_loop(struct bpf_verifier_env *env)
2498 {
2499 struct bpf_subprog_info *subprogs = env->subprog_info;
2500 int i, cur_subprog = 0, cnt, delta = 0;
2501 struct bpf_insn *insn = env->prog->insnsi;
2502 int insn_cnt = env->prog->len;
2503 u16 stack_depth = subprogs[cur_subprog].stack_depth;
2504 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
2505 u16 stack_depth_extra = 0;
2506
2507 for (i = 0; i < insn_cnt; i++, insn++) {
2508 struct bpf_loop_inline_state *inline_state =
2509 &env->insn_aux_data[i + delta].loop_inline_state;
2510
2511 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
2512 struct bpf_prog *new_prog;
2513
2514 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
2515 new_prog = inline_bpf_loop(env,
2516 i + delta,
2517 -(stack_depth + stack_depth_extra),
2518 inline_state->callback_subprogno,
2519 &cnt);
2520 if (!new_prog)
2521 return -ENOMEM;
2522
2523 delta += cnt - 1;
2524 env->prog = new_prog;
2525 insn = new_prog->insnsi + i + delta;
2526 }
2527
2528 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
2529 subprogs[cur_subprog].stack_depth += stack_depth_extra;
2530 cur_subprog++;
2531 stack_depth = subprogs[cur_subprog].stack_depth;
2532 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
2533 stack_depth_extra = 0;
2534 }
2535 }
2536
2537 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
2538
2539 return 0;
2540 }
2541
2542 /* Remove unnecessary spill/fill pairs, members of fastcall pattern,
2543 * adjust subprograms stack depth when possible.
2544 */
bpf_remove_fastcall_spills_fills(struct bpf_verifier_env * env)2545 int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env)
2546 {
2547 struct bpf_subprog_info *subprog = env->subprog_info;
2548 struct bpf_insn_aux_data *aux = env->insn_aux_data;
2549 struct bpf_insn *insn = env->prog->insnsi;
2550 int insn_cnt = env->prog->len;
2551 u32 spills_num;
2552 bool modified = false;
2553 int i, j;
2554
2555 for (i = 0; i < insn_cnt; i++, insn++) {
2556 if (aux[i].fastcall_spills_num > 0) {
2557 spills_num = aux[i].fastcall_spills_num;
2558 /* NOPs would be removed by opt_remove_nops() */
2559 for (j = 1; j <= spills_num; ++j) {
2560 *(insn - j) = NOP;
2561 *(insn + j) = NOP;
2562 }
2563 modified = true;
2564 }
2565 if ((subprog + 1)->start == i + 1) {
2566 if (modified && !subprog->keep_fastcall_stack)
2567 subprog->stack_depth = -subprog->fastcall_stack_off;
2568 subprog++;
2569 modified = false;
2570 }
2571 }
2572
2573 return 0;
2574 }
2575
2576