// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <linux/vmalloc.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <net/xdp.h>
#include "disasm.h"

#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)

/* True if INSN is an atomic compare-and-exchange (BPF_STX | BPF_ATOMIC with
 * imm == BPF_CMPXCHG). CMPXCHG is special-cased by the zext pass below
 * because it always defines R0.
 */
static bool is_cmpxchg_insn(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_STX &&
	       BPF_MODE(insn->code) == BPF_ATOMIC &&
	       insn->imm == BPF_CMPXCHG;
}

/* Return the regno defined by the insn, or -1. */
static int insn_def_regno(const struct bpf_insn *insn)
{
	switch (BPF_CLASS(insn->code)) {
	case BPF_JMP:
	case BPF_JMP32:
	case BPF_ST:
		return -1;
	case BPF_STX:
		/* Atomics only define a register for CMPXCHG (R0),
		 * LOAD_ACQ (dst_reg) and FETCH variants (src_reg).
		 */
		if (BPF_MODE(insn->code) == BPF_ATOMIC ||
		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
			if (insn->imm == BPF_CMPXCHG)
				return BPF_REG_0;
			else if (insn->imm == BPF_LOAD_ACQ)
				return insn->dst_reg;
			else if (insn->imm & BPF_FETCH)
				return insn->src_reg;
		}
		return -1;
	default:
		return insn->dst_reg;
	}
}

/* Return TRUE if INSN has defined any 32-bit value explicitly. */
static bool insn_has_def32(struct bpf_insn *insn)
{
	int dst_reg = insn_def_regno(insn);

	if (dst_reg == -1)
		return false;

	return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
}

/* sort()/bsearch() comparator: order kfunc descriptors by (imm, offset). */
static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
{
	const struct bpf_kfunc_desc *d0 = a;
	const struct bpf_kfunc_desc *d1 = b;

	if (d0->imm != d1->imm)
		return d0->imm < d1->imm ? -1 : 1;
	if (d0->offset != d1->offset)
		return d0->offset < d1->offset ? -1 : 1;
	return 0;
}

/* Look up the BTF function model for the kfunc call INSN. Used by JITs.
 * Assumes prog->aux->kfunc_tab is already sorted by (imm, offset) — see
 * sort_kfunc_descs_by_imm_off(). Returns NULL when no descriptor matches.
 */
const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
			 const struct bpf_insn *insn)
{
	const struct bpf_kfunc_desc desc = {
		.imm = insn->imm,
		.offset = insn->off,
	};
	const struct bpf_kfunc_desc *res;
	struct bpf_kfunc_desc_tab *tab;

	tab = prog->aux->kfunc_tab;
	res = bsearch(&desc, tab->descs, tab->nr_descs,
		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);

	return res ? &res->func_model : NULL;
}

/* Compute the final insn->imm for a kfunc call descriptor: the raw BTF
 * func_id when the JIT supports far kfunc calls, otherwise the offset of
 * desc->addr relative to __bpf_call_base. Returns -EINVAL when that
 * relative offset does not fit in the s32 imm field.
 */
static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
{
	unsigned long call_imm;

	if (bpf_jit_supports_far_kfunc_call()) {
		call_imm = desc->func_id;
	} else {
		call_imm = BPF_CALL_IMM(desc->addr);
		/* Check whether the relative offset overflows desc->imm */
		if ((unsigned long)(s32)call_imm != call_imm) {
			verbose(env, "address of kernel func_id %u is out of range\n",
				desc->func_id);
			return -EINVAL;
		}
	}
	desc->imm = call_imm;
	return 0;
}

/* Finalize every kfunc descriptor's imm and sort the table so that
 * bpf_jit_find_kfunc_model() can bsearch() it. No-op if there is no
 * kfunc_tab. Returns 0 or the error from set_kfunc_desc_imm().
 */
static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
{
	struct bpf_kfunc_desc_tab *tab;
	int i, err;

	tab = env->prog->aux->kfunc_tab;
	if (!tab)
		return 0;

	for (i = 0; i < tab->nr_descs; i++) {
		err = set_kfunc_desc_imm(env, &tab->descs[i]);
		if (err)
			return err;
	}

	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
	     kfunc_desc_cmp_by_imm_off, NULL);
	return 0;
}

/* Register a kfunc-call descriptor for every pseudo kfunc call found in
 * the CNT instructions starting at INSN (e.g. verifier-generated
 * prologue/epilogue code). Returns 0 or a negative errno.
 */
static int add_kfunc_in_insns(struct bpf_verifier_env *env,
			      struct bpf_insn *insn, int cnt)
{
	int i, ret;

	for (i = 0; i < cnt; i++, insn++) {
		if (bpf_pseudo_kfunc_call(insn)) {
			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/* Stack depth of the subprog targeted by the bpf-to-bpf call INSN at
 * index IDX. Returns -EFAULT if no subprog starts at the call target.
 */
static int get_callee_stack_depth(struct bpf_verifier_env *env,
				  const struct bpf_insn *insn, int idx)
{
	int start = idx + insn->imm + 1, subprog;

	subprog = bpf_find_subprog(env, start);
	if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
		return -EFAULT;
	return env->subprog_info[subprog].stack_depth;
}
#endif

/* single env->prog->insni[off] instruction was replaced with the range
 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
 * [0, off) and [off, end) to new locations, so the patched range stays zero
 */
static void adjust_insn_aux_data(struct bpf_verifier_env *env,
				 struct bpf_prog *new_prog, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *data = env->insn_aux_data;
	struct bpf_insn *insn = new_prog->insnsi;
	u32 old_seen = data[off].seen;
	u32 prog_len;
	int i;

	/* aux info at OFF always needs adjustment, no matter fast path
	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
	 * original insn at old prog.
	 */
	data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);

	if (cnt == 1)
		return;
	prog_len = new_prog->len;

	/* Shift the tail so aux data keeps tracking the original insns, then
	 * zero out the slots covering the cnt - 1 newly inserted insns.
	 */
	memmove(data + off + cnt - 1, data + off,
		sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
	memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
	for (i = off; i < off + cnt - 1; i++) {
		/* Expand insni[off]'s seen count to the patched range. */
		data[i].seen = old_seen;
		data[i].zext_dst = insn_has_def32(insn + i);
	}
}

/* After a patch of LEN insns replaced one insn at OFF, push every subprog
 * start located after OFF forward by len - 1.
 */
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int i;

	if (len == 1)
		return;
	/* NOTE: fake 'exit' subprog should be updated as well. */
	for (i = 0; i <= env->subprog_cnt; i++) {
		if (env->subprog_info[i].start <= off)
			continue;
		env->subprog_info[i].start += len - 1;
	}
}

/* Propagate an insn patch at OFF of length LEN to all insn-array maps. */
static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int i;

	if (len == 1)
		return;

	for (i = 0; i < env->insn_array_map_cnt; i++)
		bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
}

/* Propagate removal of LEN insns at OFF to all insn-array maps. */
static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int i;

	for (i = 0; i < env->insn_array_map_cnt; i++)
		bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
}

/* Shift tail-call poke descriptors that point past OFF by len - 1. */
static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
{
	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
	int i, sz = prog->aux->size_poke_tab;
	struct bpf_jit_poke_descriptor *desc;

	for (i = 0; i < sz; i++) {
		desc = &tab[i];
		if (desc->insn_idx <= off)
			continue;
		desc->insn_idx += len - 1;
	}
}

/* Replace the single insn at OFF with the LEN-insn PATCH, keeping all
 * verifier bookkeeping (insn_aux_data, subprog starts, insn-array maps,
 * poke descriptors) consistent. Returns the new prog, or NULL on
 * allocation failure or when patching would overflow a 16-bit jump
 * offset (-ERANGE from bpf_patch_insn_single()).
 */
struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
				     const struct bpf_insn *patch, u32 len)
{
	struct bpf_prog *new_prog;
	struct bpf_insn_aux_data *new_data = NULL;

	if (len > 1) {
		/* Grow aux data first; the new tail slots are zero-filled. */
		new_data = vrealloc(env->insn_aux_data,
				    array_size(env->prog->len + len - 1,
					       sizeof(struct bpf_insn_aux_data)),
				    GFP_KERNEL_ACCOUNT | __GFP_ZERO);
		if (!new_data)
			return NULL;

		env->insn_aux_data = new_data;
	}

	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
	if (IS_ERR(new_prog)) {
		if (PTR_ERR(new_prog) == -ERANGE)
			verbose(env,
				"insn %d cannot be patched due to 16-bit range\n",
				env->insn_aux_data[off].orig_idx);
		return NULL;
	}
	adjust_insn_aux_data(env, new_prog, off, len);
	adjust_subprog_starts(env, off, len);
	adjust_insn_arrays(env, off, len);
	adjust_poke_descs(new_prog, off, len);
	return new_prog;
}
/*
 * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
 * jump offset by 'delta'.
 */
static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
{
	struct bpf_insn *insn = prog->insnsi;
	u32 insn_cnt = prog->len, i;
	s32 imm;
	s16 off;

	for (i = 0; i < insn_cnt; i++, insn++) {
		u8 code = insn->code;

		/* Skip insns inside the just-inserted [tgt_idx, tgt_idx+delta) range. */
		if (tgt_idx <= i && i < tgt_idx + delta)
			continue;

		if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
		    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
			continue;

		if (insn->code == (BPF_JMP32 | BPF_JA)) {
			/* BPF_JMP32 | BPF_JA carries its jump offset in imm. */
			if (i + 1 + insn->imm != tgt_idx)
				continue;
			if (check_add_overflow(insn->imm, delta, &imm))
				return -ERANGE;
			insn->imm = imm;
		} else {
			if (i + 1 + insn->off != tgt_idx)
				continue;
			if (check_add_overflow(insn->off, delta, &off))
				return -ERANGE;
			insn->off = off;
		}
	}
	return 0;
}

/* After removal of insns [off, off + cnt): drop subprogs that lived
 * entirely inside the removed region (together with their func_info
 * entries) and pull every remaining subprog start, including the fake
 * 'exit' subprog, back by cnt.
 */
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
					      u32 off, u32 cnt)
{
	int i, j;

	/* find first prog starting at or after off (first to remove) */
	for (i = 0; i < env->subprog_cnt; i++)
		if (env->subprog_info[i].start >= off)
			break;
	/* find first prog starting at or after off + cnt (first to stay) */
	for (j = i; j < env->subprog_cnt; j++)
		if (env->subprog_info[j].start >= off + cnt)
			break;
	/* if j doesn't start exactly at off + cnt, we are just removing
	 * the front of previous prog
	 */
	if (env->subprog_info[j].start != off + cnt)
		j--;

	if (j > i) {
		struct bpf_prog_aux *aux = env->prog->aux;
		int move;

		/* move fake 'exit' subprog as well */
		move = env->subprog_cnt + 1 - j;

		memmove(env->subprog_info + i,
			env->subprog_info + j,
			sizeof(*env->subprog_info) * move);
		env->subprog_cnt -= j - i;

		/* remove func_info */
		if (aux->func_info) {
			move = aux->func_info_cnt - j;

			memmove(aux->func_info + i,
				aux->func_info + j,
				sizeof(*aux->func_info) * move);
			aux->func_info_cnt -= j - i;
			/* func_info->insn_off is set after all code rewrites,
			 * in adjust_btf_func() - no need to adjust
			 */
		}
	} else {
		/* convert i from "first prog to remove" to "first to adjust" */
		if (env->subprog_info[i].start == off)
			i++;
	}

	/* update fake 'exit' subprog as well */
	for (; i <= env->subprog_cnt; i++)
		env->subprog_info[i].start -= cnt;

	return 0;
}

/* After removal of insns [off, off + cnt): drop line-info entries that
 * referred to the removed range, let the first surviving insn inherit the
 * last removed linfo if needed, shift remaining insn_off values back by
 * cnt, and fix up each subprog's linfo_idx.
 */
static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
				      u32 cnt)
{
	struct bpf_prog *prog = env->prog;
	u32 i, l_off, l_cnt, nr_linfo;
	struct bpf_line_info *linfo;

	nr_linfo = prog->aux->nr_linfo;
	if (!nr_linfo)
		return 0;

	linfo = prog->aux->linfo;

	/* find first line info to remove, count lines to be removed */
	for (i = 0; i < nr_linfo; i++)
		if (linfo[i].insn_off >= off)
			break;

	l_off = i;
	l_cnt = 0;
	for (; i < nr_linfo; i++)
		if (linfo[i].insn_off < off + cnt)
			l_cnt++;
		else
			break;

	/* First live insn doesn't match first live linfo, it needs to "inherit"
	 * last removed linfo. prog is already modified, so prog->len == off
	 * means no live instructions after (tail of the program was removed).
	 */
	if (prog->len != off && l_cnt &&
	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
		l_cnt--;
		linfo[--i].insn_off = off + cnt;
	}

	/* remove the line info which refer to the removed instructions */
	if (l_cnt) {
		memmove(linfo + l_off, linfo + i,
			sizeof(*linfo) * (nr_linfo - i));

		prog->aux->nr_linfo -= l_cnt;
		nr_linfo = prog->aux->nr_linfo;
	}

	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
	for (i = l_off; i < nr_linfo; i++)
		linfo[i].insn_off -= cnt;

	/* fix up all subprogs (incl. 'exit') which start >= off */
	for (i = 0; i <= env->subprog_cnt; i++)
		if (env->subprog_info[i].linfo_idx > l_off) {
			/* program may have started in the removed region but
			 * may not be fully removed
			 */
			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
				env->subprog_info[i].linfo_idx -= l_cnt;
			else
				env->subprog_info[i].linfo_idx = l_off;
		}

	return 0;
}

/*
 * Clean up dynamically allocated fields of aux data for instructions [start, ...]
 */
void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn *insns = env->prog->insnsi;
	int end = start + len;
	int i;

	for (i = start; i < end; i++) {
		if (aux_data[i].jt) {
			kvfree(aux_data[i].jt);
			aux_data[i].jt = NULL;
		}

		/* ldimm64 occupies two insn slots; skip the second half. */
		if (bpf_is_ldimm64(&insns[i]))
			i++;
	}
}

/* Remove insns [off, off + cnt) and keep all verifier state in sync:
 * offload hooks, aux-data cleanup, subprog starts, line info, insn-array
 * maps, and the insn_aux_data array itself. Returns 0 or a negative errno.
 */
static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	unsigned int orig_prog_len = env->prog->len;
	int err;

	if (bpf_prog_is_offloaded(env->prog->aux))
		bpf_prog_offload_remove_insns(env, off, cnt);

	/* Should be called before bpf_remove_insns, as it uses prog->insnsi */
	bpf_clear_insn_aux_data(env, off, cnt);

	err = bpf_remove_insns(env->prog, off, cnt);
	if (err)
		return err;

	err = adjust_subprog_starts_after_remove(env, off, cnt);
	if (err)
		return err;

	err = bpf_adj_linfo_after_remove(env, off, cnt);
	if (err)
		return err;

	adjust_insn_arrays_after_remove(env, off, cnt);

	memmove(aux_data + off, aux_data + off + cnt,
		sizeof(*aux_data) * (orig_prog_len - off - cnt));

	return 0;
}

/* Canonical no-op (goto +0) and may_goto +0 insns, used by the nop-removal
 * pass below for byte-wise comparison.
 */
static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
/* True for any BPF_JMP/BPF_JMP32 insn that branches conditionally, i.e.
 * excluding unconditional jumps, calls and exits.
 */
bool bpf_insn_is_cond_jump(u8 code)
{
	u8 op;

	op = BPF_OP(code);
	if (BPF_CLASS(code) == BPF_JMP32)
		return op != BPF_JA;

	if (BPF_CLASS(code) != BPF_JMP)
		return false;

	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
}

/* Turn conditional jumps whose fall-through or jump target was never seen
 * during verification into unconditional jumps to the live side. The insn
 * count stays unchanged; dead paths are removed later.
 */
void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
	struct bpf_insn *insn = env->prog->insnsi;
	const int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!bpf_insn_is_cond_jump(insn->code))
			continue;

		/* Fall-through dead: always take the branch. Branch target
		 * dead: always fall through (goto +0).
		 */
		if (!aux_data[i + 1].seen)
			ja.off = insn->off;
		else if (!aux_data[i + 1 + insn->off].seen)
			ja.off = 0;
		else
			continue;

		if (bpf_prog_is_offloaded(env->prog->aux))
			bpf_prog_offload_replace_insn(env, i, &ja);

		memcpy(insn, &ja, sizeof(ja));
	}
}

/* Remove every maximal run of insns that verification never reached
 * (aux_data[].seen == 0). Returns 0 or a negative errno.
 */
int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	int insn_cnt = env->prog->len;
	int i, err;

	for (i = 0; i < insn_cnt; i++) {
		int j;

		j = 0;
		while (i + j < insn_cnt && !aux_data[i + j].seen)
			j++;
		if (!j)
			continue;

		err = verifier_remove_insns(env, i, j);
		if (err)
			return err;
		insn_cnt = env->prog->len;
	}

	return 0;
}

/* Remove no-op insns: goto +0 and may_goto +0. Returns 0 or a negative
 * errno from verifier_remove_insns().
 */
int bpf_opt_remove_nops(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	bool is_may_goto_0, is_ja;
	int i, err;

	for (i = 0; i < insn_cnt; i++) {
		is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
		is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));

		if (!is_may_goto_0 && !is_ja)
			continue;

		err = verifier_remove_insns(env, i, 1);
		if (err)
			return err;
		insn_cnt--;
		/* Go back one insn to catch may_goto +1; may_goto +0 sequence */
		i -= (is_may_goto_0 && i > 0) ? 2 : 1;
	}

	return 0;
}

/* Insert explicit zero-extensions after insns that define a 32-bit value
 * when the JIT requests it (bpf_jit_needs_zext()) or for CMPXCHG. With
 * BPF_F_TEST_RND_HI32, instead poison the high 32 bits of non-zext
 * destinations with a random value to flush out verifier/JIT bugs.
 * Returns 0, -ENOMEM, or -EFAULT on internal inconsistency.
 */
int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
				      const union bpf_attr *attr)
{
	struct bpf_insn *patch;
	/* use env->insn_buf as two independent buffers */
	struct bpf_insn *zext_patch = env->insn_buf;
	struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	int i, patch_len, delta = 0, len = env->prog->len;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_prog *new_prog;
	bool rnd_hi32;

	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
	zext_patch[1] = BPF_ZEXT_REG(0);
	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
	for (i = 0; i < len; i++) {
		/* delta tracks insns inserted so far; adj_idx is the index
		 * of the current original insn in the patched program.
		 */
		int adj_idx = i + delta;
		struct bpf_insn insn;
		int load_reg;

		insn = insns[adj_idx];
		load_reg = insn_def_regno(&insn);
		if (!aux[adj_idx].zext_dst) {
			u8 code, class;
			u32 imm_rnd;

			if (!rnd_hi32)
				continue;

			code = insn.code;
			class = BPF_CLASS(code);
			if (load_reg == -1)
				continue;

			/* NOTE: arg "reg" (the fourth one) is only used for
			 * BPF_STX + SRC_OP, so it is safe to pass NULL
			 * here.
			 */
			if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
				if (class == BPF_LD &&
				    BPF_MODE(code) == BPF_IMM)
					i++;
				continue;
			}

			/* ctx load could be transformed into wider load. */
			if (class == BPF_LDX &&
			    aux[adj_idx].ptr_type == PTR_TO_CTX)
				continue;

			imm_rnd = get_random_u32();
			rnd_hi32_patch[0] = insn;
			rnd_hi32_patch[1].imm = imm_rnd;
			rnd_hi32_patch[3].dst_reg = load_reg;
			patch = rnd_hi32_patch;
			patch_len = 4;
			goto apply_patch_buffer;
		}

		/* Add in an zero-extend instruction if a) the JIT has requested
		 * it or b) it's a CMPXCHG.
		 *
		 * The latter is because: BPF_CMPXCHG always loads a value into
		 * R0, therefore always zero-extends. However some archs'
		 * equivalent instruction only does this load when the
		 * comparison is successful. This detail of CMPXCHG is
		 * orthogonal to the general zero-extension behaviour of the
		 * CPU, so it's treated independently of bpf_jit_needs_zext.
		 */
		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
			continue;

		/* Zero-extension is done by the caller. */
		if (bpf_pseudo_kfunc_call(&insn))
			continue;

		if (verifier_bug_if(load_reg == -1, env,
				    "zext_dst is set, but no reg is defined"))
			return -EFAULT;

		zext_patch[0] = insn;
		zext_patch[1].dst_reg = load_reg;
		zext_patch[1].src_reg = load_reg;
		patch = zext_patch;
		patch_len = 2;
apply_patch_buffer:
		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
		if (!new_prog)
			return -ENOMEM;
		env->prog = new_prog;
		insns = new_prog->insnsi;
		aux = env->insn_aux_data;
		delta += patch_len - 1;
	}

	return 0;
}

/* convert load instructions that access fields of a context type into a
 * sequence of instructions that access fields of the underlying structure:
 *     struct __sk_buff    -> struct sk_buff
 *     struct bpf_sock_ops -> struct sock
 */
int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprogs = env->subprog_info;
	const struct bpf_verifier_ops *ops = env->ops;
	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
	const int insn_cnt = env->prog->len;
	struct bpf_insn *epilogue_buf = env->epilogue_buf;
	struct bpf_insn *insn_buf = env->insn_buf;
	struct bpf_insn *insn;
	u32 target_size, size_default, off;
	struct bpf_prog *new_prog;
	enum bpf_access_type type;
	bool is_narrower_load;
	int epilogue_idx = 0;

	if (ops->gen_epilogue) {
		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
						 -(subprogs[0].stack_depth + 8));
		if (epilogue_cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "epilogue is too long");
			return -EFAULT;
		} else if (epilogue_cnt) {
			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
			cnt = 0;
			subprogs[0].stack_depth += 8;
			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
						      -subprogs[0].stack_depth);
			insn_buf[cnt++] = env->prog->insnsi[0];
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;
			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	if (ops->gen_prologue || env->seen_direct_write) {
		if (!ops->gen_prologue) {
			verifier_bug(env, "gen_prologue is null");
			return -EFAULT;
		}
		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
					env->prog);
		if (cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "prologue is too long");
			return -EFAULT;
		} else if (cnt) {
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	/* Prologue/epilogue insertion at insn 0 shifted everything; fix
	 * jumps that target insn 0.
	 */
	if (delta)
		WARN_ON(adjust_jmp_off(env->prog, 0, delta));

	if (bpf_prog_is_offloaded(env->prog->aux))
		return 0;

	insn = env->prog->insnsi + delta;

	for (i = 0; i < insn_cnt; i++, insn++) {
		bpf_convert_ctx_access_t convert_ctx_access;
		u8 mode;

		if (env->insn_aux_data[i + delta].nospec) {
			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
			struct bpf_insn *patch = insn_buf;

			*patch++ = BPF_ST_NOSPEC();
			*patch++ = *insn;
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			/* This can not be easily merged with the
			 * nospec_result-case, because an insn may require a
			 * nospec before and after itself. Therefore also do not
			 * 'continue' here but potentially apply further
			 * patching to insn. *insn should equal patch[1] now.
			 */
		}

		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
			type = BPF_READ;
		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
			type = BPF_WRITE;
		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
			env->prog->aux->num_exentries++;
			continue;
		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
			   epilogue_cnt &&
			   i + delta < subprogs[1].start) {
			/* Generate epilogue for the main prog */
			if (epilogue_idx) {
				/* jump back to the earlier generated epilogue */
				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
				cnt = 1;
			} else {
				memcpy(insn_buf, epilogue_buf,
				       epilogue_cnt * sizeof(*epilogue_buf));
				cnt = epilogue_cnt;
				/* epilogue_idx cannot be 0. It must have at
				 * least one ctx ptr saving insn before the
				 * epilogue.
				 */
				epilogue_idx = i + delta;
			}
			goto patch_insn_buf;
		} else {
			continue;
		}

		if (type == BPF_WRITE &&
		    env->insn_aux_data[i + delta].nospec_result) {
			/* nospec_result is only used to mitigate Spectre v4 and
			 * to limit verification-time for Spectre v1.
			 */
			struct bpf_insn *patch = insn_buf;

			*patch++ = *insn;
			*patch++ = BPF_ST_NOSPEC();
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			continue;
		}

		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
		case PTR_TO_CTX:
			if (!ops->convert_ctx_access)
				continue;
			convert_ctx_access = ops->convert_ctx_access;
			break;
		case PTR_TO_SOCKET:
		case PTR_TO_SOCK_COMMON:
			convert_ctx_access = bpf_sock_convert_ctx_access;
			break;
		case PTR_TO_TCP_SOCK:
			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
			break;
		case PTR_TO_XDP_SOCK:
			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
			break;
		case PTR_TO_BTF_ID:
		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
		 * any faults for loads into such types. BPF_WRITE is disallowed
		 * for this case.
		 */
		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
			if (type == BPF_READ) {
				if (BPF_MODE(insn->code) == BPF_MEM)
					insn->code = BPF_LDX | BPF_PROBE_MEM |
						     BPF_SIZE((insn)->code);
				else
					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
						     BPF_SIZE((insn)->code);
				env->prog->aux->num_exentries++;
			}
			continue;
		case PTR_TO_ARENA:
			if (BPF_MODE(insn->code) == BPF_MEMSX) {
				if (!bpf_jit_supports_insn(insn, true)) {
					verbose(env, "sign extending loads from arena are not supported yet\n");
					return -EOPNOTSUPP;
				}
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
			} else {
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
			}
			env->prog->aux->num_exentries++;
			continue;
		default:
			continue;
		}

		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
		size = BPF_LDST_BYTES(insn);
		mode = BPF_MODE(insn->code);

		/* If the read access is a narrower load of the field,
		 * convert to a 4/8-byte load, to minimum program type specific
		 * convert_ctx_access changes. If conversion is successful,
		 * we will apply proper mask to the result.
		 */
		is_narrower_load = size < ctx_field_size;
		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
		off = insn->off;
		if (is_narrower_load) {
			u8 size_code;

			if (type == BPF_WRITE) {
				verifier_bug(env, "narrow ctx access misconfigured");
				return -EFAULT;
			}

			size_code = BPF_H;
			if (ctx_field_size == 4)
				size_code = BPF_W;
			else if (ctx_field_size == 8)
				size_code = BPF_DW;

			insn->off = off & ~(size_default - 1);
			insn->code = BPF_LDX | BPF_MEM | size_code;
		}

		target_size = 0;
		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
					 &target_size);
		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
		    (ctx_field_size && !target_size)) {
			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
			return -EFAULT;
		}

		if (is_narrower_load && size < target_size) {
			/* Shift the wide load down to the requested bytes and
			 * mask off the rest.
			 */
			u8 shift = bpf_ctx_narrow_access_offset(
				off, size, size_default) * 8;
			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
				verifier_bug(env, "narrow ctx load misconfigured");
				return -EFAULT;
			}
			if (ctx_field_size <= 4) {
				if (shift)
					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1 << size * 8) - 1);
			} else {
				if (shift)
					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1ULL << size * 8) - 1);
			}
		}
		if (mode == BPF_MEMSX)
			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
						       insn->dst_reg, insn->dst_reg,
						       size * 8, 0);

patch_insn_buf:
		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
		if (!new_prog)
			return -ENOMEM;

		delta += cnt - 1;

		/* keep walking new program and skip insns we just inserted */
		env->prog = new_prog;
		insn = new_prog->insnsi + i + delta;
	}

	return 0;
}

/* Snapshot the current subprog start offsets into a kvmalloc'd array
 * (caller frees). Returns NULL on allocation failure.
 */
static u32
*bpf_dup_subprog_starts(struct bpf_verifier_env *env)
{
	u32 *starts = NULL;

	starts = kvmalloc_objs(u32, env->subprog_cnt, GFP_KERNEL_ACCOUNT);
	if (starts) {
		for (int i = 0; i < env->subprog_cnt; i++)
			starts[i] = env->subprog_info[i].start;
	}
	return starts;
}

/* Restore subprog starts from a snapshot made by bpf_dup_subprog_starts(). */
static void bpf_restore_subprog_starts(struct bpf_verifier_env *env, u32 *orig_starts)
{
	for (int i = 0; i < env->subprog_cnt; i++)
		env->subprog_info[i].start = orig_starts[i];
	/* restore the start of fake 'exit' subprog as well */
	env->subprog_info[env->subprog_cnt].start = env->prog->len;
}

/* Duplicate env->insn_aux_data (one entry per insn of the current prog).
 * Returns NULL on allocation failure; caller owns the copy.
 */
struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env)
{
	size_t size;
	void *new_aux;

	size = array_size(sizeof(struct bpf_insn_aux_data), env->prog->len);
	new_aux = __vmalloc(size, GFP_KERNEL_ACCOUNT);
	if (new_aux)
		memcpy(new_aux, env->insn_aux_data, size);
	return new_aux;
}

/* Swap the current aux-data array for a snapshot made by
 * bpf_dup_insn_aux_data(), freeing the current one.
 */
void bpf_restore_insn_aux_data(struct bpf_verifier_env *env,
			       struct bpf_insn_aux_data *orig_insn_aux)
{
	/* the expanded elements are zero-filled, so no special handling is required */
	vfree(env->insn_aux_data);
	env->insn_aux_data = orig_insn_aux;
}

/* JIT each subprog into its own bpf_prog, then resolve bpf-to-bpf call
 * addresses in a second JIT pass. On failure the insns are restored so the
 * interpreter can still run the program (unless -EFAULT).
 */
static int jit_subprogs(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog, **func, *tmp;
	int i, j, subprog_start, subprog_end = 0, len, subprog;
	struct bpf_map *map_ptr;
	struct bpf_insn *insn;
	void *old_bpf_func;
	int err, num_exentries;

	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
			continue;

		/* Upon error here we cannot fall back to interpreter but
		 * need a hard reject of the program. Thus -EFAULT is
		 * propagated in any case.
		 */
		subprog = bpf_find_subprog(env, i + insn->imm + 1);
		if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
				    i + insn->imm + 1))
			return -EFAULT;
		/* temporarily remember subprog id inside insn instead of
		 * aux_data, since next loop will split up all insns into funcs
		 */
		insn->off = subprog;
		/* remember original imm in case JIT fails and fallback
		 * to interpreter will be needed
		 */
		env->insn_aux_data[i].call_imm = insn->imm;
		/* point imm to __bpf_call_base+1 from JITs point of view */
		insn->imm = 1;
		if (bpf_pseudo_func(insn)) {
#if defined(MODULES_VADDR)
			u64 addr = MODULES_VADDR;
#else
			u64 addr = VMALLOC_START;
#endif
			/* jit (e.g. x86_64) may emit fewer instructions
			 * if it learns a u32 imm is the same as a u64 imm.
			 * Set close enough to possible prog address.
			 */
			insn[0].imm = (u32)addr;
			insn[1].imm = addr >> 32;
		}
	}

	err = bpf_prog_alloc_jited_linfo(prog);
	if (err)
		goto out_undo_insn;

	err = -ENOMEM;
	func = kzalloc_objs(prog, env->subprog_cnt);
	if (!func)
		goto out_undo_insn;

	for (i = 0; i < env->subprog_cnt; i++) {
		subprog_start = subprog_end;
		subprog_end = env->subprog_info[i + 1].start;

		len = subprog_end - subprog_start;
		/* bpf_prog_run() doesn't call subprogs directly,
		 * hence main prog stats include the runtime of subprogs.
		 * subprogs don't have IDs and not reachable via prog_get_next_id
		 * func[i]->stats will never be accessed and stays NULL
		 */
		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
		if (!func[i])
			goto out_free;
		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
		       len * sizeof(struct bpf_insn));
		func[i]->type = prog->type;
		func[i]->len = len;
		if (bpf_prog_calc_tag(func[i]))
			goto out_free;
		func[i]->is_func = 1;
		func[i]->sleepable = prog->sleepable;
		func[i]->blinded = prog->blinded;
		func[i]->aux->func_idx = i;
		/* Below members will be freed only at prog->aux */
		func[i]->aux->btf = prog->aux->btf;
		func[i]->aux->subprog_start = subprog_start;
		func[i]->aux->func_info = prog->aux->func_info;
		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
		func[i]->aux->poke_tab = prog->aux->poke_tab;
		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
		func[i]->aux->main_prog_aux = prog->aux;

		for (j = 0; j < prog->aux->size_poke_tab; j++) {
			struct bpf_jit_poke_descriptor *poke;

			poke = &prog->aux->poke_tab[j];
			if (poke->insn_idx < subprog_end &&
			    poke->insn_idx >= subprog_start)
				poke->aux = func[i]->aux;
		}

		func[i]->aux->name[0] = 'F';
		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
			func[i]->aux->jits_use_priv_stack = true;

		func[i]->jit_requested = 1;
		func[i]->blinding_requested = prog->blinding_requested;
		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
		func[i]->aux->linfo = prog->aux->linfo;
		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
		func[i]->aux->arena = prog->aux->arena;
		func[i]->aux->used_maps = env->used_maps;
		func[i]->aux->used_map_cnt = env->used_map_cnt;
		/* Count probe insns needing an extable entry in this subprog. */
		num_exentries = 0;
		insn = func[i]->insnsi;
		for (j = 0; j < func[i]->len; j++, insn++) {
			if (BPF_CLASS(insn->code) == BPF_LDX &&
			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
				num_exentries++;
			if ((BPF_CLASS(insn->code) == BPF_STX ||
			     BPF_CLASS(insn->code) == BPF_ST) &&
			    BPF_MODE(insn->code) == BPF_PROBE_MEM32)
				num_exentries++;
			if (BPF_CLASS(insn->code) == BPF_STX &&
			    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
				num_exentries++;
		}
		func[i]->aux->num_exentries = num_exentries;
		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
		func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
		func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
		func[i]->aux->token = prog->aux->token;
		if (!i)
			func[i]->aux->exception_boundary = env->seen_exception;
		func[i] = bpf_int_jit_compile(env, func[i]);
		if (!func[i]->jited) {
			err = -ENOTSUPP;
			goto out_free;
		}
		cond_resched();
	}

	/* at this point all bpf functions were successfully JITed
	 * now populate all bpf_calls with correct addresses and
	 * run last pass of JIT
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		insn = func[i]->insnsi;
		for (j = 0; j < func[i]->len; j++, insn++) {
			if (bpf_pseudo_func(insn)) {
				subprog = insn->off;
				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
				continue;
			}
			if (!bpf_pseudo_call(insn))
				continue;
			subprog = insn->off;
			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
		}

		/* we use the aux data to keep a list of the start addresses
		 * of the JITed images for each function in the program
		 *
		 * for some architectures, such as powerpc64, the imm field
		 * might not be large enough to hold the offset of the start
		 * address of the callee's JITed image from __bpf_call_base
		 *
		 * in such cases, we can lookup the start address of a callee
		 * by using its subprog id, available from the off field of
		 * the call instruction, as an index for this list
		 */
		func[i]->aux->func = func;
		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
		func[i]->aux->real_func_cnt = env->subprog_cnt;
	}
	for (i = 0; i < env->subprog_cnt; i++) {
		old_bpf_func = func[i]->bpf_func;
		tmp = bpf_int_jit_compile(env, func[i]);
		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
			err = -ENOTSUPP;
			goto out_free;
		}
		cond_resched();
	}

	/*
	 * Cleanup func[i]->aux fields which aren't required
	 * or can become invalid in future
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		func[i]->aux->used_maps = NULL;
		func[i]->aux->used_map_cnt = 0;
	}

	/* finally lock prog and jit images for all functions and
	 * populate kallsysm. Begin at the first subprogram, since
	 * bpf_prog_load will add the kallsyms for the main program.
	 */
	for (i = 1; i < env->subprog_cnt; i++) {
		err = bpf_prog_lock_ro(func[i]);
		if (err)
			goto out_free;
	}

	for (i = 1; i < env->subprog_cnt; i++)
		bpf_prog_kallsyms_add(func[i]);

	/* Last step: make now unused interpreter insns from main
	 * prog consistent for later dump requests, so they can
	 * later look the same as if they were interpreted only.
	 */
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (bpf_pseudo_func(insn)) {
			insn[0].imm = env->insn_aux_data[i].call_imm;
			insn[1].imm = insn->off;
			insn->off = 0;
			continue;
		}
		if (!bpf_pseudo_call(insn))
			continue;
		insn->off = env->insn_aux_data[i].call_imm;
		subprog = bpf_find_subprog(env, i + insn->off + 1);
		insn->imm = subprog;
	}

	prog->jited = 1;
	prog->bpf_func = func[0]->bpf_func;
	prog->jited_len = func[0]->jited_len;
	prog->aux->extable = func[0]->aux->extable;
	prog->aux->num_exentries = func[0]->aux->num_exentries;
	prog->aux->func = func;
	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
	prog->aux->real_func_cnt = env->subprog_cnt;
	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
	bpf_prog_jit_attempt_done(prog);
	return 0;
out_free:
	/* We failed JIT'ing, so at this point we need to unregister poke
	 * descriptors from subprogs, so that kernel is not attempting to
	 * patch it anymore as we're freeing the subprog JIT memory.
	 */
	for (i = 0; i < prog->aux->size_poke_tab; i++) {
		map_ptr = prog->aux->poke_tab[i].tail_call.map;
		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
	}
	/* At this point we're guaranteed that poke descriptors are not
	 * live anymore. We can just unlink its descriptor table as it's
	 * released with the main prog.
 */
	/* Free any subprog images that were successfully JITed before the
	 * failure; their poke tables are shared with the main prog, so unlink
	 * them first (see comment above) to avoid a double free.
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		if (!func[i])
			continue;
		func[i]->aux->poke_tab = NULL;
		bpf_jit_free(func[i]);
	}
	kfree(func);
out_undo_insn:
	bpf_prog_jit_attempt_done(prog);
	return err;
}

/* JIT all subprograms of a multi-subprog program.
 *
 * No-op for single-subprog programs. When constant blinding is required,
 * the verifier's per-insn aux data and subprog start offsets are snapshotted
 * first, because blinding patches instructions in a cloned prog; on any
 * failure after cloning, the snapshots are restored and env->prog is rolled
 * back to the original so the program can still run in the interpreter.
 *
 * Returns 0 on success; on failure returns a negative errno and clears
 * jit_requested/blinding_requested so the prog falls back to interpretation.
 */
int bpf_jit_subprogs(struct bpf_verifier_env *env)
{
	int err, i;
	bool blinded = false;
	struct bpf_insn *insn;
	struct bpf_prog *prog, *orig_prog;
	struct bpf_insn_aux_data *orig_insn_aux;
	u32 *orig_subprog_starts;

	/* Nothing to do if there are no bpf-to-bpf calls. */
	if (env->subprog_cnt <= 1)
		return 0;

	prog = orig_prog = env->prog;
	if (bpf_prog_need_blind(prog)) {
		/* Snapshot verifier state that blinding will invalidate, so
		 * out_restore can roll it back if JITing the clone fails.
		 */
		orig_insn_aux = bpf_dup_insn_aux_data(env);
		if (!orig_insn_aux) {
			err = -ENOMEM;
			goto out_cleanup;
		}
		orig_subprog_starts = bpf_dup_subprog_starts(env);
		if (!orig_subprog_starts) {
			vfree(orig_insn_aux);
			err = -ENOMEM;
			goto out_cleanup;
		}
		/* bpf_jit_blind_constants() works on a clone; the original
		 * prog stays intact in orig_prog for rollback.
		 */
		prog = bpf_jit_blind_constants(env, prog);
		if (IS_ERR(prog)) {
			err = -ENOMEM;
			prog = orig_prog;
			goto out_restore;
		}
		blinded = true;
	}

	err = jit_subprogs(env);
	if (err)
		goto out_jit_err;

	if (blinded) {
		/* Keep the blinded prog, drop the original and the
		 * now-unneeded snapshots.
		 */
		bpf_jit_prog_release_other(prog, orig_prog);
		kvfree(orig_subprog_starts);
		vfree(orig_insn_aux);
	}

	return 0;

out_jit_err:
	if (blinded) {
		/* Drop the blinded clone and go back to the pristine
		 * original prog plus the snapshotted verifier state.
		 */
		bpf_jit_prog_release_other(orig_prog, prog);
		/* roll back to the clean original prog */
		prog = env->prog = orig_prog;
		goto out_restore;
	} else {
		if (err != -EFAULT) {
			/*
			 * We will fall back to interpreter mode when err is not -EFAULT, before
			 * that, insn->off and insn->imm should be restored to their original
			 * values since they were modified by jit_subprogs.
			 */
			for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
				if (!bpf_pseudo_call(insn))
					continue;
				insn->off = 0;
				insn->imm = env->insn_aux_data[i].call_imm;
			}
		}
		goto out_cleanup;
	}

out_restore:
	bpf_restore_subprog_starts(env, orig_subprog_starts);
	/* NOTE(review): orig_insn_aux is not freed here, unlike the success
	 * path which vfree()s it — presumably bpf_restore_insn_aux_data()
	 * consumes/frees it; confirm to rule out a leak.
	 */
	bpf_restore_insn_aux_data(env, orig_insn_aux);
	kvfree(orig_subprog_starts);
out_cleanup:
	/* cleanup main prog to be interpreted */
	prog->jit_requested = 0;
	prog->blinding_requested = 0;
	return err;
}

/* Fix up bpf-to-bpf calls after verification.
 *
 * First tries to fully JIT all subprogs via bpf_jit_subprogs(); -EFAULT is
 * propagated as-is (internal verifier error). If JITing is unavailable or
 * failed recoverably and the interpreter is built in
 * (!CONFIG_BPF_JIT_ALWAYS_ON), falls back to patching each pseudo call with
 * the callee's stack depth for the interpreter. Programs using kfunc calls,
 * tail calls combined with bpf-to-bpf calls, or callbacks cannot run in the
 * interpreter and are rejected with -EINVAL.
 */
int bpf_fixup_call_args(struct bpf_verifier_env *env)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	struct bpf_prog *prog = env->prog;
	struct bpf_insn *insn = prog->insnsi;
	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
	int i, depth;
#endif
	int err = 0;

	if (env->prog->jit_requested &&
	    !bpf_prog_is_offloaded(env->prog->aux)) {
		err = bpf_jit_subprogs(env);
		if (err == 0)
			return 0;
		if (err == -EFAULT)
			return err;
	}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	if (has_kfunc_call) {
		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
		return -EINVAL;
	}
	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
		/* When JIT fails the progs with bpf2bpf calls and tail_calls
		 * have to be rejected, since interpreter doesn't support them yet.
		 */
		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
		return -EINVAL;
	}
	for (i = 0; i < prog->len; i++, insn++) {
		if (bpf_pseudo_func(insn)) {
			/* When JIT fails the progs with callback calls
			 * have to be rejected, since interpreter doesn't support them yet.
			 */
			verbose(env, "callbacks are not allowed in non-JITed programs\n");
			return -EINVAL;
		}

		if (!bpf_pseudo_call(insn))
			continue;
		/* Interpreter needs the callee's stack depth encoded into the
		 * call insn; get_callee_stack_depth() returns a negative errno
		 * on failure.
		 */
		depth = get_callee_stack_depth(env, insn, i);
		if (depth < 0)
			return depth;
		bpf_patch_call_args(insn, depth);
	}
	err = 0;
#endif
	return err;
}


/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
/* Append 'patch' (len insns) to the program as a single hidden subprog.
 * Only one hidden subprog is supported; a second request is a verifier bug.
 */
static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
{
	struct bpf_subprog_info *info = env->subprog_info;
	int cnt = env->subprog_cnt;
	struct bpf_prog *prog;

	/* We only reserve one slot for hidden subprogs in subprog_info. */
	if (env->hidden_subprog_cnt) {
		verifier_bug(env, "only one hidden subprog supported");
		return -EFAULT;
	}
	/* We're not patching any existing instruction, just appending the new
	 * ones for the hidden subprog. Hence all of the adjustment operations
	 * in bpf_patch_insn_data are no-ops.
	 */
	prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
	if (!prog)
		return -ENOMEM;
	env->prog = prog;
	/* Shift the trailing subprog_info entry up and point slot 'cnt' at the
	 * first appended insn (patch[0] duplicated the old last insn, hence
	 * the +1). NOTE(review): assumes info[] has room for the extra entry
	 * used as an end marker — confirm against subprog_info sizing.
	 */
	info[cnt + 1].start = info[cnt].start;
	info[cnt].start = prog->len - len + 1;
	env->subprog_cnt++;
	env->hidden_subprog_cnt++;
	return 0;
}

/* Do various post-verification rewrites in a single program pass.
 * These rewrites simplify JIT and interpreter implementations.
1444 */ 1445 int bpf_do_misc_fixups(struct bpf_verifier_env *env) 1446 { 1447 struct bpf_prog *prog = env->prog; 1448 enum bpf_attach_type eatype = prog->expected_attach_type; 1449 enum bpf_prog_type prog_type = resolve_prog_type(prog); 1450 struct bpf_insn *insn = prog->insnsi; 1451 const struct bpf_func_proto *fn; 1452 const int insn_cnt = prog->len; 1453 const struct bpf_map_ops *ops; 1454 struct bpf_insn_aux_data *aux; 1455 struct bpf_insn *insn_buf = env->insn_buf; 1456 struct bpf_prog *new_prog; 1457 struct bpf_map *map_ptr; 1458 int i, ret, cnt, delta = 0, cur_subprog = 0; 1459 struct bpf_subprog_info *subprogs = env->subprog_info; 1460 u16 stack_depth = subprogs[cur_subprog].stack_depth; 1461 u16 stack_depth_extra = 0; 1462 1463 if (env->seen_exception && !env->exception_callback_subprog) { 1464 struct bpf_insn *patch = insn_buf; 1465 1466 *patch++ = env->prog->insnsi[insn_cnt - 1]; 1467 *patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); 1468 *patch++ = BPF_EXIT_INSN(); 1469 ret = add_hidden_subprog(env, insn_buf, patch - insn_buf); 1470 if (ret < 0) 1471 return ret; 1472 prog = env->prog; 1473 insn = prog->insnsi; 1474 1475 env->exception_callback_subprog = env->subprog_cnt - 1; 1476 /* Don't update insn_cnt, as add_hidden_subprog always appends insns */ 1477 bpf_mark_subprog_exc_cb(env, env->exception_callback_subprog); 1478 } 1479 1480 for (i = 0; i < insn_cnt;) { 1481 if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) { 1482 if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) || 1483 (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) { 1484 /* convert to 32-bit mov that clears upper 32-bit */ 1485 insn->code = BPF_ALU | BPF_MOV | BPF_X; 1486 /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */ 1487 insn->off = 0; 1488 insn->imm = 0; 1489 } /* cast from as(0) to as(1) should be handled by JIT */ 1490 goto next_insn; 1491 } 1492 1493 if (env->insn_aux_data[i + delta].needs_zext) 1494 /* Convert 
BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */ 1495 insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code); 1496 1497 /* Make sdiv/smod divide-by-minus-one exceptions impossible. */ 1498 if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) || 1499 insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) || 1500 insn->code == (BPF_ALU | BPF_MOD | BPF_K) || 1501 insn->code == (BPF_ALU | BPF_DIV | BPF_K)) && 1502 insn->off == 1 && insn->imm == -1) { 1503 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 1504 bool isdiv = BPF_OP(insn->code) == BPF_DIV; 1505 struct bpf_insn *patch = insn_buf; 1506 1507 if (isdiv) 1508 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1509 BPF_NEG | BPF_K, insn->dst_reg, 1510 0, 0, 0); 1511 else 1512 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0); 1513 1514 cnt = patch - insn_buf; 1515 1516 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1517 if (!new_prog) 1518 return -ENOMEM; 1519 1520 delta += cnt - 1; 1521 env->prog = prog = new_prog; 1522 insn = new_prog->insnsi + i + delta; 1523 goto next_insn; 1524 } 1525 1526 /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */ 1527 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || 1528 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 1529 insn->code == (BPF_ALU | BPF_MOD | BPF_X) || 1530 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 1531 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 1532 bool isdiv = BPF_OP(insn->code) == BPF_DIV; 1533 bool is_sdiv = isdiv && insn->off == 1; 1534 bool is_smod = !isdiv && insn->off == 1; 1535 struct bpf_insn *patch = insn_buf; 1536 1537 if (is_sdiv) { 1538 /* [R,W]x sdiv 0 -> 0 1539 * LLONG_MIN sdiv -1 -> LLONG_MIN 1540 * INT_MIN sdiv -1 -> INT_MIN 1541 */ 1542 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 1543 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1544 BPF_ADD | BPF_K, BPF_REG_AX, 1545 0, 0, 1); 1546 *patch++ = BPF_RAW_INSN((is64 ? 
BPF_JMP : BPF_JMP32) | 1547 BPF_JGT | BPF_K, BPF_REG_AX, 1548 0, 4, 1); 1549 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1550 BPF_JEQ | BPF_K, BPF_REG_AX, 1551 0, 1, 0); 1552 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1553 BPF_MOV | BPF_K, insn->dst_reg, 1554 0, 0, 0); 1555 /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */ 1556 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1557 BPF_NEG | BPF_K, insn->dst_reg, 1558 0, 0, 0); 1559 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1560 *patch++ = *insn; 1561 cnt = patch - insn_buf; 1562 } else if (is_smod) { 1563 /* [R,W]x mod 0 -> [R,W]x */ 1564 /* [R,W]x mod -1 -> 0 */ 1565 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 1566 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1567 BPF_ADD | BPF_K, BPF_REG_AX, 1568 0, 0, 1); 1569 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1570 BPF_JGT | BPF_K, BPF_REG_AX, 1571 0, 3, 1); 1572 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1573 BPF_JEQ | BPF_K, BPF_REG_AX, 1574 0, 3 + (is64 ? 0 : 1), 1); 1575 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0); 1576 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1577 *patch++ = *insn; 1578 1579 if (!is64) { 1580 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1581 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg); 1582 } 1583 cnt = patch - insn_buf; 1584 } else if (isdiv) { 1585 /* [R,W]x div 0 -> 0 */ 1586 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1587 BPF_JNE | BPF_K, insn->src_reg, 1588 0, 2, 0); 1589 *patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg); 1590 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1591 *patch++ = *insn; 1592 cnt = patch - insn_buf; 1593 } else { 1594 /* [R,W]x mod 0 -> [R,W]x */ 1595 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1596 BPF_JEQ | BPF_K, insn->src_reg, 1597 0, 1 + (is64 ? 
0 : 1), 0); 1598 *patch++ = *insn; 1599 1600 if (!is64) { 1601 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1602 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg); 1603 } 1604 cnt = patch - insn_buf; 1605 } 1606 1607 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1608 if (!new_prog) 1609 return -ENOMEM; 1610 1611 delta += cnt - 1; 1612 env->prog = prog = new_prog; 1613 insn = new_prog->insnsi + i + delta; 1614 goto next_insn; 1615 } 1616 1617 /* Make it impossible to de-reference a userspace address */ 1618 if (BPF_CLASS(insn->code) == BPF_LDX && 1619 (BPF_MODE(insn->code) == BPF_PROBE_MEM || 1620 BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) { 1621 struct bpf_insn *patch = insn_buf; 1622 u64 uaddress_limit = bpf_arch_uaddress_limit(); 1623 1624 if (!uaddress_limit) 1625 goto next_insn; 1626 1627 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 1628 if (insn->off) 1629 *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off); 1630 *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32); 1631 *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2); 1632 *patch++ = *insn; 1633 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1634 *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0); 1635 1636 cnt = patch - insn_buf; 1637 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1638 if (!new_prog) 1639 return -ENOMEM; 1640 1641 delta += cnt - 1; 1642 env->prog = prog = new_prog; 1643 insn = new_prog->insnsi + i + delta; 1644 goto next_insn; 1645 } 1646 1647 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. 
*/ 1648 if (BPF_CLASS(insn->code) == BPF_LD && 1649 (BPF_MODE(insn->code) == BPF_ABS || 1650 BPF_MODE(insn->code) == BPF_IND)) { 1651 cnt = env->ops->gen_ld_abs(insn, insn_buf); 1652 if (cnt == 0 || cnt >= INSN_BUF_SIZE) { 1653 verifier_bug(env, "%d insns generated for ld_abs", cnt); 1654 return -EFAULT; 1655 } 1656 1657 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1658 if (!new_prog) 1659 return -ENOMEM; 1660 1661 delta += cnt - 1; 1662 env->prog = prog = new_prog; 1663 insn = new_prog->insnsi + i + delta; 1664 goto next_insn; 1665 } 1666 1667 /* Rewrite pointer arithmetic to mitigate speculation attacks. */ 1668 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || 1669 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { 1670 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; 1671 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; 1672 struct bpf_insn *patch = insn_buf; 1673 bool issrc, isneg, isimm; 1674 u32 off_reg; 1675 1676 aux = &env->insn_aux_data[i + delta]; 1677 if (!aux->alu_state || 1678 aux->alu_state == BPF_ALU_NON_POINTER) 1679 goto next_insn; 1680 1681 isneg = aux->alu_state & BPF_ALU_NEG_VALUE; 1682 issrc = (aux->alu_state & BPF_ALU_SANITIZE) == 1683 BPF_ALU_SANITIZE_SRC; 1684 isimm = aux->alu_state & BPF_ALU_IMMEDIATE; 1685 1686 off_reg = issrc ? 
insn->src_reg : insn->dst_reg; 1687 if (isimm) { 1688 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); 1689 } else { 1690 if (isneg) 1691 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 1692 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); 1693 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); 1694 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); 1695 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); 1696 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); 1697 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg); 1698 } 1699 if (!issrc) 1700 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg); 1701 insn->src_reg = BPF_REG_AX; 1702 if (isneg) 1703 insn->code = insn->code == code_add ? 1704 code_sub : code_add; 1705 *patch++ = *insn; 1706 if (issrc && isneg && !isimm) 1707 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 1708 cnt = patch - insn_buf; 1709 1710 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1711 if (!new_prog) 1712 return -ENOMEM; 1713 1714 delta += cnt - 1; 1715 env->prog = prog = new_prog; 1716 insn = new_prog->insnsi + i + delta; 1717 goto next_insn; 1718 } 1719 1720 if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) { 1721 int stack_off_cnt = -stack_depth - 16; 1722 1723 /* 1724 * Two 8 byte slots, depth-16 stores the count, and 1725 * depth-8 stores the start timestamp of the loop. 1726 * 1727 * The starting value of count is BPF_MAX_TIMED_LOOPS 1728 * (0xffff). Every iteration loads it and subs it by 1, 1729 * until the value becomes 0 in AX (thus, 1 in stack), 1730 * after which we call arch_bpf_timed_may_goto, which 1731 * either sets AX to 0xffff to keep looping, or to 0 1732 * upon timeout. AX is then stored into the stack. In 1733 * the next iteration, we either see 0 and break out, or 1734 * continue iterating until the next time value is 0 1735 * after subtraction, rinse and repeat. 
1736 */ 1737 stack_depth_extra = 16; 1738 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt); 1739 if (insn->off >= 0) 1740 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5); 1741 else 1742 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); 1743 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); 1744 insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2); 1745 /* 1746 * AX is used as an argument to pass in stack_off_cnt 1747 * (to add to r10/fp), and also as the return value of 1748 * the call to arch_bpf_timed_may_goto. 1749 */ 1750 insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt); 1751 insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto); 1752 insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt); 1753 cnt = 7; 1754 1755 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1756 if (!new_prog) 1757 return -ENOMEM; 1758 1759 delta += cnt - 1; 1760 env->prog = prog = new_prog; 1761 insn = new_prog->insnsi + i + delta; 1762 goto next_insn; 1763 } else if (bpf_is_may_goto_insn(insn)) { 1764 int stack_off = -stack_depth - 8; 1765 1766 stack_depth_extra = 8; 1767 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off); 1768 if (insn->off >= 0) 1769 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); 1770 else 1771 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); 1772 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); 1773 insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off); 1774 cnt = 4; 1775 1776 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1777 if (!new_prog) 1778 return -ENOMEM; 1779 1780 delta += cnt - 1; 1781 env->prog = prog = new_prog; 1782 insn = new_prog->insnsi + i + delta; 1783 goto next_insn; 1784 } 1785 1786 if (insn->code != (BPF_JMP | BPF_CALL)) 1787 goto next_insn; 1788 if (insn->src_reg == BPF_PSEUDO_CALL) 1789 goto next_insn; 1790 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 1791 ret = 
bpf_fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt); 1792 if (ret) 1793 return ret; 1794 if (cnt == 0) 1795 goto next_insn; 1796 1797 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1798 if (!new_prog) 1799 return -ENOMEM; 1800 1801 delta += cnt - 1; 1802 env->prog = prog = new_prog; 1803 insn = new_prog->insnsi + i + delta; 1804 goto next_insn; 1805 } 1806 1807 /* Skip inlining the helper call if the JIT does it. */ 1808 if (bpf_jit_inlines_helper_call(insn->imm)) 1809 goto next_insn; 1810 1811 if (insn->imm == BPF_FUNC_get_route_realm) 1812 prog->dst_needed = 1; 1813 if (insn->imm == BPF_FUNC_get_prandom_u32) 1814 bpf_user_rnd_init_once(); 1815 if (insn->imm == BPF_FUNC_override_return) 1816 prog->kprobe_override = 1; 1817 if (insn->imm == BPF_FUNC_tail_call) { 1818 /* If we tail call into other programs, we 1819 * cannot make any assumptions since they can 1820 * be replaced dynamically during runtime in 1821 * the program array. 1822 */ 1823 prog->cb_access = 1; 1824 if (!bpf_allow_tail_call_in_subprogs(env)) 1825 prog->aux->stack_depth = MAX_BPF_STACK; 1826 prog->aux->max_pkt_offset = MAX_PACKET_OFF; 1827 1828 /* mark bpf_tail_call as different opcode to avoid 1829 * conditional branch in the interpreter for every normal 1830 * call and to prevent accidental JITing by JIT compiler 1831 * that doesn't support bpf_tail_call yet 1832 */ 1833 insn->imm = 0; 1834 insn->code = BPF_JMP | BPF_TAIL_CALL; 1835 1836 aux = &env->insn_aux_data[i + delta]; 1837 if (env->bpf_capable && !prog->blinding_requested && 1838 prog->jit_requested && 1839 !bpf_map_key_poisoned(aux) && 1840 !bpf_map_ptr_poisoned(aux) && 1841 !bpf_map_ptr_unpriv(aux)) { 1842 struct bpf_jit_poke_descriptor desc = { 1843 .reason = BPF_POKE_REASON_TAIL_CALL, 1844 .tail_call.map = aux->map_ptr_state.map_ptr, 1845 .tail_call.key = bpf_map_key_immediate(aux), 1846 .insn_idx = i + delta, 1847 }; 1848 1849 ret = bpf_jit_add_poke_descriptor(prog, &desc); 1850 if (ret < 0) { 1851 
verbose(env, "adding tail call poke descriptor failed\n"); 1852 return ret; 1853 } 1854 1855 insn->imm = ret + 1; 1856 goto next_insn; 1857 } 1858 1859 if (!bpf_map_ptr_unpriv(aux)) 1860 goto next_insn; 1861 1862 /* instead of changing every JIT dealing with tail_call 1863 * emit two extra insns: 1864 * if (index >= max_entries) goto out; 1865 * index &= array->index_mask; 1866 * to avoid out-of-bounds cpu speculation 1867 */ 1868 if (bpf_map_ptr_poisoned(aux)) { 1869 verbose(env, "tail_call abusing map_ptr\n"); 1870 return -EINVAL; 1871 } 1872 1873 map_ptr = aux->map_ptr_state.map_ptr; 1874 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, 1875 map_ptr->max_entries, 2); 1876 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 1877 container_of(map_ptr, 1878 struct bpf_array, 1879 map)->index_mask); 1880 insn_buf[2] = *insn; 1881 cnt = 3; 1882 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1883 if (!new_prog) 1884 return -ENOMEM; 1885 1886 delta += cnt - 1; 1887 env->prog = prog = new_prog; 1888 insn = new_prog->insnsi + i + delta; 1889 goto next_insn; 1890 } 1891 1892 if (insn->imm == BPF_FUNC_timer_set_callback) { 1893 /* The verifier will process callback_fn as many times as necessary 1894 * with different maps and the register states prepared by 1895 * set_timer_callback_state will be accurate. 1896 * 1897 * The following use case is valid: 1898 * map1 is shared by prog1, prog2, prog3. 1899 * prog1 calls bpf_timer_init for some map1 elements 1900 * prog2 calls bpf_timer_set_callback for some map1 elements. 1901 * Those that were not bpf_timer_init-ed will return -EINVAL. 1902 * prog3 calls bpf_timer_start for some map1 elements. 1903 * Those that were not both bpf_timer_init-ed and 1904 * bpf_timer_set_callback-ed will return -EINVAL. 
1905 */ 1906 struct bpf_insn ld_addrs[2] = { 1907 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux), 1908 }; 1909 1910 insn_buf[0] = ld_addrs[0]; 1911 insn_buf[1] = ld_addrs[1]; 1912 insn_buf[2] = *insn; 1913 cnt = 3; 1914 1915 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1916 if (!new_prog) 1917 return -ENOMEM; 1918 1919 delta += cnt - 1; 1920 env->prog = prog = new_prog; 1921 insn = new_prog->insnsi + i + delta; 1922 goto patch_call_imm; 1923 } 1924 1925 /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */ 1926 if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) { 1927 /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data, 1928 * bpf_mem_alloc() returns a ptr to the percpu data ptr. 1929 */ 1930 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); 1931 insn_buf[1] = *insn; 1932 cnt = 2; 1933 1934 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1935 if (!new_prog) 1936 return -ENOMEM; 1937 1938 delta += cnt - 1; 1939 env->prog = prog = new_prog; 1940 insn = new_prog->insnsi + i + delta; 1941 goto patch_call_imm; 1942 } 1943 1944 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup 1945 * and other inlining handlers are currently limited to 64 bit 1946 * only. 
1947 */ 1948 if (prog->jit_requested && BITS_PER_LONG == 64 && 1949 (insn->imm == BPF_FUNC_map_lookup_elem || 1950 insn->imm == BPF_FUNC_map_update_elem || 1951 insn->imm == BPF_FUNC_map_delete_elem || 1952 insn->imm == BPF_FUNC_map_push_elem || 1953 insn->imm == BPF_FUNC_map_pop_elem || 1954 insn->imm == BPF_FUNC_map_peek_elem || 1955 insn->imm == BPF_FUNC_redirect_map || 1956 insn->imm == BPF_FUNC_for_each_map_elem || 1957 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) { 1958 aux = &env->insn_aux_data[i + delta]; 1959 if (bpf_map_ptr_poisoned(aux)) 1960 goto patch_call_imm; 1961 1962 map_ptr = aux->map_ptr_state.map_ptr; 1963 ops = map_ptr->ops; 1964 if (insn->imm == BPF_FUNC_map_lookup_elem && 1965 ops->map_gen_lookup) { 1966 cnt = ops->map_gen_lookup(map_ptr, insn_buf); 1967 if (cnt == -EOPNOTSUPP) 1968 goto patch_map_ops_generic; 1969 if (cnt <= 0 || cnt >= INSN_BUF_SIZE) { 1970 verifier_bug(env, "%d insns generated for map lookup", cnt); 1971 return -EFAULT; 1972 } 1973 1974 new_prog = bpf_patch_insn_data(env, i + delta, 1975 insn_buf, cnt); 1976 if (!new_prog) 1977 return -ENOMEM; 1978 1979 delta += cnt - 1; 1980 env->prog = prog = new_prog; 1981 insn = new_prog->insnsi + i + delta; 1982 goto next_insn; 1983 } 1984 1985 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, 1986 (void *(*)(struct bpf_map *map, void *key))NULL)); 1987 BUILD_BUG_ON(!__same_type(ops->map_delete_elem, 1988 (long (*)(struct bpf_map *map, void *key))NULL)); 1989 BUILD_BUG_ON(!__same_type(ops->map_update_elem, 1990 (long (*)(struct bpf_map *map, void *key, void *value, 1991 u64 flags))NULL)); 1992 BUILD_BUG_ON(!__same_type(ops->map_push_elem, 1993 (long (*)(struct bpf_map *map, void *value, 1994 u64 flags))NULL)); 1995 BUILD_BUG_ON(!__same_type(ops->map_pop_elem, 1996 (long (*)(struct bpf_map *map, void *value))NULL)); 1997 BUILD_BUG_ON(!__same_type(ops->map_peek_elem, 1998 (long (*)(struct bpf_map *map, void *value))NULL)); 1999 BUILD_BUG_ON(!__same_type(ops->map_redirect, 2000 (long 
(*)(struct bpf_map *map, u64 index, u64 flags))NULL)); 2001 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, 2002 (long (*)(struct bpf_map *map, 2003 bpf_callback_t callback_fn, 2004 void *callback_ctx, 2005 u64 flags))NULL)); 2006 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem, 2007 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL)); 2008 2009 patch_map_ops_generic: 2010 switch (insn->imm) { 2011 case BPF_FUNC_map_lookup_elem: 2012 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem); 2013 goto next_insn; 2014 case BPF_FUNC_map_update_elem: 2015 insn->imm = BPF_CALL_IMM(ops->map_update_elem); 2016 goto next_insn; 2017 case BPF_FUNC_map_delete_elem: 2018 insn->imm = BPF_CALL_IMM(ops->map_delete_elem); 2019 goto next_insn; 2020 case BPF_FUNC_map_push_elem: 2021 insn->imm = BPF_CALL_IMM(ops->map_push_elem); 2022 goto next_insn; 2023 case BPF_FUNC_map_pop_elem: 2024 insn->imm = BPF_CALL_IMM(ops->map_pop_elem); 2025 goto next_insn; 2026 case BPF_FUNC_map_peek_elem: 2027 insn->imm = BPF_CALL_IMM(ops->map_peek_elem); 2028 goto next_insn; 2029 case BPF_FUNC_redirect_map: 2030 insn->imm = BPF_CALL_IMM(ops->map_redirect); 2031 goto next_insn; 2032 case BPF_FUNC_for_each_map_elem: 2033 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); 2034 goto next_insn; 2035 case BPF_FUNC_map_lookup_percpu_elem: 2036 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); 2037 goto next_insn; 2038 } 2039 2040 goto patch_call_imm; 2041 } 2042 2043 /* Implement bpf_jiffies64 inline. 
*/ 2044 if (prog->jit_requested && BITS_PER_LONG == 64 && 2045 insn->imm == BPF_FUNC_jiffies64) { 2046 struct bpf_insn ld_jiffies_addr[2] = { 2047 BPF_LD_IMM64(BPF_REG_0, 2048 (unsigned long)&jiffies), 2049 }; 2050 2051 insn_buf[0] = ld_jiffies_addr[0]; 2052 insn_buf[1] = ld_jiffies_addr[1]; 2053 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, 2054 BPF_REG_0, 0); 2055 cnt = 3; 2056 2057 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 2058 cnt); 2059 if (!new_prog) 2060 return -ENOMEM; 2061 2062 delta += cnt - 1; 2063 env->prog = prog = new_prog; 2064 insn = new_prog->insnsi + i + delta; 2065 goto next_insn; 2066 } 2067 2068 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) 2069 /* Implement bpf_get_smp_processor_id() inline. */ 2070 if (insn->imm == BPF_FUNC_get_smp_processor_id && 2071 bpf_verifier_inlines_helper_call(env, insn->imm)) { 2072 /* BPF_FUNC_get_smp_processor_id inlining is an 2073 * optimization, so if cpu_number is ever 2074 * changed in some incompatible and hard to support 2075 * way, it's fine to back out this inlining logic 2076 */ 2077 #ifdef CONFIG_SMP 2078 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number); 2079 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2080 insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0); 2081 cnt = 3; 2082 #else 2083 insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); 2084 cnt = 1; 2085 #endif 2086 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2087 if (!new_prog) 2088 return -ENOMEM; 2089 2090 delta += cnt - 1; 2091 env->prog = prog = new_prog; 2092 insn = new_prog->insnsi + i + delta; 2093 goto next_insn; 2094 } 2095 2096 /* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. 
*/ 2097 if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) && 2098 bpf_verifier_inlines_helper_call(env, insn->imm)) { 2099 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)¤t_task); 2100 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2101 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 2102 cnt = 3; 2103 2104 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2105 if (!new_prog) 2106 return -ENOMEM; 2107 2108 delta += cnt - 1; 2109 env->prog = prog = new_prog; 2110 insn = new_prog->insnsi + i + delta; 2111 goto next_insn; 2112 } 2113 #endif 2114 /* Implement bpf_get_func_arg inline. */ 2115 if (prog_type == BPF_PROG_TYPE_TRACING && 2116 insn->imm == BPF_FUNC_get_func_arg) { 2117 if (eatype == BPF_TRACE_RAW_TP) { 2118 int nr_args = btf_type_vlen(prog->aux->attach_func_proto); 2119 2120 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */ 2121 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1); 2122 cnt = 1; 2123 } else { 2124 /* Load nr_args from ctx - 8 */ 2125 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 2126 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 2127 cnt = 2; 2128 } 2129 insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); 2130 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); 2131 insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); 2132 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); 2133 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); 2134 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0); 2135 insn_buf[cnt++] = BPF_JMP_A(1); 2136 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); 2137 2138 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2139 if (!new_prog) 2140 return -ENOMEM; 2141 2142 delta += cnt - 1; 2143 env->prog = prog = new_prog; 2144 insn = new_prog->insnsi + i + delta; 2145 goto next_insn; 2146 } 2147 2148 /* Implement bpf_get_func_ret inline. 
*/ 2149 if (prog_type == BPF_PROG_TYPE_TRACING && 2150 insn->imm == BPF_FUNC_get_func_ret) { 2151 if (eatype == BPF_TRACE_FEXIT || 2152 eatype == BPF_TRACE_FSESSION || 2153 eatype == BPF_MODIFY_RETURN) { 2154 /* Load nr_args from ctx - 8 */ 2155 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 2156 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 2157 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); 2158 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); 2159 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); 2160 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); 2161 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); 2162 cnt = 7; 2163 } else { 2164 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); 2165 cnt = 1; 2166 } 2167 2168 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2169 if (!new_prog) 2170 return -ENOMEM; 2171 2172 delta += cnt - 1; 2173 env->prog = prog = new_prog; 2174 insn = new_prog->insnsi + i + delta; 2175 goto next_insn; 2176 } 2177 2178 /* Implement get_func_arg_cnt inline. */ 2179 if (prog_type == BPF_PROG_TYPE_TRACING && 2180 insn->imm == BPF_FUNC_get_func_arg_cnt) { 2181 if (eatype == BPF_TRACE_RAW_TP) { 2182 int nr_args = btf_type_vlen(prog->aux->attach_func_proto); 2183 2184 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */ 2185 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1); 2186 cnt = 1; 2187 } else { 2188 /* Load nr_args from ctx - 8 */ 2189 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 2190 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 2191 cnt = 2; 2192 } 2193 2194 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2195 if (!new_prog) 2196 return -ENOMEM; 2197 2198 delta += cnt - 1; 2199 env->prog = prog = new_prog; 2200 insn = new_prog->insnsi + i + delta; 2201 goto next_insn; 2202 } 2203 2204 /* Implement bpf_get_func_ip inline. 
*/ 2205 if (prog_type == BPF_PROG_TYPE_TRACING && 2206 insn->imm == BPF_FUNC_get_func_ip) { 2207 /* Load IP address from ctx - 16 */ 2208 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16); 2209 2210 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); 2211 if (!new_prog) 2212 return -ENOMEM; 2213 2214 env->prog = prog = new_prog; 2215 insn = new_prog->insnsi + i + delta; 2216 goto next_insn; 2217 } 2218 2219 /* Implement bpf_get_branch_snapshot inline. */ 2220 if (IS_ENABLED(CONFIG_PERF_EVENTS) && 2221 prog->jit_requested && BITS_PER_LONG == 64 && 2222 insn->imm == BPF_FUNC_get_branch_snapshot) { 2223 /* We are dealing with the following func protos: 2224 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags); 2225 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt); 2226 */ 2227 const u32 br_entry_size = sizeof(struct perf_branch_entry); 2228 2229 /* struct perf_branch_entry is part of UAPI and is 2230 * used as an array element, so extremely unlikely to 2231 * ever grow or shrink 2232 */ 2233 BUILD_BUG_ON(br_entry_size != 24); 2234 2235 /* if (unlikely(flags)) return -EINVAL */ 2236 insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7); 2237 2238 /* Transform size (bytes) into number of entries (cnt = size / 24). 2239 * But to avoid expensive division instruction, we implement 2240 * divide-by-3 through multiplication, followed by further 2241 * division by 8 through 3-bit right shift. 2242 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr., 2243 * p. 227, chapter "Unsigned Division by 3" for details and proofs. 2244 * 2245 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab. 
2246 */ 2247 insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab); 2248 insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0); 2249 insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36); 2250 2251 /* call perf_snapshot_branch_stack implementation */ 2252 insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack)); 2253 /* if (entry_cnt == 0) return -ENOENT */ 2254 insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4); 2255 /* return entry_cnt * sizeof(struct perf_branch_entry) */ 2256 insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size); 2257 insn_buf[7] = BPF_JMP_A(3); 2258 /* return -EINVAL; */ 2259 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); 2260 insn_buf[9] = BPF_JMP_A(1); 2261 /* return -ENOENT; */ 2262 insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT); 2263 cnt = 11; 2264 2265 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2266 if (!new_prog) 2267 return -ENOMEM; 2268 2269 delta += cnt - 1; 2270 env->prog = prog = new_prog; 2271 insn = new_prog->insnsi + i + delta; 2272 goto next_insn; 2273 } 2274 2275 /* Implement bpf_kptr_xchg inline */ 2276 if (prog->jit_requested && BITS_PER_LONG == 64 && 2277 insn->imm == BPF_FUNC_kptr_xchg && 2278 bpf_jit_supports_ptr_xchg()) { 2279 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2); 2280 insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0); 2281 cnt = 2; 2282 2283 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2284 if (!new_prog) 2285 return -ENOMEM; 2286 2287 delta += cnt - 1; 2288 env->prog = prog = new_prog; 2289 insn = new_prog->insnsi + i + delta; 2290 goto next_insn; 2291 } 2292 patch_call_imm: 2293 fn = env->ops->get_func_proto(insn->imm, env->prog); 2294 /* all functions that have prototype and verifier allowed 2295 * programs to call them, must be real in-kernel functions 2296 */ 2297 if (!fn->func) { 2298 verifier_bug(env, 2299 "not inlined functions %s#%d is missing func", 2300 func_id_name(insn->imm), insn->imm); 2301 return 
-EFAULT; 2302 } 2303 insn->imm = fn->func - __bpf_call_base; 2304 next_insn: 2305 if (subprogs[cur_subprog + 1].start == i + delta + 1) { 2306 subprogs[cur_subprog].stack_depth += stack_depth_extra; 2307 subprogs[cur_subprog].stack_extra = stack_depth_extra; 2308 2309 stack_depth = subprogs[cur_subprog].stack_depth; 2310 if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) { 2311 verbose(env, "stack size %d(extra %d) is too large\n", 2312 stack_depth, stack_depth_extra); 2313 return -EINVAL; 2314 } 2315 cur_subprog++; 2316 stack_depth = subprogs[cur_subprog].stack_depth; 2317 stack_depth_extra = 0; 2318 } 2319 i++; 2320 insn++; 2321 } 2322 2323 env->prog->aux->stack_depth = subprogs[0].stack_depth; 2324 for (i = 0; i < env->subprog_cnt; i++) { 2325 int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1; 2326 int subprog_start = subprogs[i].start; 2327 int stack_slots = subprogs[i].stack_extra / 8; 2328 int slots = delta, cnt = 0; 2329 2330 if (!stack_slots) 2331 continue; 2332 /* We need two slots in case timed may_goto is supported. 
*/ 2333 if (stack_slots > slots) { 2334 verifier_bug(env, "stack_slots supports may_goto only"); 2335 return -EFAULT; 2336 } 2337 2338 stack_depth = subprogs[i].stack_depth; 2339 if (bpf_jit_supports_timed_may_goto()) { 2340 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, 2341 BPF_MAX_TIMED_LOOPS); 2342 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0); 2343 } else { 2344 /* Add ST insn to subprog prologue to init extra stack */ 2345 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, 2346 BPF_MAX_LOOPS); 2347 } 2348 /* Copy first actual insn to preserve it */ 2349 insn_buf[cnt++] = env->prog->insnsi[subprog_start]; 2350 2351 new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt); 2352 if (!new_prog) 2353 return -ENOMEM; 2354 env->prog = prog = new_prog; 2355 /* 2356 * If may_goto is a first insn of a prog there could be a jmp 2357 * insn that points to it, hence adjust all such jmps to point 2358 * to insn after BPF_ST that inits may_goto count. 2359 * Adjustment will succeed because bpf_patch_insn_data() didn't fail. 2360 */ 2361 WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta)); 2362 } 2363 2364 /* Since poke tab is now finalized, publish aux to tracker. 
 */
	for (i = 0; i < prog->aux->size_poke_tab; i++) {
		map_ptr = prog->aux->poke_tab[i].tail_call.map;
		/* Every map in the poke tab must implement the complete poke
		 * protocol (track/untrack/run); anything else is a
		 * verifier-internal inconsistency.
		 */
		if (!map_ptr->ops->map_poke_track ||
		    !map_ptr->ops->map_poke_untrack ||
		    !map_ptr->ops->map_poke_run) {
			verifier_bug(env, "poke tab is misconfigured");
			return -EFAULT;
		}

		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
		if (ret < 0) {
			verbose(env, "tracking tail call prog failed\n");
			return ret;
		}
	}

	/* Compute the final desc->imm for every kfunc and re-sort the table
	 * by (imm, offset) so that bpf_jit_find_kfunc_model() can bsearch it
	 * with kfunc_desc_cmp_by_imm_off().
	 */
	ret = sort_kfunc_descs_by_imm_off(env);
	if (ret)
		return ret;

	return 0;
}

/* Build and splice in an inlined expansion of the bpf_loop() helper in
 * place of the call instruction at index @position.
 *
 * @stack_base is the (negative) frame offset of three scratch slots used
 * to spill/restore R6, R7 and R8, which serve as the loop bound, the loop
 * counter and the callback context while the loop runs.
 * @callback_subprogno identifies the callback subprog that the emitted
 * BPF_CALL_REL targets; its relative offset is fixed up after patching.
 * On success *total_cnt is set to the number of instructions in the patch
 * and the patched program is returned; NULL means bpf_patch_insn_data()
 * failed (out of memory).
 */
static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
					int position,
					s32 stack_base,
					u32 callback_subprogno,
					u32 *total_cnt)
{
	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
	int reg_loop_max = BPF_REG_6;
	int reg_loop_cnt = BPF_REG_7;
	int reg_loop_ctx = BPF_REG_8;

	struct bpf_insn *insn_buf = env->insn_buf;
	struct bpf_prog *new_prog;
	u32 callback_start;
	u32 call_insn_offset;
	s32 callback_offset;
	u32 cnt = 0;

	/* This represents an inlined version of bpf_iter.c:bpf_loop,
	 * be careful to modify this code in sync.
	 */

	/* Return error and jump to the end of the patch if
	 * expected number of iterations is too big.
	 */
	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
	insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
	/* spill R6, R7, R8 to use these as loop vars */
	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
	insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
	/* initialize loop vars */
	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
	insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
	insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
	/* loop header,
	 * if reg_loop_cnt >= reg_loop_max skip the loop body
	 */
	insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
	/* callback call,
	 * correct callback offset would be set after patching
	 */
	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
	insn_buf[cnt++] = BPF_CALL_REL(0);
	/* increment loop counter */
	insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
	/* jump to loop header if callback returned 0 */
	insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
	/* return value of bpf_loop,
	 * set R0 to the number of iterations
	 */
	insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
	/* restore original values of R6, R7, R8 */
	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
	insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);

	*total_cnt = cnt;
	new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
	if (!new_prog)
		return new_prog;

	/* callback start is known only after patching */
	callback_start = env->subprog_info[callback_subprogno].start;
	/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
	call_insn_offset = position + 12;
	callback_offset = callback_start - call_insn_offset - 1;
	new_prog->insnsi[call_insn_offset].imm = callback_offset;

	return new_prog;
}

/* True if @insn is a direct call to the bpf_loop() helper.
 * NOTE(review): src_reg == 0 is taken to mean a plain helper call as
 * opposed to a kfunc/subprog pseudo call — relies on the BPF_PSEUDO_*
 * src_reg encodings being non-zero.
 */
static bool is_bpf_loop_call(struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
		insn->src_reg == 0 &&
		insn->imm == BPF_FUNC_loop;
}

/* For all sub-programs in the program (including main) check
 * insn_aux_data to see if there are bpf_loop calls that require
 * inlining. If such calls are found the calls are replaced with a
 * sequence of instructions produced by `inline_bpf_loop` function and
 * subprog stack_depth is increased by the size of 3 registers.
 * This stack space is used to spill values of the R6, R7, R8. These
 * registers are used to store the loop bound, counter and context
 * variables.
 */
/* Walk the whole program and, for every bpf_loop() call that earlier
 * verification marked fit_for_inline, splice in the instruction sequence
 * built by inline_bpf_loop(). The containing subprog's stack grows by
 * three BPF_REG_SIZE spill slots (for R6-R8) plus the padding needed to
 * keep its current depth 8-byte aligned.
 * Returns 0 on success, -ENOMEM if instruction patching fails.
 */
int bpf_optimize_bpf_loop(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprogs = env->subprog_info;
	int i, cur_subprog = 0, cnt, delta = 0;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	u16 stack_depth = subprogs[cur_subprog].stack_depth;
	/* padding that keeps the three new spill slots 8-byte aligned */
	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
	u16 stack_depth_extra = 0;

	for (i = 0; i < insn_cnt; i++, insn++) {
		/* aux data is indexed by position in the patched program,
		 * hence i + delta
		 */
		struct bpf_loop_inline_state *inline_state =
			&env->insn_aux_data[i + delta].loop_inline_state;

		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
			struct bpf_prog *new_prog;

			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
			/* stack_base is negative: slots live below the
			 * current frame bottom
			 */
			new_prog = inline_bpf_loop(env,
						   i + delta,
						   -(stack_depth + stack_depth_extra),
						   inline_state->callback_subprogno,
						   &cnt);
			if (!new_prog)
				return -ENOMEM;

			/* one call insn was replaced by cnt insns */
			delta += cnt - 1;
			env->prog = new_prog;
			insn = new_prog->insnsi + i + delta;
		}

		/* Crossed into the next subprog: commit the extra stack
		 * accumulated for the one just finished.
		 */
		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
			subprogs[cur_subprog].stack_depth += stack_depth_extra;
			cur_subprog++;
			stack_depth = subprogs[cur_subprog].stack_depth;
			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
			stack_depth_extra = 0;
		}
	}

	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;

	return 0;
}

/* Remove unnecessary spill/fill pairs, members of fastcall pattern,
 * adjust subprograms stack depth when possible.
2527 */ 2528 int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env) 2529 { 2530 struct bpf_subprog_info *subprog = env->subprog_info; 2531 struct bpf_insn_aux_data *aux = env->insn_aux_data; 2532 struct bpf_insn *insn = env->prog->insnsi; 2533 int insn_cnt = env->prog->len; 2534 u32 spills_num; 2535 bool modified = false; 2536 int i, j; 2537 2538 for (i = 0; i < insn_cnt; i++, insn++) { 2539 if (aux[i].fastcall_spills_num > 0) { 2540 spills_num = aux[i].fastcall_spills_num; 2541 /* NOPs would be removed by opt_remove_nops() */ 2542 for (j = 1; j <= spills_num; ++j) { 2543 *(insn - j) = NOP; 2544 *(insn + j) = NOP; 2545 } 2546 modified = true; 2547 } 2548 if ((subprog + 1)->start == i + 1) { 2549 if (modified && !subprog->keep_fastcall_stack) 2550 subprog->stack_depth = -subprog->fastcall_stack_off; 2551 subprog++; 2552 modified = false; 2553 } 2554 } 2555 2556 return 0; 2557 } 2558 2559