// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <linux/vmalloc.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <net/xdp.h>
#include "disasm.h"

#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)

/*
 * Return true if INSN is a BPF_STX | BPF_ATOMIC instruction whose imm is
 * BPF_CMPXCHG. Only the BPF_ATOMIC mode is matched here; BPF_PROBE_ATOMIC
 * is not.
 */
static bool is_cmpxchg_insn(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_STX &&
	       BPF_MODE(insn->code) == BPF_ATOMIC &&
	       insn->imm == BPF_CMPXCHG;
}

/* Return the regno defined by the insn, or -1. */
static int insn_def_regno(const struct bpf_insn *insn)
{
	switch (BPF_CLASS(insn->code)) {
	case BPF_JMP:
	case BPF_JMP32:
	case BPF_ST:
		/* these classes are reported as defining no register */
		return -1;
	case BPF_STX:
		/* only atomic flavours of STX can define a register */
		if (BPF_MODE(insn->code) == BPF_ATOMIC ||
		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
			if (insn->imm == BPF_CMPXCHG)
				return BPF_REG_0;
			else if (insn->imm == BPF_LOAD_ACQ)
				return insn->dst_reg;
			else if (insn->imm & BPF_FETCH)
				return insn->src_reg;
		}
		return -1;
	default:
		return insn->dst_reg;
	}
}

/* Return TRUE if INSN has defined any 32-bit value explicitly. */
static bool insn_has_def32(struct bpf_insn *insn)
{
	int dst_reg = insn_def_regno(insn);

	if (dst_reg == -1)
		return false;

	return !bpf_is_reg64(insn, dst_reg, NULL, DST_OP);
}

/*
 * Comparator over struct bpf_kfunc_desc: orders by ->imm first and by
 * ->offset second. Used for both sort() and bsearch() in this file, so the
 * two users agree on the ordering.
 */
static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
{
	const struct bpf_kfunc_desc *d0 = a;
	const struct bpf_kfunc_desc *d1 = b;

	if (d0->imm != d1->imm)
		return d0->imm < d1->imm ? -1 : 1;
	if (d0->offset != d1->offset)
		return d0->offset < d1->offset ? -1 : 1;
	return 0;
}

/*
 * Look up the function model of the kfunc called by INSN.
 *
 * The key is (insn->imm, insn->off) and the lookup is a binary search over
 * prog->aux->kfunc_tab using kfunc_desc_cmp_by_imm_off(), i.e. it expects the
 * ordering produced by sort_kfunc_descs_by_imm_off().
 *
 * Returns a pointer to the descriptor's func_model, or NULL when no
 * descriptor matches.
 *
 * NOTE(review): prog->aux->kfunc_tab is dereferenced without a NULL check;
 * presumably callers only get here for programs that have a kfunc table.
 */
const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
			 const struct bpf_insn *insn)
{
	const struct bpf_kfunc_desc desc = {
		.imm = insn->imm,
		.offset = insn->off,
	};
	const struct bpf_kfunc_desc *res;
	struct bpf_kfunc_desc_tab *tab;

	tab = prog->aux->kfunc_tab;
	res = bsearch(&desc, tab->descs, tab->nr_descs,
		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);

	return res ? &res->func_model : NULL;
}

/*
 * Fill in desc->imm.
 *
 * When the JIT supports far kfunc calls the imm is simply desc->func_id.
 * Otherwise it is BPF_CALL_IMM(desc->addr), which must survive a round trip
 * through s32; if it does not, the problem is logged and -EINVAL is returned.
 *
 * Returns 0 on success.
 */
static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
{
	unsigned long call_imm;

	if (bpf_jit_supports_far_kfunc_call()) {
		call_imm = desc->func_id;
	} else {
		call_imm = BPF_CALL_IMM(desc->addr);
		/* Check whether the relative offset overflows desc->imm */
		if ((unsigned long)(s32)call_imm != call_imm) {
			verbose(env, "address of kernel func_id %u is out of range\n",
				desc->func_id);
			return -EINVAL;
		}
	}
	desc->imm = call_imm;
	return 0;
}

/*
 * Compute ->imm for every kfunc descriptor of env->prog and then sort the
 * table by (imm, offset).
 *
 * Returns 0 when there is no kfunc table or on success, otherwise the error
 * from set_kfunc_desc_imm() (the table is left unsorted in that case).
 */
static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
{
	struct bpf_kfunc_desc_tab *tab;
	int i, err;

	tab = env->prog->aux->kfunc_tab;
	if (!tab)
		return 0;

	for (i = 0; i < tab->nr_descs; i++) {
		err = set_kfunc_desc_imm(env, &tab->descs[i]);
		if (err)
			return err;
	}

	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
	     kfunc_desc_cmp_by_imm_off, NULL);
	return 0;
}

/*
 * Scan CNT instructions starting at INSN and register every pseudo kfunc
 * call found via bpf_add_kfunc_call().
 *
 * Returns 0 on success or the first negative value returned by
 * bpf_add_kfunc_call().
 */
static int add_kfunc_in_insns(struct bpf_verifier_env *env,
			      struct bpf_insn *insn, int cnt)
{
	int i, ret;

	for (i = 0; i < cnt; i++, insn++) {
		if (bpf_pseudo_kfunc_call(insn)) {
			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/*
 * Return the stack depth of the subprogram targeted by the call INSN located
 * at index IDX (target = idx + insn->imm + 1), or -EFAULT if no subprogram
 * starts there.
 */
static int get_callee_stack_depth(struct bpf_verifier_env *env,
				  const struct bpf_insn *insn, int idx)
{
	int start = idx + insn->imm + 1, subprog;

	subprog = bpf_find_subprog(env, start);
	if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
		return -EFAULT;
	return env->subprog_info[subprog].stack_depth;
}
#endif

/* single env->prog->insni[off] instruction was replaced with the range
 * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
 * [0, off) and [off, end) to new locations, so the patched range stays zero
 */
static void adjust_insn_aux_data(struct bpf_verifier_env *env,
				 struct bpf_prog *new_prog, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *data = env->insn_aux_data;
	struct bpf_insn *insn = new_prog->insnsi;
	u32 old_seen = data[off].seen;
	u32 prog_len;
	int i;

	/* aux info at OFF always needs adjustment, no matter fast path
	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
	 * original insn at old prog.
	 */
	data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);

	if (cnt == 1)
		return;
	prog_len = new_prog->len;

	/* slide the aux entry of the patched insn and everything after it up
	 * by cnt - 1 slots, then zero the slots that were opened up
	 */
	memmove(data + off + cnt - 1, data + off,
		sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
	memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
	for (i = off; i < off + cnt - 1; i++) {
		/* Expand insni[off]'s seen count to the patched range. */
		data[i].seen = old_seen;
		data[i].zext_dst = insn_has_def32(insn + i);
	}

	/*
	 * The indirect_target flag of the original instruction was moved to the last of the
	 * new instructions by the above memmove and memset, but the indirect jump target is
	 * actually the first instruction, so move it back. This also matches with the behavior
	 * of bpf_insn_array_adjust(), which preserves xlated_off to point to the first new
	 * instruction.
	 */
	if (data[off + cnt - 1].indirect_target) {
		data[off].indirect_target = 1;
		data[off + cnt - 1].indirect_target = 0;
	}
}

/*
 * One instruction at OFF was replaced by LEN instructions: shift the start
 * of every subprogram that begins after OFF by len - 1. Nothing to do when
 * the replacement did not change the program length (len == 1).
 */
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int i;

	if (len == 1)
		return;
	/* NOTE: fake 'exit' subprog should be updated as well. */
	for (i = 0; i <= env->subprog_cnt; i++) {
		if (env->subprog_info[i].start <= off)
			continue;
		env->subprog_info[i].start += len - 1;
	}
}

/*
 * Forward an "insn at OFF became LEN insns" patch to every instruction array
 * map tracked in env->insn_array_maps. Skipped when len == 1.
 */
static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int i;

	if (len == 1)
		return;

	for (i = 0; i < env->insn_array_map_cnt; i++)
		bpf_insn_array_adjust(env->insn_array_maps[i], off, len);
}

/*
 * Forward a "LEN insns starting at OFF were removed" event to every
 * instruction array map tracked in env->insn_array_maps.
 */
static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int i;

	for (i = 0; i < env->insn_array_map_cnt; i++)
		bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len);
}

/*
 * One instruction at OFF was replaced by LEN instructions: bump insn_idx of
 * every poke descriptor that refers to an instruction after OFF by len - 1.
 */
static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
{
	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
	int i, sz = prog->aux->size_poke_tab;
	struct bpf_jit_poke_descriptor *desc;

	for (i = 0; i < sz; i++) {
		desc = &tab[i];
		if (desc->insn_idx <= off)
			continue;
		desc->insn_idx += len - 1;
	}
}

/*
 * Replace the instruction at OFF in env->prog with the LEN instructions in
 * PATCH and keep the verifier's side tables in sync: insn_aux_data (grown
 * first when len > 1), subprog starts, instruction array maps and poke
 * descriptors.
 *
 * Returns the patched program, or NULL when growing insn_aux_data or
 * bpf_patch_insn_single() fails. An -ERANGE failure from
 * bpf_patch_insn_single() is additionally reported in the verifier log.
 * The caller is responsible for storing the returned program in env->prog;
 * this function does not do it.
 */
struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
				     const struct bpf_insn *patch, u32 len)
{
	struct bpf_prog *new_prog;
	struct bpf_insn_aux_data *new_data = NULL;

	if (len > 1) {
		/* grow aux data up front; new tail elements are zeroed */
		new_data = vrealloc(env->insn_aux_data,
				    array_size(env->prog->len + len - 1,
					       sizeof(struct bpf_insn_aux_data)),
				    GFP_KERNEL_ACCOUNT | __GFP_ZERO);
		if (!new_data)
			return NULL;

		env->insn_aux_data = new_data;
	}

	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
	if (IS_ERR(new_prog)) {
		if (PTR_ERR(new_prog) == -ERANGE)
			verbose(env,
				"insn %d cannot be patched due to 16-bit range\n",
				env->insn_aux_data[off].orig_idx);
		return NULL;
	}
	adjust_insn_aux_data(env, new_prog, off, len);
	adjust_subprog_starts(env, off, len);
	adjust_insn_arrays(env, off, len);
	adjust_poke_descs(new_prog, off, len);
	return new_prog;
}

/*
 * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
 * jump offset by 'delta'.
 *
 * Instructions inside [tgt_idx, tgt_idx + delta) are skipped, as are calls
 * and exits. Returns 0 on success or -ERANGE if an adjusted offset does not
 * fit its field (imm for BPF_JMP32 | BPF_JA, off otherwise).
 */
static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
{
	struct bpf_insn *insn = prog->insnsi;
	u32 insn_cnt = prog->len, i;
	s32 imm;
	s16 off;

	for (i = 0; i < insn_cnt; i++, insn++) {
		u8 code = insn->code;

		if (tgt_idx <= i && i < tgt_idx + delta)
			continue;

		if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
		    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
			continue;

		if (insn->code == (BPF_JMP32 | BPF_JA)) {
			/* this encoding keeps its displacement in imm */
			if (i + 1 + insn->imm != tgt_idx)
				continue;
			if (check_add_overflow(insn->imm, delta, &imm))
				return -ERANGE;
			insn->imm = imm;
		} else {
			if (i + 1 + insn->off != tgt_idx)
				continue;
			if (check_add_overflow(insn->off, delta, &off))
				return -ERANGE;
			insn->off = off;
		}
	}
	return 0;
}

/*
 * CNT instructions starting at OFF were removed: drop the subprog_info (and
 * matching func_info) entries of subprograms that vanished entirely and pull
 * the start of every later subprogram, including the fake 'exit' one, in by
 * CNT. Always returns 0.
 */
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
					      u32 off, u32 cnt)
{
	int i, j;

	/* find first prog starting at or after off (first to remove) */
	for (i = 0; i < env->subprog_cnt; i++)
		if (env->subprog_info[i].start >= off)
			break;
	/* find first prog starting at or after off + cnt (first to stay) */
	for (j = i; j < env->subprog_cnt; j++)
		if (env->subprog_info[j].start >= off + cnt)
			break;
	/* if j doesn't start exactly at off + cnt, we are just removing
	 * the front of previous prog
	 */
	if (env->subprog_info[j].start != off + cnt)
		j--;

	if (j > i) {
		struct bpf_prog_aux *aux = env->prog->aux;
		int move;

		/* move fake 'exit' subprog as well */
		move = env->subprog_cnt + 1 - j;

		memmove(env->subprog_info + i,
			env->subprog_info + j,
			sizeof(*env->subprog_info) * move);
		env->subprog_cnt -= j - i;

		/* remove func_info */
		if (aux->func_info) {
			move = aux->func_info_cnt - j;

			memmove(aux->func_info + i,
				aux->func_info + j,
				sizeof(*aux->func_info) * move);
			aux->func_info_cnt -= j - i;
			/* func_info->insn_off is set after all code rewrites,
			 * in adjust_btf_func() - no need to adjust
			 */
		}
	} else {
		/* convert i from "first prog to remove" to "first to adjust" */
		if (env->subprog_info[i].start == off)
			i++;
	}

	/* update fake 'exit' subprog as well */
	for (; i <= env->subprog_cnt; i++)
		env->subprog_info[i].start -= cnt;

	return 0;
}

/*
 * CNT instructions starting at OFF were removed (prog->len is already the
 * new length): drop the line info records that pointed into the removed
 * range, shift the insn_off of the remaining ones, and fix up the linfo_idx
 * of every subprogram. Always returns 0.
 */
static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
				      u32 cnt)
{
	struct bpf_prog *prog = env->prog;
	u32 i, l_off, l_cnt, nr_linfo;
	struct bpf_line_info *linfo;

	nr_linfo = prog->aux->nr_linfo;
	if (!nr_linfo)
		return 0;

	linfo = prog->aux->linfo;

	/* find first line info to remove, count lines to be removed */
	for (i = 0; i < nr_linfo; i++)
		if (linfo[i].insn_off >= off)
			break;

	l_off = i;
	l_cnt = 0;
	for (; i < nr_linfo; i++)
		if (linfo[i].insn_off < off + cnt)
			l_cnt++;
		else
			break;

	/* First live insn doesn't match first live linfo, it needs to "inherit"
	 * last removed linfo.  prog is already modified, so prog->len == off
	 * means no live instructions after (tail of the program was removed).
	 */
	if (prog->len != off && l_cnt &&
	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
		l_cnt--;
		linfo[--i].insn_off = off + cnt;
	}

	/* remove the line info which refer to the removed instructions */
	if (l_cnt) {
		memmove(linfo + l_off, linfo + i,
			sizeof(*linfo) * (nr_linfo - i));

		prog->aux->nr_linfo -= l_cnt;
		nr_linfo = prog->aux->nr_linfo;
	}

	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
	for (i = l_off; i < nr_linfo; i++)
		linfo[i].insn_off -= cnt;

	/* fix up all subprogs (incl. 'exit') which start >= off */
	for (i = 0; i <= env->subprog_cnt; i++)
		if (env->subprog_info[i].linfo_idx > l_off) {
			/* program may have started in the removed region but
			 * may not be fully removed
			 */
			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
				env->subprog_info[i].linfo_idx -= l_cnt;
			else
				env->subprog_info[i].linfo_idx = l_off;
		}

	return 0;
}

/*
 * Clean up dynamically allocated fields of aux data for instructions
 * [start, start + len)
*/
void bpf_clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn *insns = env->prog->insnsi;
	int end = start + len;
	int i;

	for (i = start; i < end; i++) {
		/* ->jt is the only field freed here */
		if (aux_data[i].jt) {
			kvfree(aux_data[i].jt);
			aux_data[i].jt = NULL;
		}

		/* ldimm64 occupies two slots; skip the second one */
		if (bpf_is_ldimm64(&insns[i]))
			i++;
	}
}

/*
 * Remove CNT instructions starting at OFF from env->prog and bring the
 * verifier's side tables in line: aux data allocations are released, subprog
 * starts, line info and instruction array maps are adjusted, and the
 * insn_aux_data array is compacted.
 *
 * Returns 0 on success or the first error reported by a helper; on error the
 * remaining adjustment steps are not performed.
 */
static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	unsigned int orig_prog_len = env->prog->len;
	int err;

	if (bpf_prog_is_offloaded(env->prog->aux))
		bpf_prog_offload_remove_insns(env, off, cnt);

	/* Should be called before bpf_remove_insns, as it uses prog->insnsi */
	bpf_clear_insn_aux_data(env, off, cnt);

	err = bpf_remove_insns(env->prog, off, cnt);
	if (err)
		return err;

	err = adjust_subprog_starts_after_remove(env, off, cnt);
	if (err)
		return err;

	err = bpf_adj_linfo_after_remove(env, off, cnt);
	if (err)
		return err;

	adjust_insn_arrays_after_remove(env, off, cnt);

	/* close the gap in the aux array, sized by the pre-removal length */
	memmove(aux_data + off, aux_data + off + cnt,
		sizeof(*aux_data) * (orig_prog_len - off - cnt));

	return 0;
}

/* Instruction patterns that bpf_opt_remove_nops() deletes: 'ja +0' and 'may_goto +0'. */
static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);

/*
 * Return true if opcode CODE is a conditional jump: any BPF_JMP32 op except
 * BPF_JA, or any BPF_JMP op except BPF_JA, BPF_EXIT and BPF_CALL.
 */
bool bpf_insn_is_cond_jump(u8 code)
{
	u8 op;

	op = BPF_OP(code);
	if (BPF_CLASS(code) == BPF_JMP32)
		return op != BPF_JA;

	if (BPF_CLASS(code) != BPF_JMP)
		return false;

	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
}

/*
 * Turn conditional jumps with one never-seen successor into unconditional
 * ones: if the fall-through insn was not seen, the jump becomes
 * 'ja insn->off'; otherwise, if the jump target was not seen, it becomes
 * 'ja +0'. Jumps with both successors seen are left alone.
 */
void bpf_opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
	struct bpf_insn *insn = env->prog->insnsi;
	const int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!bpf_insn_is_cond_jump(insn->code))
			continue;

		if (!aux_data[i + 1].seen)
			ja.off = insn->off;
		else if (!aux_data[i + 1 + insn->off].seen)
			ja.off = 0;
		else
			continue;

		if (bpf_prog_is_offloaded(env->prog->aux))
			bpf_prog_offload_replace_insn(env, i, &ja);

		memcpy(insn, &ja, sizeof(ja));
	}
}

/*
 * Remove every run of consecutive instructions whose aux data has
 * seen == 0. Returns 0 on success or the error from verifier_remove_insns().
 */
int bpf_opt_remove_dead_code(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	int insn_cnt = env->prog->len;
	int i, err;

	for (i = 0; i < insn_cnt; i++) {
		int j;

		/* j = length of the unseen run starting at i */
		j = 0;
		while (i + j < insn_cnt && !aux_data[i + j].seen)
			j++;
		if (!j)
			continue;

		err = verifier_remove_insns(env, i, j);
		if (err)
			return err;
		insn_cnt = env->prog->len;
	}

	return 0;
}

/*
 * Remove every instruction that is byte-identical to NOP ('ja +0') or
 * MAY_GOTO_0. Returns 0 on success or the error from
 * verifier_remove_insns().
 */
int bpf_opt_remove_nops(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	bool is_may_goto_0, is_ja;
	int i, err;

	for (i = 0; i < insn_cnt; i++) {
		is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
		is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));

		if (!is_may_goto_0 && !is_ja)
			continue;

		err = verifier_remove_insns(env, i, 1);
		if (err)
			return err;
		insn_cnt--;
		/* Go back one insn to catch may_goto +1; may_goto +0 sequence */
		i -= (is_may_goto_0 && i > 0) ? 2 : 1;
	}

	return 0;
}

/*
 * Post-process 32-bit sub-register definitions.
 *
 * For an instruction whose aux data has zext_dst set, an explicit
 * zero-extension of the defined register is appended when the JIT asks for it
 * (bpf_jit_needs_zext()) or when the instruction is a cmpxchg.
 *
 * For an instruction without zext_dst, and only when BPF_F_TEST_RND_HI32 is
 * set in attr->prog_flags, a sequence is appended that ORs a random 32-bit
 * value into the upper half of the defined register (64-bit definitions and
 * BPF_LDX through PTR_TO_CTX are skipped).
 *
 * Returns 0 on success, -ENOMEM when bpf_patch_insn_data() fails for any
 * reason, or -EFAULT on an internal inconsistency.
 */
int bpf_opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
				      const union bpf_attr *attr)
{
	struct bpf_insn *patch;
	/* use env->insn_buf as two independent buffers */
	struct bpf_insn *zext_patch = env->insn_buf;
	struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	int i, patch_len, delta = 0, len = env->prog->len;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_prog *new_prog;
	bool rnd_hi32;

	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
	/* slot [0] of each patch is filled with the original insn per use */
	zext_patch[1] = BPF_ZEXT_REG(0);
	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
	for (i = 0; i < len; i++) {
		/* i walks the original program; delta accounts for insertions */
		int adj_idx = i + delta;
		struct bpf_insn insn;
		int load_reg;

		insn = insns[adj_idx];
		load_reg = insn_def_regno(&insn);
		if (!aux[adj_idx].zext_dst) {
			u8 code, class;
			u32 imm_rnd;

			if (!rnd_hi32)
				continue;

			code = insn.code;
			class = BPF_CLASS(code);
			if (load_reg == -1)
				continue;

			/* NOTE: arg "reg" (the fourth one) is only used for
			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
			 *       here.
			 */
			if (bpf_is_reg64(&insn, load_reg, NULL, DST_OP)) {
				/* BPF_LD | BPF_IMM takes two slots, skip the second */
				if (class == BPF_LD &&
				    BPF_MODE(code) == BPF_IMM)
					i++;
				continue;
			}

			/* ctx load could be transformed into wider load. */
			if (class == BPF_LDX &&
			    aux[adj_idx].ptr_type == PTR_TO_CTX)
				continue;

			imm_rnd = get_random_u32();
			rnd_hi32_patch[0] = insn;
			rnd_hi32_patch[1].imm = imm_rnd;
			rnd_hi32_patch[3].dst_reg = load_reg;
			patch = rnd_hi32_patch;
			patch_len = 4;
			goto apply_patch_buffer;
		}

		/* Add in a zero-extend instruction if a) the JIT has requested
		 * it or b) it's a CMPXCHG.
		 *
		 * The latter is because: BPF_CMPXCHG always loads a value into
		 * R0, therefore always zero-extends. However some archs'
		 * equivalent instruction only does this load when the
		 * comparison is successful. This detail of CMPXCHG is
		 * orthogonal to the general zero-extension behaviour of the
		 * CPU, so it's treated independently of bpf_jit_needs_zext.
		 */
		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
			continue;

		/* Zero-extension is done by the caller. */
		if (bpf_pseudo_kfunc_call(&insn))
			continue;

		if (verifier_bug_if(load_reg == -1, env,
				    "zext_dst is set, but no reg is defined"))
			return -EFAULT;

		zext_patch[0] = insn;
		zext_patch[1].dst_reg = load_reg;
		zext_patch[1].src_reg = load_reg;
		patch = zext_patch;
		patch_len = 2;
apply_patch_buffer:
		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
		if (!new_prog)
			return -ENOMEM;
		/* patching may have replaced both the program and the aux array */
		env->prog = new_prog;
		insns = new_prog->insnsi;
		aux = env->insn_aux_data;
		delta += patch_len - 1;
	}

	return 0;
}

/* convert load instructions that access fields of a context type into a
 * sequence of instructions that access fields of the underlying structure:
 *     struct __sk_buff    -> struct sk_buff
 *     struct bpf_sock_ops -> struct sock
 *
 * In addition this pass inserts the program type's epilogue and prologue
 * (when the ops provide them), inserts nospec barriers requested in the aux
 * data, and rewrites loads/stores/atomics through certain pointer types to
 * their BPF_PROBE_* forms.
 *
 * Returns 0 on success, -ENOMEM when bpf_patch_insn_data() fails, -EFAULT on
 * an internal inconsistency, -EOPNOTSUPP for an unsupported sign-extending
 * arena load, or a negative value from add_kfunc_in_insns().
 */
int bpf_convert_ctx_accesses(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprogs = env->subprog_info;
	const struct bpf_verifier_ops *ops = env->ops;
	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
	const int insn_cnt = env->prog->len;
	struct bpf_insn *epilogue_buf = env->epilogue_buf;
	struct bpf_insn *insn_buf = env->insn_buf;
	struct bpf_insn *insn;
	u32 target_size, size_default, off;
	struct bpf_prog *new_prog;
	enum bpf_access_type type;
	bool is_narrower_load;
	int epilogue_idx = 0;

	if (ops->gen_epilogue) {
		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
						 -(subprogs[0].stack_depth + 8));
		if (epilogue_cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "epilogue is too long");
			return -EFAULT;
		} else if (epilogue_cnt) {
			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
			cnt = 0;
			subprogs[0].stack_depth += 8;
			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
						      -subprogs[0].stack_depth);
			insn_buf[cnt++] = env->prog->insnsi[0];
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;
			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	if (ops->gen_prologue || env->seen_direct_write) {
		if (!ops->gen_prologue) {
			verifier_bug(env, "gen_prologue is null");
			return -EFAULT;
		}
		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
					env->prog);
		if (cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "prologue is too long");
			return -EFAULT;
		} else if (cnt) {
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	/* jumps that targeted insn 0 must now skip the inserted insns */
	if (delta)
		WARN_ON(adjust_jmp_off(env->prog, 0, delta));

	if (bpf_prog_is_offloaded(env->prog->aux))
		return 0;

	insn = env->prog->insnsi + delta;

	/* i indexes the pre-patch program; i + delta is the current position */
	for (i = 0; i < insn_cnt; i++, insn++) {
		bpf_convert_ctx_access_t convert_ctx_access;
		u8 mode;

		if (env->insn_aux_data[i + delta].nospec) {
			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
			struct bpf_insn *patch = insn_buf;

			*patch++ = BPF_ST_NOSPEC();
			*patch++ = *insn;
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			/* This can not be easily merged with the
			 * nospec_result-case, because an insn may require a
			 * nospec before and after itself. Therefore also do not
			 * 'continue' here but potentially apply further
			 * patching to insn. *insn should equal patch[1] now.
			 */
		}

		/* classify the access; anything unrecognised is skipped */
		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
			type = BPF_READ;
		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
			type = BPF_WRITE;
		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
			/* arena atomics become BPF_PROBE_ATOMIC and get an exentry */
			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
			env->prog->aux->num_exentries++;
			continue;
		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
			   epilogue_cnt &&
			   i + delta < subprogs[1].start) {
			/* Generate epilogue for the main prog */
			if (epilogue_idx) {
				/* jump back to the earlier generated epilogue */
				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
				cnt = 1;
			} else {
				memcpy(insn_buf, epilogue_buf,
				       epilogue_cnt * sizeof(*epilogue_buf));
				cnt = epilogue_cnt;
				/* epilogue_idx cannot be 0. It must have at
				 * least one ctx ptr saving insn before the
				 * epilogue.
				 */
				epilogue_idx = i + delta;
			}
			goto patch_insn_buf;
		} else {
			continue;
		}

		if (type == BPF_WRITE &&
		    env->insn_aux_data[i + delta].nospec_result) {
			/* nospec_result is only used to mitigate Spectre v4 and
			 * to limit verification-time for Spectre v1.
			 */
			struct bpf_insn *patch = insn_buf;

			*patch++ = *insn;
			*patch++ = BPF_ST_NOSPEC();
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		/* pick the converter for the pointer type being dereferenced */
		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
		case PTR_TO_CTX:
			if (!ops->convert_ctx_access)
				continue;
			convert_ctx_access = ops->convert_ctx_access;
			break;
		case PTR_TO_SOCKET:
		case PTR_TO_SOCK_COMMON:
			convert_ctx_access = bpf_sock_convert_ctx_access;
			break;
		case PTR_TO_TCP_SOCK:
			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
			break;
		case PTR_TO_XDP_SOCK:
			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
			break;
		case PTR_TO_BTF_ID:
		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
		 * PTR_TO_BTF_ID, and an active referenced id, but the same cannot
		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
		 * any faults for loads into such types. BPF_WRITE is disallowed
		 * for this case.
		 */
		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
			if (type == BPF_READ) {
				if (BPF_MODE(insn->code) == BPF_MEM)
					insn->code = BPF_LDX | BPF_PROBE_MEM |
						     BPF_SIZE((insn)->code);
				else
					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
						     BPF_SIZE((insn)->code);
				env->prog->aux->num_exentries++;
			}
			continue;
		case PTR_TO_ARENA:
			if (BPF_MODE(insn->code) == BPF_MEMSX) {
				if (!bpf_jit_supports_insn(insn, true)) {
					verbose(env, "sign extending loads from arena are not supported yet\n");
					return -EOPNOTSUPP;
				}
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
			} else {
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
			}
			env->prog->aux->num_exentries++;
			continue;
		default:
			continue;
		}

		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
		size = BPF_LDST_BYTES(insn);
		mode = BPF_MODE(insn->code);

		/* If the read access is a narrower load of the field,
		 * convert to a 4/8-byte load, to minimize program type specific
		 * convert_ctx_access changes. If conversion is successful,
		 * we will apply proper mask to the result.
		 */
		is_narrower_load = size < ctx_field_size;
		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
		off = insn->off;
		if (is_narrower_load) {
			u8 size_code;

			if (type == BPF_WRITE) {
				verifier_bug(env, "narrow ctx access misconfigured");
				return -EFAULT;
			}

			size_code = BPF_H;
			if (ctx_field_size == 4)
				size_code = BPF_W;
			else if (ctx_field_size == 8)
				size_code = BPF_DW;

			/* widen the load and align its offset to size_default */
			insn->off = off & ~(size_default - 1);
			insn->code = BPF_LDX | BPF_MEM | size_code;
		}

		target_size = 0;
		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
					 &target_size);
		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
		    (ctx_field_size && !target_size)) {
			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
			return -EFAULT;
		}

		if (is_narrower_load && size < target_size) {
			u8 shift = bpf_ctx_narrow_access_offset(
				off, size, size_default) * 8;
			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
				verifier_bug(env, "narrow ctx load misconfigured");
				return -EFAULT;
			}
			/* shift the wanted bytes down, then mask to 'size' bytes */
			if (ctx_field_size <= 4) {
				if (shift)
					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1 << size * 8) - 1);
			} else {
				if (shift)
					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1ULL << size * 8) - 1);
			}
		}
		/* original load was BPF_MEMSX: append a mov of dst_reg onto
		 * itself with 'off' set to the load width in bits
		 */
		if (mode == BPF_MEMSX)
			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
						       insn->dst_reg, insn->dst_reg,
						       size * 8, 0);

patch_insn_buf:
		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
		if (!new_prog)
			return -ENOMEM;

		delta += cnt - 1;

		/* keep walking new program and skip insns we just inserted */
		env->prog = new_prog;
		insn      = new_prog->insnsi + i + delta;
	}

	return 0;
}

/*
 * Return a newly allocated array holding the current start offset of each of
 * the env->subprog_cnt subprograms, or NULL if the allocation fails.
 */
static u32
*bpf_dup_subprog_starts(struct bpf_verifier_env *env) 989 { 990 u32 *starts = NULL; 991 992 starts = kvmalloc_objs(u32, env->subprog_cnt, GFP_KERNEL_ACCOUNT); 993 if (starts) { 994 for (int i = 0; i < env->subprog_cnt; i++) 995 starts[i] = env->subprog_info[i].start; 996 } 997 return starts; 998 } 999 1000 static void bpf_restore_subprog_starts(struct bpf_verifier_env *env, u32 *orig_starts) 1001 { 1002 for (int i = 0; i < env->subprog_cnt; i++) 1003 env->subprog_info[i].start = orig_starts[i]; 1004 /* restore the start of fake 'exit' subprog as well */ 1005 env->subprog_info[env->subprog_cnt].start = env->prog->len; 1006 } 1007 1008 struct bpf_insn_aux_data *bpf_dup_insn_aux_data(struct bpf_verifier_env *env) 1009 { 1010 size_t size; 1011 void *new_aux; 1012 1013 size = array_size(sizeof(struct bpf_insn_aux_data), env->prog->len); 1014 new_aux = __vmalloc(size, GFP_KERNEL_ACCOUNT); 1015 if (new_aux) 1016 memcpy(new_aux, env->insn_aux_data, size); 1017 return new_aux; 1018 } 1019 1020 void bpf_restore_insn_aux_data(struct bpf_verifier_env *env, 1021 struct bpf_insn_aux_data *orig_insn_aux) 1022 { 1023 /* the expanded elements are zero-filled, so no special handling is required */ 1024 vfree(env->insn_aux_data); 1025 env->insn_aux_data = orig_insn_aux; 1026 } 1027 1028 static int jit_subprogs(struct bpf_verifier_env *env) 1029 { 1030 struct bpf_prog *prog = env->prog, **func, *tmp; 1031 int i, j, subprog_start, subprog_end = 0, len, subprog; 1032 struct bpf_map *map_ptr; 1033 struct bpf_insn *insn; 1034 void *old_bpf_func; 1035 int err, num_exentries; 1036 1037 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 1038 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn)) 1039 continue; 1040 1041 /* Upon error here we cannot fall back to interpreter but 1042 * need a hard reject of the program. Thus -EFAULT is 1043 * propagated in any case. 
1044 */ 1045 subprog = bpf_find_subprog(env, i + insn->imm + 1); 1046 if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d", 1047 i + insn->imm + 1)) 1048 return -EFAULT; 1049 /* temporarily remember subprog id inside insn instead of 1050 * aux_data, since next loop will split up all insns into funcs 1051 */ 1052 insn->off = subprog; 1053 /* remember original imm in case JIT fails and fallback 1054 * to interpreter will be needed 1055 */ 1056 env->insn_aux_data[i].call_imm = insn->imm; 1057 /* point imm to __bpf_call_base+1 from JITs point of view */ 1058 insn->imm = 1; 1059 if (bpf_pseudo_func(insn)) { 1060 #if defined(MODULES_VADDR) 1061 u64 addr = MODULES_VADDR; 1062 #else 1063 u64 addr = VMALLOC_START; 1064 #endif 1065 /* jit (e.g. x86_64) may emit fewer instructions 1066 * if it learns a u32 imm is the same as a u64 imm. 1067 * Set close enough to possible prog address. 1068 */ 1069 insn[0].imm = (u32)addr; 1070 insn[1].imm = addr >> 32; 1071 } 1072 } 1073 1074 err = bpf_prog_alloc_jited_linfo(prog); 1075 if (err) 1076 goto out_undo_insn; 1077 1078 err = -ENOMEM; 1079 func = kzalloc_objs(prog, env->subprog_cnt); 1080 if (!func) 1081 goto out_undo_insn; 1082 1083 for (i = 0; i < env->subprog_cnt; i++) { 1084 subprog_start = subprog_end; 1085 subprog_end = env->subprog_info[i + 1].start; 1086 1087 len = subprog_end - subprog_start; 1088 /* bpf_prog_run() doesn't call subprogs directly, 1089 * hence main prog stats include the runtime of subprogs. 
1090 * subprogs don't have IDs and not reachable via prog_get_next_id 1091 * func[i]->stats will never be accessed and stays NULL 1092 */ 1093 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); 1094 if (!func[i]) 1095 goto out_free; 1096 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], 1097 len * sizeof(struct bpf_insn)); 1098 func[i]->type = prog->type; 1099 func[i]->len = len; 1100 if (bpf_prog_calc_tag(func[i])) 1101 goto out_free; 1102 func[i]->is_func = 1; 1103 func[i]->sleepable = prog->sleepable; 1104 func[i]->blinded = prog->blinded; 1105 func[i]->aux->func_idx = i; 1106 /* Below members will be freed only at prog->aux */ 1107 func[i]->aux->btf = prog->aux->btf; 1108 func[i]->aux->subprog_start = subprog_start; 1109 func[i]->aux->func_info = prog->aux->func_info; 1110 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; 1111 func[i]->aux->poke_tab = prog->aux->poke_tab; 1112 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; 1113 func[i]->aux->main_prog_aux = prog->aux; 1114 1115 for (j = 0; j < prog->aux->size_poke_tab; j++) { 1116 struct bpf_jit_poke_descriptor *poke; 1117 1118 poke = &prog->aux->poke_tab[j]; 1119 if (poke->insn_idx < subprog_end && 1120 poke->insn_idx >= subprog_start) 1121 poke->aux = func[i]->aux; 1122 } 1123 1124 func[i]->aux->name[0] = 'F'; 1125 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; 1126 if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) 1127 func[i]->aux->jits_use_priv_stack = true; 1128 1129 func[i]->jit_requested = 1; 1130 func[i]->blinding_requested = prog->blinding_requested; 1131 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; 1132 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab; 1133 func[i]->aux->linfo = prog->aux->linfo; 1134 func[i]->aux->nr_linfo = prog->aux->nr_linfo; 1135 func[i]->aux->jited_linfo = prog->aux->jited_linfo; 1136 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; 1137 func[i]->aux->arena = prog->aux->arena; 1138 
func[i]->aux->used_maps = env->used_maps; 1139 func[i]->aux->used_map_cnt = env->used_map_cnt; 1140 num_exentries = 0; 1141 insn = func[i]->insnsi; 1142 for (j = 0; j < func[i]->len; j++, insn++) { 1143 if (BPF_CLASS(insn->code) == BPF_LDX && 1144 (BPF_MODE(insn->code) == BPF_PROBE_MEM || 1145 BPF_MODE(insn->code) == BPF_PROBE_MEM32 || 1146 BPF_MODE(insn->code) == BPF_PROBE_MEM32SX || 1147 BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) 1148 num_exentries++; 1149 if ((BPF_CLASS(insn->code) == BPF_STX || 1150 BPF_CLASS(insn->code) == BPF_ST) && 1151 BPF_MODE(insn->code) == BPF_PROBE_MEM32) 1152 num_exentries++; 1153 if (BPF_CLASS(insn->code) == BPF_STX && 1154 BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) 1155 num_exentries++; 1156 } 1157 func[i]->aux->num_exentries = num_exentries; 1158 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; 1159 func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb; 1160 func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data; 1161 func[i]->aux->might_sleep = env->subprog_info[i].might_sleep; 1162 func[i]->aux->token = prog->aux->token; 1163 if (!i) 1164 func[i]->aux->exception_boundary = env->seen_exception; 1165 func[i] = bpf_int_jit_compile(env, func[i]); 1166 if (!func[i]->jited) { 1167 err = -ENOTSUPP; 1168 goto out_free; 1169 } 1170 cond_resched(); 1171 } 1172 1173 /* at this point all bpf functions were successfully JITed 1174 * now populate all bpf_calls with correct addresses and 1175 * run last pass of JIT 1176 */ 1177 for (i = 0; i < env->subprog_cnt; i++) { 1178 insn = func[i]->insnsi; 1179 for (j = 0; j < func[i]->len; j++, insn++) { 1180 if (bpf_pseudo_func(insn)) { 1181 subprog = insn->off; 1182 insn[0].imm = (u32)(long)func[subprog]->bpf_func; 1183 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; 1184 continue; 1185 } 1186 if (!bpf_pseudo_call(insn)) 1187 continue; 1188 subprog = insn->off; 1189 insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func); 1190 } 1191 1192 /* we 
use the aux data to keep a list of the start addresses 1193 * of the JITed images for each function in the program 1194 * 1195 * for some architectures, such as powerpc64, the imm field 1196 * might not be large enough to hold the offset of the start 1197 * address of the callee's JITed image from __bpf_call_base 1198 * 1199 * in such cases, we can lookup the start address of a callee 1200 * by using its subprog id, available from the off field of 1201 * the call instruction, as an index for this list 1202 */ 1203 func[i]->aux->func = func; 1204 func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt; 1205 func[i]->aux->real_func_cnt = env->subprog_cnt; 1206 } 1207 for (i = 0; i < env->subprog_cnt; i++) { 1208 old_bpf_func = func[i]->bpf_func; 1209 tmp = bpf_int_jit_compile(env, func[i]); 1210 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { 1211 verbose(env, "JIT doesn't support bpf-to-bpf calls\n"); 1212 err = -ENOTSUPP; 1213 goto out_free; 1214 } 1215 cond_resched(); 1216 } 1217 1218 /* 1219 * Cleanup func[i]->aux fields which aren't required 1220 * or can become invalid in future 1221 */ 1222 for (i = 0; i < env->subprog_cnt; i++) { 1223 func[i]->aux->used_maps = NULL; 1224 func[i]->aux->used_map_cnt = 0; 1225 } 1226 1227 /* finally lock prog and jit images for all functions and 1228 * populate kallsysm. Begin at the first subprogram, since 1229 * bpf_prog_load will add the kallsyms for the main program. 1230 */ 1231 for (i = 1; i < env->subprog_cnt; i++) { 1232 err = bpf_prog_lock_ro(func[i]); 1233 if (err) 1234 goto out_free; 1235 } 1236 1237 for (i = 1; i < env->subprog_cnt; i++) 1238 bpf_prog_kallsyms_add(func[i]); 1239 1240 /* Last step: make now unused interpreter insns from main 1241 * prog consistent for later dump requests, so they can 1242 * later look the same as if they were interpreted only. 
1243 */ 1244 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 1245 if (bpf_pseudo_func(insn)) { 1246 insn[0].imm = env->insn_aux_data[i].call_imm; 1247 insn[1].imm = insn->off; 1248 insn->off = 0; 1249 continue; 1250 } 1251 if (!bpf_pseudo_call(insn)) 1252 continue; 1253 insn->imm = env->insn_aux_data[i].call_imm; 1254 subprog = bpf_find_subprog(env, i + insn->imm + 1); 1255 insn->off = subprog; 1256 } 1257 1258 prog->jited = 1; 1259 prog->bpf_func = func[0]->bpf_func; 1260 prog->jited_len = func[0]->jited_len; 1261 prog->aux->extable = func[0]->aux->extable; 1262 prog->aux->num_exentries = func[0]->aux->num_exentries; 1263 prog->aux->func = func; 1264 prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt; 1265 prog->aux->real_func_cnt = env->subprog_cnt; 1266 prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func; 1267 prog->aux->exception_boundary = func[0]->aux->exception_boundary; 1268 prog->aux->stack_arg_sp_adjust = func[0]->aux->stack_arg_sp_adjust; 1269 bpf_prog_jit_attempt_done(prog); 1270 return 0; 1271 out_free: 1272 /* We failed JIT'ing, so at this point we need to unregister poke 1273 * descriptors from subprogs, so that kernel is not attempting to 1274 * patch it anymore as we're freeing the subprog JIT memory. 1275 */ 1276 for (i = 0; i < prog->aux->size_poke_tab; i++) { 1277 map_ptr = prog->aux->poke_tab[i].tail_call.map; 1278 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); 1279 } 1280 /* At this point we're guaranteed that poke descriptors are not 1281 * live anymore. We can just unlink its descriptor table as it's 1282 * released with the main prog. 
1283 */ 1284 for (i = 0; i < env->subprog_cnt; i++) { 1285 if (!func[i]) 1286 continue; 1287 func[i]->aux->poke_tab = NULL; 1288 bpf_jit_free(func[i]); 1289 } 1290 kfree(func); 1291 out_undo_insn: 1292 bpf_prog_jit_attempt_done(prog); 1293 return err; 1294 } 1295 1296 int bpf_jit_subprogs(struct bpf_verifier_env *env) 1297 { 1298 int err, i; 1299 bool blinded = false; 1300 struct bpf_insn *insn; 1301 struct bpf_prog *prog, *orig_prog; 1302 struct bpf_insn_aux_data *orig_insn_aux; 1303 u32 *orig_subprog_starts; 1304 1305 if (env->subprog_cnt <= 1) 1306 return 0; 1307 1308 prog = orig_prog = env->prog; 1309 if (bpf_prog_need_blind(prog)) { 1310 orig_insn_aux = bpf_dup_insn_aux_data(env); 1311 if (!orig_insn_aux) { 1312 err = -ENOMEM; 1313 goto out_cleanup; 1314 } 1315 orig_subprog_starts = bpf_dup_subprog_starts(env); 1316 if (!orig_subprog_starts) { 1317 vfree(orig_insn_aux); 1318 err = -ENOMEM; 1319 goto out_cleanup; 1320 } 1321 prog = bpf_jit_blind_constants(env, prog); 1322 if (IS_ERR(prog)) { 1323 err = -ENOMEM; 1324 prog = orig_prog; 1325 goto out_restore; 1326 } 1327 blinded = true; 1328 } 1329 1330 err = jit_subprogs(env); 1331 if (err) 1332 goto out_jit_err; 1333 1334 if (blinded) { 1335 bpf_jit_prog_release_other(prog, orig_prog); 1336 kvfree(orig_subprog_starts); 1337 vfree(orig_insn_aux); 1338 } 1339 1340 return 0; 1341 1342 out_jit_err: 1343 if (blinded) { 1344 bpf_jit_prog_release_other(orig_prog, prog); 1345 /* roll back to the clean original prog */ 1346 prog = env->prog = orig_prog; 1347 goto out_restore; 1348 } else { 1349 if (err != -EFAULT) { 1350 /* 1351 * We will fall back to interpreter mode when err is not -EFAULT, before 1352 * that, insn->off and insn->imm should be restored to their original 1353 * values since they were modified by jit_subprogs. 
1354 */ 1355 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 1356 if (!bpf_pseudo_call(insn)) 1357 continue; 1358 insn->off = 0; 1359 insn->imm = env->insn_aux_data[i].call_imm; 1360 } 1361 } 1362 goto out_cleanup; 1363 } 1364 1365 out_restore: 1366 bpf_restore_subprog_starts(env, orig_subprog_starts); 1367 bpf_restore_insn_aux_data(env, orig_insn_aux); 1368 kvfree(orig_subprog_starts); 1369 out_cleanup: 1370 /* cleanup main prog to be interpreted */ 1371 prog->jit_requested = 0; 1372 prog->blinding_requested = 0; 1373 return err; 1374 } 1375 1376 int bpf_fixup_call_args(struct bpf_verifier_env *env) 1377 { 1378 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 1379 struct bpf_prog *prog = env->prog; 1380 struct bpf_insn *insn = prog->insnsi; 1381 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog); 1382 int depth; 1383 #endif 1384 int i, err = 0; 1385 1386 for (i = 0; i < env->subprog_cnt; i++) { 1387 struct bpf_subprog_info *subprog = &env->subprog_info[i]; 1388 u16 outgoing = subprog->stack_arg_cnt - bpf_in_stack_arg_cnt(subprog); 1389 1390 if (subprog->max_out_stack_arg_cnt > outgoing) { 1391 verbose(env, 1392 "func#%d writes %u stack arg slots, but calls only require %u\n", 1393 i, subprog->max_out_stack_arg_cnt, outgoing); 1394 return -EINVAL; 1395 } 1396 } 1397 1398 if (env->prog->jit_requested && 1399 !bpf_prog_is_offloaded(env->prog->aux)) { 1400 err = bpf_jit_subprogs(env); 1401 if (err == 0) 1402 return 0; 1403 if (err == -EFAULT) 1404 return err; 1405 } 1406 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 1407 if (has_kfunc_call) { 1408 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n"); 1409 return -EINVAL; 1410 } 1411 for (i = 0; i < env->subprog_cnt; i++) { 1412 if (bpf_in_stack_arg_cnt(&env->subprog_info[i])) { 1413 verbose(env, "stack args are not supported in non-JITed programs\n"); 1414 return -EINVAL; 1415 } 1416 } 1417 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) { 1418 /* When JIT fails the progs with bpf2bpf 
calls and tail_calls 1419 * have to be rejected, since interpreter doesn't support them yet. 1420 */ 1421 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n"); 1422 return -EINVAL; 1423 } 1424 for (i = 0; i < prog->len; i++, insn++) { 1425 if (bpf_pseudo_func(insn)) { 1426 /* When JIT fails the progs with callback calls 1427 * have to be rejected, since interpreter doesn't support them yet. 1428 */ 1429 verbose(env, "callbacks are not allowed in non-JITed programs\n"); 1430 return -EINVAL; 1431 } 1432 1433 if (!bpf_pseudo_call(insn)) 1434 continue; 1435 depth = get_callee_stack_depth(env, insn, i); 1436 if (depth < 0) 1437 return depth; 1438 err = bpf_patch_call_args(insn, depth); 1439 if (err) { 1440 verbose(env, "stack depth %d exceeds interpreter stack depth limit\n", 1441 depth); 1442 return err; 1443 } 1444 } 1445 err = 0; 1446 #endif 1447 return err; 1448 } 1449 1450 1451 /* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */ 1452 static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len) 1453 { 1454 struct bpf_subprog_info *info = env->subprog_info; 1455 int cnt = env->subprog_cnt; 1456 struct bpf_prog *prog; 1457 1458 /* We only reserve one slot for hidden subprogs in subprog_info. */ 1459 if (env->hidden_subprog_cnt) { 1460 verifier_bug(env, "only one hidden subprog supported"); 1461 return -EFAULT; 1462 } 1463 /* We're not patching any existing instruction, just appending the new 1464 * ones for the hidden subprog. Hence all of the adjustment operations 1465 * in bpf_patch_insn_data are no-ops. 
1466 */ 1467 prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len); 1468 if (!prog) 1469 return -ENOMEM; 1470 env->prog = prog; 1471 info[cnt + 1].start = info[cnt].start; 1472 info[cnt].start = prog->len - len + 1; 1473 env->subprog_cnt++; 1474 env->hidden_subprog_cnt++; 1475 return 0; 1476 } 1477 1478 /* Do various post-verification rewrites in a single program pass. 1479 * These rewrites simplify JIT and interpreter implementations. 1480 */ 1481 int bpf_do_misc_fixups(struct bpf_verifier_env *env) 1482 { 1483 struct bpf_prog *prog = env->prog; 1484 enum bpf_attach_type eatype = prog->expected_attach_type; 1485 enum bpf_prog_type prog_type = resolve_prog_type(prog); 1486 struct bpf_insn *insn = prog->insnsi; 1487 const struct bpf_func_proto *fn; 1488 const int insn_cnt = prog->len; 1489 const struct bpf_map_ops *ops; 1490 struct bpf_insn_aux_data *aux; 1491 struct bpf_insn *insn_buf = env->insn_buf; 1492 struct bpf_prog *new_prog; 1493 struct bpf_map *map_ptr; 1494 int i, ret, cnt, delta = 0, cur_subprog = 0; 1495 struct bpf_subprog_info *subprogs = env->subprog_info; 1496 u16 stack_depth = subprogs[cur_subprog].stack_depth; 1497 u16 stack_depth_extra = 0; 1498 1499 if (env->seen_exception && !env->exception_callback_subprog) { 1500 struct bpf_insn *patch = insn_buf; 1501 1502 *patch++ = env->prog->insnsi[insn_cnt - 1]; 1503 *patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); 1504 *patch++ = BPF_EXIT_INSN(); 1505 ret = add_hidden_subprog(env, insn_buf, patch - insn_buf); 1506 if (ret < 0) 1507 return ret; 1508 prog = env->prog; 1509 insn = prog->insnsi; 1510 1511 env->exception_callback_subprog = env->subprog_cnt - 1; 1512 /* Don't update insn_cnt, as add_hidden_subprog always appends insns */ 1513 bpf_mark_subprog_exc_cb(env, env->exception_callback_subprog); 1514 } 1515 1516 for (i = 0; i < insn_cnt;) { 1517 if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) { 1518 if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) || 1519 (((struct 
bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) { 1520 /* convert to 32-bit mov that clears upper 32-bit */ 1521 insn->code = BPF_ALU | BPF_MOV | BPF_X; 1522 /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */ 1523 insn->off = 0; 1524 insn->imm = 0; 1525 } /* cast from as(0) to as(1) should be handled by JIT */ 1526 goto next_insn; 1527 } 1528 1529 if (env->insn_aux_data[i + delta].needs_zext) 1530 /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */ 1531 insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code); 1532 1533 /* Make sdiv/smod divide-by-minus-one exceptions impossible. */ 1534 if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) || 1535 insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) || 1536 insn->code == (BPF_ALU | BPF_MOD | BPF_K) || 1537 insn->code == (BPF_ALU | BPF_DIV | BPF_K)) && 1538 insn->off == 1 && insn->imm == -1) { 1539 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 1540 bool isdiv = BPF_OP(insn->code) == BPF_DIV; 1541 struct bpf_insn *patch = insn_buf; 1542 1543 if (isdiv) 1544 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1545 BPF_NEG | BPF_K, insn->dst_reg, 1546 0, 0, 0); 1547 else 1548 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0); 1549 1550 cnt = patch - insn_buf; 1551 1552 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1553 if (!new_prog) 1554 return -ENOMEM; 1555 1556 delta += cnt - 1; 1557 env->prog = prog = new_prog; 1558 insn = new_prog->insnsi + i + delta; 1559 goto next_insn; 1560 } 1561 1562 /* Make divide-by-zero and divide-by-minus-one exceptions impossible. 
*/ 1563 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || 1564 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 1565 insn->code == (BPF_ALU | BPF_MOD | BPF_X) || 1566 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 1567 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 1568 bool isdiv = BPF_OP(insn->code) == BPF_DIV; 1569 bool is_sdiv = isdiv && insn->off == 1; 1570 bool is_smod = !isdiv && insn->off == 1; 1571 struct bpf_insn *patch = insn_buf; 1572 1573 if (is_sdiv) { 1574 /* [R,W]x sdiv 0 -> 0 1575 * LLONG_MIN sdiv -1 -> LLONG_MIN 1576 * INT_MIN sdiv -1 -> INT_MIN 1577 */ 1578 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 1579 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1580 BPF_ADD | BPF_K, BPF_REG_AX, 1581 0, 0, 1); 1582 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1583 BPF_JGT | BPF_K, BPF_REG_AX, 1584 0, 4, 1); 1585 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1586 BPF_JEQ | BPF_K, BPF_REG_AX, 1587 0, 1, 0); 1588 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1589 BPF_MOV | BPF_K, insn->dst_reg, 1590 0, 0, 0); 1591 /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */ 1592 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1593 BPF_NEG | BPF_K, insn->dst_reg, 1594 0, 0, 0); 1595 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1596 *patch++ = *insn; 1597 cnt = patch - insn_buf; 1598 } else if (is_smod) { 1599 /* [R,W]x mod 0 -> [R,W]x */ 1600 /* [R,W]x mod -1 -> 0 */ 1601 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 1602 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 1603 BPF_ADD | BPF_K, BPF_REG_AX, 1604 0, 0, 1); 1605 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1606 BPF_JGT | BPF_K, BPF_REG_AX, 1607 0, 3, 1); 1608 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1609 BPF_JEQ | BPF_K, BPF_REG_AX, 1610 0, 3 + (is64 ? 
0 : 1), 1); 1611 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0); 1612 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1613 *patch++ = *insn; 1614 1615 if (!is64) { 1616 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1617 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg); 1618 } 1619 cnt = patch - insn_buf; 1620 } else if (isdiv) { 1621 /* [R,W]x div 0 -> 0 */ 1622 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1623 BPF_JNE | BPF_K, insn->src_reg, 1624 0, 2, 0); 1625 *patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg); 1626 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1627 *patch++ = *insn; 1628 cnt = patch - insn_buf; 1629 } else { 1630 /* [R,W]x mod 0 -> [R,W]x */ 1631 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 1632 BPF_JEQ | BPF_K, insn->src_reg, 1633 0, 1 + (is64 ? 0 : 1), 0); 1634 *patch++ = *insn; 1635 1636 if (!is64) { 1637 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1638 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg); 1639 } 1640 cnt = patch - insn_buf; 1641 } 1642 1643 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1644 if (!new_prog) 1645 return -ENOMEM; 1646 1647 delta += cnt - 1; 1648 env->prog = prog = new_prog; 1649 insn = new_prog->insnsi + i + delta; 1650 goto next_insn; 1651 } 1652 1653 /* Make it impossible to de-reference a userspace address */ 1654 if (BPF_CLASS(insn->code) == BPF_LDX && 1655 (BPF_MODE(insn->code) == BPF_PROBE_MEM || 1656 BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) { 1657 struct bpf_insn *patch = insn_buf; 1658 u64 uaddress_limit = bpf_arch_uaddress_limit(); 1659 1660 if (!uaddress_limit) 1661 goto next_insn; 1662 1663 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 1664 if (insn->off) 1665 *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off); 1666 *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32); 1667 *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2); 1668 *patch++ = *insn; 1669 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1670 *patch++ = BPF_MOV64_IMM(insn->dst_reg, 
0); 1671 1672 cnt = patch - insn_buf; 1673 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1674 if (!new_prog) 1675 return -ENOMEM; 1676 1677 delta += cnt - 1; 1678 env->prog = prog = new_prog; 1679 insn = new_prog->insnsi + i + delta; 1680 goto next_insn; 1681 } 1682 1683 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ 1684 if (BPF_CLASS(insn->code) == BPF_LD && 1685 (BPF_MODE(insn->code) == BPF_ABS || 1686 BPF_MODE(insn->code) == BPF_IND)) { 1687 cnt = env->ops->gen_ld_abs(insn, insn_buf); 1688 if (cnt == 0 || cnt >= INSN_BUF_SIZE) { 1689 verifier_bug(env, "%d insns generated for ld_abs", cnt); 1690 return -EFAULT; 1691 } 1692 1693 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1694 if (!new_prog) 1695 return -ENOMEM; 1696 1697 delta += cnt - 1; 1698 env->prog = prog = new_prog; 1699 insn = new_prog->insnsi + i + delta; 1700 goto next_insn; 1701 } 1702 1703 /* Rewrite pointer arithmetic to mitigate speculation attacks. */ 1704 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || 1705 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { 1706 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; 1707 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; 1708 struct bpf_insn *patch = insn_buf; 1709 bool issrc, isneg, isimm; 1710 u32 off_reg; 1711 1712 aux = &env->insn_aux_data[i + delta]; 1713 if (!aux->alu_state || 1714 aux->alu_state == BPF_ALU_NON_POINTER) 1715 goto next_insn; 1716 1717 isneg = aux->alu_state & BPF_ALU_NEG_VALUE; 1718 issrc = (aux->alu_state & BPF_ALU_SANITIZE) == 1719 BPF_ALU_SANITIZE_SRC; 1720 isimm = aux->alu_state & BPF_ALU_IMMEDIATE; 1721 1722 off_reg = issrc ? 
insn->src_reg : insn->dst_reg; 1723 if (isimm) { 1724 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); 1725 } else { 1726 if (isneg) 1727 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 1728 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); 1729 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); 1730 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); 1731 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); 1732 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); 1733 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg); 1734 } 1735 if (!issrc) 1736 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg); 1737 insn->src_reg = BPF_REG_AX; 1738 if (isneg) 1739 insn->code = insn->code == code_add ? 1740 code_sub : code_add; 1741 *patch++ = *insn; 1742 if (issrc && isneg && !isimm) 1743 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 1744 cnt = patch - insn_buf; 1745 1746 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1747 if (!new_prog) 1748 return -ENOMEM; 1749 1750 delta += cnt - 1; 1751 env->prog = prog = new_prog; 1752 insn = new_prog->insnsi + i + delta; 1753 goto next_insn; 1754 } 1755 1756 if (bpf_is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) { 1757 int stack_off_cnt = -stack_depth - 16; 1758 1759 /* 1760 * Two 8 byte slots, depth-16 stores the count, and 1761 * depth-8 stores the start timestamp of the loop. 1762 * 1763 * The starting value of count is BPF_MAX_TIMED_LOOPS 1764 * (0xffff). Every iteration loads it and subs it by 1, 1765 * until the value becomes 0 in AX (thus, 1 in stack), 1766 * after which we call arch_bpf_timed_may_goto, which 1767 * either sets AX to 0xffff to keep looping, or to 0 1768 * upon timeout. AX is then stored into the stack. In 1769 * the next iteration, we either see 0 and break out, or 1770 * continue iterating until the next time value is 0 1771 * after subtraction, rinse and repeat. 
1772 */ 1773 stack_depth_extra = 16; 1774 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt); 1775 if (insn->off >= 0) 1776 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5); 1777 else 1778 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); 1779 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); 1780 insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2); 1781 /* 1782 * AX is used as an argument to pass in stack_off_cnt 1783 * (to add to r10/fp), and also as the return value of 1784 * the call to arch_bpf_timed_may_goto. 1785 */ 1786 insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt); 1787 insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto); 1788 insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt); 1789 cnt = 7; 1790 1791 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1792 if (!new_prog) 1793 return -ENOMEM; 1794 1795 delta += cnt - 1; 1796 env->prog = prog = new_prog; 1797 insn = new_prog->insnsi + i + delta; 1798 goto next_insn; 1799 } else if (bpf_is_may_goto_insn(insn)) { 1800 int stack_off = -stack_depth - 8; 1801 1802 stack_depth_extra = 8; 1803 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off); 1804 if (insn->off >= 0) 1805 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); 1806 else 1807 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); 1808 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); 1809 insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off); 1810 cnt = 4; 1811 1812 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1813 if (!new_prog) 1814 return -ENOMEM; 1815 1816 delta += cnt - 1; 1817 env->prog = prog = new_prog; 1818 insn = new_prog->insnsi + i + delta; 1819 goto next_insn; 1820 } 1821 1822 if (insn->code != (BPF_JMP | BPF_CALL)) 1823 goto next_insn; 1824 if (insn->src_reg == BPF_PSEUDO_CALL) 1825 goto next_insn; 1826 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 1827 ret = 
bpf_fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt); 1828 if (ret) 1829 return ret; 1830 if (cnt == 0) 1831 goto next_insn; 1832 1833 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1834 if (!new_prog) 1835 return -ENOMEM; 1836 1837 delta += cnt - 1; 1838 env->prog = prog = new_prog; 1839 insn = new_prog->insnsi + i + delta; 1840 goto next_insn; 1841 } 1842 1843 /* Skip inlining the helper call if the JIT does it. */ 1844 if (bpf_jit_inlines_helper_call(insn->imm)) 1845 goto next_insn; 1846 1847 if (insn->imm == BPF_FUNC_get_route_realm) 1848 prog->dst_needed = 1; 1849 if (insn->imm == BPF_FUNC_get_prandom_u32) 1850 bpf_user_rnd_init_once(); 1851 if (insn->imm == BPF_FUNC_override_return) 1852 prog->kprobe_override = 1; 1853 if (insn->imm == BPF_FUNC_tail_call) { 1854 /* If we tail call into other programs, we 1855 * cannot make any assumptions since they can 1856 * be replaced dynamically during runtime in 1857 * the program array. 1858 */ 1859 prog->cb_access = 1; 1860 if (!bpf_allow_tail_call_in_subprogs(env)) 1861 prog->aux->stack_depth = MAX_BPF_STACK; 1862 prog->aux->max_pkt_offset = MAX_PACKET_OFF; 1863 1864 /* mark bpf_tail_call as different opcode to avoid 1865 * conditional branch in the interpreter for every normal 1866 * call and to prevent accidental JITing by JIT compiler 1867 * that doesn't support bpf_tail_call yet 1868 */ 1869 insn->imm = 0; 1870 insn->code = BPF_JMP | BPF_TAIL_CALL; 1871 1872 aux = &env->insn_aux_data[i + delta]; 1873 if (env->bpf_capable && !prog->blinding_requested && 1874 prog->jit_requested && 1875 !bpf_map_key_poisoned(aux) && 1876 !bpf_map_ptr_poisoned(aux) && 1877 !bpf_map_ptr_unpriv(aux)) { 1878 struct bpf_jit_poke_descriptor desc = { 1879 .reason = BPF_POKE_REASON_TAIL_CALL, 1880 .tail_call.map = aux->map_ptr_state.map_ptr, 1881 .tail_call.key = bpf_map_key_immediate(aux), 1882 .insn_idx = i + delta, 1883 }; 1884 1885 ret = bpf_jit_add_poke_descriptor(prog, &desc); 1886 if (ret < 0) { 1887 
verbose(env, "adding tail call poke descriptor failed\n"); 1888 return ret; 1889 } 1890 1891 insn->imm = ret + 1; 1892 goto next_insn; 1893 } 1894 1895 if (!bpf_map_ptr_unpriv(aux)) 1896 goto next_insn; 1897 1898 /* instead of changing every JIT dealing with tail_call 1899 * emit two extra insns: 1900 * if (index >= max_entries) goto out; 1901 * index &= array->index_mask; 1902 * to avoid out-of-bounds cpu speculation 1903 */ 1904 if (bpf_map_ptr_poisoned(aux)) { 1905 verbose(env, "tail_call abusing map_ptr\n"); 1906 return -EINVAL; 1907 } 1908 1909 map_ptr = aux->map_ptr_state.map_ptr; 1910 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, 1911 map_ptr->max_entries, 2); 1912 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 1913 container_of(map_ptr, 1914 struct bpf_array, 1915 map)->index_mask); 1916 insn_buf[2] = *insn; 1917 cnt = 3; 1918 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1919 if (!new_prog) 1920 return -ENOMEM; 1921 1922 delta += cnt - 1; 1923 env->prog = prog = new_prog; 1924 insn = new_prog->insnsi + i + delta; 1925 goto next_insn; 1926 } 1927 1928 if (insn->imm == BPF_FUNC_timer_set_callback) { 1929 /* The verifier will process callback_fn as many times as necessary 1930 * with different maps and the register states prepared by 1931 * set_timer_callback_state will be accurate. 1932 * 1933 * The following use case is valid: 1934 * map1 is shared by prog1, prog2, prog3. 1935 * prog1 calls bpf_timer_init for some map1 elements 1936 * prog2 calls bpf_timer_set_callback for some map1 elements. 1937 * Those that were not bpf_timer_init-ed will return -EINVAL. 1938 * prog3 calls bpf_timer_start for some map1 elements. 1939 * Those that were not both bpf_timer_init-ed and 1940 * bpf_timer_set_callback-ed will return -EINVAL. 
1941 */ 1942 struct bpf_insn ld_addrs[2] = { 1943 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux), 1944 }; 1945 1946 insn_buf[0] = ld_addrs[0]; 1947 insn_buf[1] = ld_addrs[1]; 1948 insn_buf[2] = *insn; 1949 cnt = 3; 1950 1951 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1952 if (!new_prog) 1953 return -ENOMEM; 1954 1955 delta += cnt - 1; 1956 env->prog = prog = new_prog; 1957 insn = new_prog->insnsi + i + delta; 1958 goto patch_call_imm; 1959 } 1960 1961 /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */ 1962 if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) { 1963 /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data, 1964 * bpf_mem_alloc() returns a ptr to the percpu data ptr. 1965 */ 1966 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); 1967 insn_buf[1] = *insn; 1968 cnt = 2; 1969 1970 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 1971 if (!new_prog) 1972 return -ENOMEM; 1973 1974 delta += cnt - 1; 1975 env->prog = prog = new_prog; 1976 insn = new_prog->insnsi + i + delta; 1977 goto patch_call_imm; 1978 } 1979 1980 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup 1981 * and other inlining handlers are currently limited to 64 bit 1982 * only. 
1983 */ 1984 if (prog->jit_requested && BITS_PER_LONG == 64 && 1985 (insn->imm == BPF_FUNC_map_lookup_elem || 1986 insn->imm == BPF_FUNC_map_update_elem || 1987 insn->imm == BPF_FUNC_map_delete_elem || 1988 insn->imm == BPF_FUNC_map_push_elem || 1989 insn->imm == BPF_FUNC_map_pop_elem || 1990 insn->imm == BPF_FUNC_map_peek_elem || 1991 insn->imm == BPF_FUNC_redirect_map || 1992 insn->imm == BPF_FUNC_for_each_map_elem || 1993 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) { 1994 aux = &env->insn_aux_data[i + delta]; 1995 if (bpf_map_ptr_poisoned(aux)) 1996 goto patch_call_imm; 1997 1998 map_ptr = aux->map_ptr_state.map_ptr; 1999 ops = map_ptr->ops; 2000 if (insn->imm == BPF_FUNC_map_lookup_elem && 2001 ops->map_gen_lookup) { 2002 cnt = ops->map_gen_lookup(map_ptr, insn_buf); 2003 if (cnt == -EOPNOTSUPP) 2004 goto patch_map_ops_generic; 2005 if (cnt <= 0 || cnt >= INSN_BUF_SIZE) { 2006 verifier_bug(env, "%d insns generated for map lookup", cnt); 2007 return -EFAULT; 2008 } 2009 2010 new_prog = bpf_patch_insn_data(env, i + delta, 2011 insn_buf, cnt); 2012 if (!new_prog) 2013 return -ENOMEM; 2014 2015 delta += cnt - 1; 2016 env->prog = prog = new_prog; 2017 insn = new_prog->insnsi + i + delta; 2018 goto next_insn; 2019 } 2020 2021 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, 2022 (void *(*)(struct bpf_map *map, void *key))NULL)); 2023 BUILD_BUG_ON(!__same_type(ops->map_delete_elem, 2024 (long (*)(struct bpf_map *map, void *key))NULL)); 2025 BUILD_BUG_ON(!__same_type(ops->map_update_elem, 2026 (long (*)(struct bpf_map *map, void *key, void *value, 2027 u64 flags))NULL)); 2028 BUILD_BUG_ON(!__same_type(ops->map_push_elem, 2029 (long (*)(struct bpf_map *map, void *value, 2030 u64 flags))NULL)); 2031 BUILD_BUG_ON(!__same_type(ops->map_pop_elem, 2032 (long (*)(struct bpf_map *map, void *value))NULL)); 2033 BUILD_BUG_ON(!__same_type(ops->map_peek_elem, 2034 (long (*)(struct bpf_map *map, void *value))NULL)); 2035 BUILD_BUG_ON(!__same_type(ops->map_redirect, 2036 (long 
(*)(struct bpf_map *map, u64 index, u64 flags))NULL)); 2037 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, 2038 (long (*)(struct bpf_map *map, 2039 bpf_callback_t callback_fn, 2040 void *callback_ctx, 2041 u64 flags))NULL)); 2042 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem, 2043 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL)); 2044 2045 patch_map_ops_generic: 2046 switch (insn->imm) { 2047 case BPF_FUNC_map_lookup_elem: 2048 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem); 2049 goto next_insn; 2050 case BPF_FUNC_map_update_elem: 2051 insn->imm = BPF_CALL_IMM(ops->map_update_elem); 2052 goto next_insn; 2053 case BPF_FUNC_map_delete_elem: 2054 insn->imm = BPF_CALL_IMM(ops->map_delete_elem); 2055 goto next_insn; 2056 case BPF_FUNC_map_push_elem: 2057 insn->imm = BPF_CALL_IMM(ops->map_push_elem); 2058 goto next_insn; 2059 case BPF_FUNC_map_pop_elem: 2060 insn->imm = BPF_CALL_IMM(ops->map_pop_elem); 2061 goto next_insn; 2062 case BPF_FUNC_map_peek_elem: 2063 insn->imm = BPF_CALL_IMM(ops->map_peek_elem); 2064 goto next_insn; 2065 case BPF_FUNC_redirect_map: 2066 insn->imm = BPF_CALL_IMM(ops->map_redirect); 2067 goto next_insn; 2068 case BPF_FUNC_for_each_map_elem: 2069 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); 2070 goto next_insn; 2071 case BPF_FUNC_map_lookup_percpu_elem: 2072 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); 2073 goto next_insn; 2074 } 2075 2076 goto patch_call_imm; 2077 } 2078 2079 /* Implement bpf_jiffies64 inline. 
*/ 2080 if (prog->jit_requested && BITS_PER_LONG == 64 && 2081 insn->imm == BPF_FUNC_jiffies64) { 2082 struct bpf_insn ld_jiffies_addr[2] = { 2083 BPF_LD_IMM64(BPF_REG_0, 2084 (unsigned long)&jiffies), 2085 }; 2086 2087 insn_buf[0] = ld_jiffies_addr[0]; 2088 insn_buf[1] = ld_jiffies_addr[1]; 2089 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, 2090 BPF_REG_0, 0); 2091 cnt = 3; 2092 2093 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 2094 cnt); 2095 if (!new_prog) 2096 return -ENOMEM; 2097 2098 delta += cnt - 1; 2099 env->prog = prog = new_prog; 2100 insn = new_prog->insnsi + i + delta; 2101 goto next_insn; 2102 } 2103 2104 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) 2105 /* Implement bpf_get_smp_processor_id() inline. */ 2106 if (insn->imm == BPF_FUNC_get_smp_processor_id && 2107 bpf_verifier_inlines_helper_call(env, insn->imm)) { 2108 /* BPF_FUNC_get_smp_processor_id inlining is an 2109 * optimization, so if cpu_number is ever 2110 * changed in some incompatible and hard to support 2111 * way, it's fine to back out this inlining logic 2112 */ 2113 #ifdef CONFIG_SMP 2114 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number); 2115 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2116 insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0); 2117 cnt = 3; 2118 #else 2119 insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); 2120 cnt = 1; 2121 #endif 2122 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2123 if (!new_prog) 2124 return -ENOMEM; 2125 2126 delta += cnt - 1; 2127 env->prog = prog = new_prog; 2128 insn = new_prog->insnsi + i + delta; 2129 goto next_insn; 2130 } 2131 2132 /* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. 
*/ 2133 if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) && 2134 bpf_verifier_inlines_helper_call(env, insn->imm)) { 2135 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)¤t_task); 2136 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2137 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 2138 cnt = 3; 2139 2140 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2141 if (!new_prog) 2142 return -ENOMEM; 2143 2144 delta += cnt - 1; 2145 env->prog = prog = new_prog; 2146 insn = new_prog->insnsi + i + delta; 2147 goto next_insn; 2148 } 2149 #endif 2150 /* Implement bpf_get_func_arg inline. */ 2151 if (prog_type == BPF_PROG_TYPE_TRACING && 2152 insn->imm == BPF_FUNC_get_func_arg) { 2153 if (eatype == BPF_TRACE_RAW_TP) { 2154 int nr_args = btf_type_vlen(prog->aux->attach_func_proto); 2155 2156 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */ 2157 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1); 2158 cnt = 1; 2159 } else { 2160 /* Load nr_args from ctx - 8 */ 2161 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 2162 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 2163 cnt = 2; 2164 } 2165 insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); 2166 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); 2167 insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); 2168 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); 2169 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); 2170 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0); 2171 insn_buf[cnt++] = BPF_JMP_A(1); 2172 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); 2173 2174 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2175 if (!new_prog) 2176 return -ENOMEM; 2177 2178 delta += cnt - 1; 2179 env->prog = prog = new_prog; 2180 insn = new_prog->insnsi + i + delta; 2181 goto next_insn; 2182 } 2183 2184 /* Implement bpf_get_func_ret inline. 
*/ 2185 if (prog_type == BPF_PROG_TYPE_TRACING && 2186 insn->imm == BPF_FUNC_get_func_ret) { 2187 if (eatype == BPF_TRACE_FEXIT || 2188 eatype == BPF_TRACE_FSESSION || 2189 eatype == BPF_TRACE_FEXIT_MULTI || 2190 eatype == BPF_TRACE_FSESSION_MULTI || 2191 eatype == BPF_MODIFY_RETURN) { 2192 /* Load nr_args from ctx - 8 */ 2193 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 2194 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 2195 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); 2196 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); 2197 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); 2198 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); 2199 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); 2200 cnt = 7; 2201 } else { 2202 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); 2203 cnt = 1; 2204 } 2205 2206 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2207 if (!new_prog) 2208 return -ENOMEM; 2209 2210 delta += cnt - 1; 2211 env->prog = prog = new_prog; 2212 insn = new_prog->insnsi + i + delta; 2213 goto next_insn; 2214 } 2215 2216 /* Implement get_func_arg_cnt inline. */ 2217 if (prog_type == BPF_PROG_TYPE_TRACING && 2218 insn->imm == BPF_FUNC_get_func_arg_cnt) { 2219 if (eatype == BPF_TRACE_RAW_TP) { 2220 int nr_args = btf_type_vlen(prog->aux->attach_func_proto); 2221 2222 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */ 2223 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1); 2224 cnt = 1; 2225 } else { 2226 /* Load nr_args from ctx - 8 */ 2227 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 2228 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 2229 cnt = 2; 2230 } 2231 2232 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2233 if (!new_prog) 2234 return -ENOMEM; 2235 2236 delta += cnt - 1; 2237 env->prog = prog = new_prog; 2238 insn = new_prog->insnsi + i + delta; 2239 goto next_insn; 2240 } 2241 2242 /* Implement bpf_get_func_ip inline. 
*/ 2243 if (prog_type == BPF_PROG_TYPE_TRACING && 2244 insn->imm == BPF_FUNC_get_func_ip) { 2245 /* Load IP address from ctx - 16 */ 2246 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16); 2247 2248 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); 2249 if (!new_prog) 2250 return -ENOMEM; 2251 2252 env->prog = prog = new_prog; 2253 insn = new_prog->insnsi + i + delta; 2254 goto next_insn; 2255 } 2256 2257 /* Implement bpf_get_branch_snapshot inline. */ 2258 if (IS_ENABLED(CONFIG_PERF_EVENTS) && 2259 prog->jit_requested && BITS_PER_LONG == 64 && 2260 insn->imm == BPF_FUNC_get_branch_snapshot) { 2261 /* We are dealing with the following func protos: 2262 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags); 2263 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt); 2264 */ 2265 const u32 br_entry_size = sizeof(struct perf_branch_entry); 2266 2267 /* struct perf_branch_entry is part of UAPI and is 2268 * used as an array element, so extremely unlikely to 2269 * ever grow or shrink 2270 */ 2271 BUILD_BUG_ON(br_entry_size != 24); 2272 2273 /* if (unlikely(flags)) return -EINVAL */ 2274 insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7); 2275 2276 /* Transform size (bytes) into number of entries (cnt = size / 24). 2277 * But to avoid expensive division instruction, we implement 2278 * divide-by-3 through multiplication, followed by further 2279 * division by 8 through 3-bit right shift. 2280 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr., 2281 * p. 227, chapter "Unsigned Division by 3" for details and proofs. 2282 * 2283 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab. 
2284 */ 2285 insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab); 2286 insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0); 2287 insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36); 2288 2289 /* call perf_snapshot_branch_stack implementation */ 2290 insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack)); 2291 /* if (entry_cnt == 0) return -ENOENT */ 2292 insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4); 2293 /* return entry_cnt * sizeof(struct perf_branch_entry) */ 2294 insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size); 2295 insn_buf[7] = BPF_JMP_A(3); 2296 /* return -EINVAL; */ 2297 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); 2298 insn_buf[9] = BPF_JMP_A(1); 2299 /* return -ENOENT; */ 2300 insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT); 2301 cnt = 11; 2302 2303 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2304 if (!new_prog) 2305 return -ENOMEM; 2306 2307 delta += cnt - 1; 2308 env->prog = prog = new_prog; 2309 insn = new_prog->insnsi + i + delta; 2310 goto next_insn; 2311 } 2312 2313 /* Implement bpf_kptr_xchg inline */ 2314 if (prog->jit_requested && BITS_PER_LONG == 64 && 2315 insn->imm == BPF_FUNC_kptr_xchg && 2316 bpf_jit_supports_ptr_xchg()) { 2317 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2); 2318 insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0); 2319 cnt = 2; 2320 2321 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 2322 if (!new_prog) 2323 return -ENOMEM; 2324 2325 delta += cnt - 1; 2326 env->prog = prog = new_prog; 2327 insn = new_prog->insnsi + i + delta; 2328 goto next_insn; 2329 } 2330 patch_call_imm: 2331 fn = env->ops->get_func_proto(insn->imm, env->prog); 2332 /* all functions that have prototype and verifier allowed 2333 * programs to call them, must be real in-kernel functions 2334 */ 2335 if (!fn->func) { 2336 verifier_bug(env, 2337 "not inlined functions %s#%d is missing func", 2338 func_id_name(insn->imm), insn->imm); 2339 return 
-EFAULT; 2340 } 2341 insn->imm = fn->func - __bpf_call_base; 2342 next_insn: 2343 if (subprogs[cur_subprog + 1].start == i + delta + 1) { 2344 subprogs[cur_subprog].stack_depth += stack_depth_extra; 2345 subprogs[cur_subprog].stack_extra = stack_depth_extra; 2346 2347 stack_depth = subprogs[cur_subprog].stack_depth; 2348 if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) { 2349 verbose(env, "stack size %d(extra %d) is too large\n", 2350 stack_depth, stack_depth_extra); 2351 return -EINVAL; 2352 } 2353 cur_subprog++; 2354 stack_depth = subprogs[cur_subprog].stack_depth; 2355 stack_depth_extra = 0; 2356 } 2357 i++; 2358 insn++; 2359 } 2360 2361 env->prog->aux->stack_depth = subprogs[0].stack_depth; 2362 for (i = 0; i < env->subprog_cnt; i++) { 2363 int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1; 2364 int subprog_start = subprogs[i].start; 2365 int stack_slots = subprogs[i].stack_extra / 8; 2366 int slots = delta, cnt = 0; 2367 2368 if (!stack_slots) 2369 continue; 2370 /* We need two slots in case timed may_goto is supported. 
*/ 2371 if (stack_slots > slots) { 2372 verifier_bug(env, "stack_slots supports may_goto only"); 2373 return -EFAULT; 2374 } 2375 2376 stack_depth = subprogs[i].stack_depth; 2377 if (bpf_jit_supports_timed_may_goto()) { 2378 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, 2379 BPF_MAX_TIMED_LOOPS); 2380 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0); 2381 } else { 2382 /* Add ST insn to subprog prologue to init extra stack */ 2383 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, 2384 BPF_MAX_LOOPS); 2385 } 2386 /* Copy first actual insn to preserve it */ 2387 insn_buf[cnt++] = env->prog->insnsi[subprog_start]; 2388 2389 new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt); 2390 if (!new_prog) 2391 return -ENOMEM; 2392 env->prog = prog = new_prog; 2393 /* 2394 * If may_goto is a first insn of a prog there could be a jmp 2395 * insn that points to it, hence adjust all such jmps to point 2396 * to insn after BPF_ST that inits may_goto count. 2397 * Adjustment will succeed because bpf_patch_insn_data() didn't fail. 2398 */ 2399 WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta)); 2400 } 2401 2402 /* Since poke tab is now finalized, publish aux to tracker. 
*/ 2403 for (i = 0; i < prog->aux->size_poke_tab; i++) { 2404 map_ptr = prog->aux->poke_tab[i].tail_call.map; 2405 if (!map_ptr->ops->map_poke_track || 2406 !map_ptr->ops->map_poke_untrack || 2407 !map_ptr->ops->map_poke_run) { 2408 verifier_bug(env, "poke tab is misconfigured"); 2409 return -EFAULT; 2410 } 2411 2412 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux); 2413 if (ret < 0) { 2414 verbose(env, "tracking tail call prog failed\n"); 2415 return ret; 2416 } 2417 } 2418 2419 ret = sort_kfunc_descs_by_imm_off(env); 2420 if (ret) 2421 return ret; 2422 2423 return 0; 2424 } 2425 2426 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env, 2427 int position, 2428 s32 stack_base, 2429 u32 callback_subprogno, 2430 u32 *total_cnt) 2431 { 2432 s32 r6_offset = stack_base + 0 * BPF_REG_SIZE; 2433 s32 r7_offset = stack_base + 1 * BPF_REG_SIZE; 2434 s32 r8_offset = stack_base + 2 * BPF_REG_SIZE; 2435 int reg_loop_max = BPF_REG_6; 2436 int reg_loop_cnt = BPF_REG_7; 2437 int reg_loop_ctx = BPF_REG_8; 2438 2439 struct bpf_insn *insn_buf = env->insn_buf; 2440 struct bpf_prog *new_prog; 2441 u32 callback_start; 2442 u32 call_insn_offset; 2443 s32 callback_offset; 2444 u32 cnt = 0; 2445 2446 /* This represents an inlined version of bpf_iter.c:bpf_loop, 2447 * be careful to modify this code in sync. 2448 */ 2449 2450 /* Return error and jump to the end of the patch if 2451 * expected number of iterations is too big. 
2452 */ 2453 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2); 2454 insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG); 2455 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16); 2456 /* spill R6, R7, R8 to use these as loop vars */ 2457 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset); 2458 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset); 2459 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset); 2460 /* initialize loop vars */ 2461 insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1); 2462 insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0); 2463 insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3); 2464 /* loop header, 2465 * if reg_loop_cnt >= reg_loop_max skip the loop body 2466 */ 2467 insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5); 2468 /* callback call, 2469 * correct callback offset would be set after patching 2470 */ 2471 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt); 2472 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx); 2473 insn_buf[cnt++] = BPF_CALL_REL(0); 2474 /* increment loop counter */ 2475 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1); 2476 /* jump to loop header if callback returned 0 */ 2477 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6); 2478 /* return value of bpf_loop, 2479 * set R0 to the number of iterations 2480 */ 2481 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt); 2482 /* restore original values of R6, R7, R8 */ 2483 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset); 2484 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset); 2485 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset); 2486 2487 *total_cnt = cnt; 2488 new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt); 2489 if (!new_prog) 2490 return new_prog; 2491 2492 /* callback start is known only after patching */ 2493 callback_start = 
env->subprog_info[callback_subprogno].start; 2494 /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */ 2495 call_insn_offset = position + 12; 2496 callback_offset = callback_start - call_insn_offset - 1; 2497 new_prog->insnsi[call_insn_offset].imm = callback_offset; 2498 2499 return new_prog; 2500 } 2501 2502 static bool is_bpf_loop_call(struct bpf_insn *insn) 2503 { 2504 return insn->code == (BPF_JMP | BPF_CALL) && 2505 insn->src_reg == 0 && 2506 insn->imm == BPF_FUNC_loop; 2507 } 2508 2509 /* For all sub-programs in the program (including main) check 2510 * insn_aux_data to see if there are bpf_loop calls that require 2511 * inlining. If such calls are found the calls are replaced with a 2512 * sequence of instructions produced by `inline_bpf_loop` function and 2513 * subprog stack_depth is increased by the size of 3 registers. 2514 * This stack space is used to spill values of the R6, R7, R8. These 2515 * registers are used to store the loop bound, counter and context 2516 * variables. 
2517 */ 2518 int bpf_optimize_bpf_loop(struct bpf_verifier_env *env) 2519 { 2520 struct bpf_subprog_info *subprogs = env->subprog_info; 2521 int i, cur_subprog = 0, cnt, delta = 0; 2522 struct bpf_insn *insn = env->prog->insnsi; 2523 int insn_cnt = env->prog->len; 2524 u16 stack_depth = subprogs[cur_subprog].stack_depth; 2525 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; 2526 u16 stack_depth_extra = 0; 2527 2528 for (i = 0; i < insn_cnt; i++, insn++) { 2529 struct bpf_loop_inline_state *inline_state = 2530 &env->insn_aux_data[i + delta].loop_inline_state; 2531 2532 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) { 2533 struct bpf_prog *new_prog; 2534 2535 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup; 2536 new_prog = inline_bpf_loop(env, 2537 i + delta, 2538 -(stack_depth + stack_depth_extra), 2539 inline_state->callback_subprogno, 2540 &cnt); 2541 if (!new_prog) 2542 return -ENOMEM; 2543 2544 delta += cnt - 1; 2545 env->prog = new_prog; 2546 insn = new_prog->insnsi + i + delta; 2547 } 2548 2549 if (subprogs[cur_subprog + 1].start == i + delta + 1) { 2550 subprogs[cur_subprog].stack_depth += stack_depth_extra; 2551 cur_subprog++; 2552 stack_depth = subprogs[cur_subprog].stack_depth; 2553 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; 2554 stack_depth_extra = 0; 2555 } 2556 } 2557 2558 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 2559 2560 return 0; 2561 } 2562 2563 /* Remove unnecessary spill/fill pairs, members of fastcall pattern, 2564 * adjust subprograms stack depth when possible. 
2565 */ 2566 int bpf_remove_fastcall_spills_fills(struct bpf_verifier_env *env) 2567 { 2568 struct bpf_subprog_info *subprog = env->subprog_info; 2569 struct bpf_insn_aux_data *aux = env->insn_aux_data; 2570 struct bpf_insn *insn = env->prog->insnsi; 2571 int insn_cnt = env->prog->len; 2572 u32 spills_num; 2573 bool modified = false; 2574 int i, j; 2575 2576 for (i = 0; i < insn_cnt; i++, insn++) { 2577 if (aux[i].fastcall_spills_num > 0) { 2578 spills_num = aux[i].fastcall_spills_num; 2579 /* NOPs would be removed by opt_remove_nops() */ 2580 for (j = 1; j <= spills_num; ++j) { 2581 *(insn - j) = NOP; 2582 *(insn + j) = NOP; 2583 } 2584 modified = true; 2585 } 2586 if ((subprog + 1)->start == i + 1) { 2587 if (modified && !subprog->keep_fastcall_stack) 2588 subprog->stack_depth = -subprog->fastcall_stack_off; 2589 subprog++; 2590 modified = false; 2591 } 2592 } 2593 2594 return 0; 2595 } 2596 2597