// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <uapi/linux/btf.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/moduleloader.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/objtool.h>
#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
#include <linux/extable.h>
#include <linux/log2.h>
#include <linux/bpf_verifier.h>

#include <asm/barrier.h>
#include <asm/unaligned.h>

/* Registers */
#define BPF_R0	regs[BPF_REG_0]
#define BPF_R1	regs[BPF_REG_1]
#define BPF_R2	regs[BPF_REG_2]
#define BPF_R3	regs[BPF_REG_3]
#define BPF_R4	regs[BPF_REG_4]
#define BPF_R5	regs[BPF_REG_5]
#define BPF_R6	regs[BPF_REG_6]
#define BPF_R7	regs[BPF_REG_7]
#define BPF_R8	regs[BPF_REG_8]
#define BPF_R9	regs[BPF_REG_9]
#define BPF_R10	regs[BPF_REG_10]

/* Named registers */
#define DST	regs[insn->dst_reg]
#define SRC	regs[insn->src_reg]
#define FP	regs[BPF_REG_FP]
#define AX	regs[BPF_REG_AX]
#define ARG1	regs[BPF_REG_ARG1]
#define CTX	regs[BPF_REG_CTX]
#define IMM	insn->imm

/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;

	return NULL;
}

struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
	struct bpf_prog_aux *aux;
	struct bpf_prog *fp;

	size = round_up(size, PAGE_SIZE);
	fp = __vmalloc(size, gfp_flags);
	if (fp == NULL)
		return NULL;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
	if (aux == NULL) {
		vfree(fp);
		return NULL;
	}
	fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
	if (!fp->active) {
		vfree(fp);
		kfree(aux);
		return NULL;
	}

	fp->pages = size / PAGE_SIZE;
	fp->aux = aux;
	fp->aux->prog = fp;
	fp->jit_requested = ebpf_jit_enabled();
	fp->blinding_requested = bpf_jit_blinding_enabled(fp);

	INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
	mutex_init(&fp->aux->used_maps_mutex);
	mutex_init(&fp->aux->dst_mutex);

	return fp;
}

struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
	struct bpf_prog *prog;
	int cpu;

	prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
	if (!prog)
		return NULL;

	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
	if (!prog->stats) {
		free_percpu(prog->active);
		kfree(prog->aux);
		vfree(prog);
		return NULL;
	}

	for_each_possible_cpu(cpu) {
		struct bpf_prog_stats *pstats;

		pstats = per_cpu_ptr(prog->stats, cpu);
		u64_stats_init(&pstats->syncp);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_alloc);

int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
{
	if (!prog->aux->nr_linfo || !prog->jit_requested)
		return 0;

	prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
					  sizeof(*prog->aux->jited_linfo),
					  GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (!prog->aux->jited_linfo)
		return -ENOMEM;

	return 0;
}

void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
{
	if (prog->aux->jited_linfo &&
	    (!prog->jited || !prog->aux->jited_linfo[0])) {
		kvfree(prog->aux->jited_linfo);
		prog->aux->jited_linfo = NULL;
	}

	kfree(prog->aux->kfunc_tab);
	prog->aux->kfunc_tab = NULL;
}

/* The JIT engine is responsible for providing an array
 * for insn_off to the jited_off mapping (insn_to_jit_off).
 *
 * The idx to this array is the insn_off. Hence, the insn_off
 * here is relative to the prog itself instead of the main prog.
 * This array has one entry for each xlated bpf insn.
 *
 * jited_off is the byte off to the last byte of the jited insn.
 *
 * Hence, with
 * insn_start:
 *	The first bpf insn off of the prog. The insn off
 *	here is relative to the main prog.
 *	e.g. if prog is a subprog, insn_start > 0
 * linfo_idx:
 *	The prog's idx to prog->aux->linfo and jited_linfo
 *
 * jited_linfo[linfo_idx] = prog->bpf_func
 *
 * For i > linfo_idx,
 *
 * jited_linfo[i] = prog->bpf_func +
 *	insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
 */
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
			       const u32 *insn_to_jit_off)
{
	u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
	const struct bpf_line_info *linfo;
	void **jited_linfo;

	if (!prog->aux->jited_linfo)
		/* Userspace did not provide linfo */
		return;

	linfo_idx = prog->aux->linfo_idx;
	linfo = &prog->aux->linfo[linfo_idx];
	insn_start = linfo[0].insn_off;
	insn_end = insn_start + prog->len;

	jited_linfo = &prog->aux->jited_linfo[linfo_idx];
	jited_linfo[0] = prog->bpf_func;

	nr_linfo = prog->aux->nr_linfo - linfo_idx;

	for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
		/* The verifier ensures that linfo[i].insn_off is
		 * strictly increasing
		 */
		jited_linfo[i] = prog->bpf_func +
			insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
}
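
/* Illustrative worked example for the mapping above (all numbers are
 * hypothetical): for a subprog with linfo_idx = 3, linfo[3].insn_off = 10
 * (so insn_start = 10) and prog->len = 5, insn_end is 15 and
 * jited_linfo[3] = prog->bpf_func. A later entry with
 * linfo[4].insn_off = 12 then maps to
 *
 *	jited_linfo[4] = prog->bpf_func + insn_to_jit_off[12 - 10 - 1]
 *
 * i.e. the JITed address right after the subprog's second xlated insn,
 * which is where the code generated for insn_off 12 begins.
 */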

struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
				  gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
	struct bpf_prog *fp;
	u32 pages;

	size = round_up(size, PAGE_SIZE);
	pages = size / PAGE_SIZE;
	if (pages <= fp_old->pages)
		return fp_old;

	fp = __vmalloc(size, gfp_flags);
	if (fp) {
		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
		fp->pages = pages;
		fp->aux->prog = fp;

		/* We keep fp->aux from fp_old around in the new
		 * reallocated structure.
		 */
		fp_old->aux = NULL;
		fp_old->stats = NULL;
		fp_old->active = NULL;
		__bpf_prog_free(fp_old);
	}

	return fp;
}

void __bpf_prog_free(struct bpf_prog *fp)
{
	if (fp->aux) {
		mutex_destroy(&fp->aux->used_maps_mutex);
		mutex_destroy(&fp->aux->dst_mutex);
		kfree(fp->aux->poke_tab);
		kfree(fp->aux);
	}
	free_percpu(fp->stats);
	free_percpu(fp->active);
	vfree(fp);
}
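
/* Minimal lifecycle sketch (hypothetical caller, for illustration only):
 * bpf_prog_alloc() pairs with __bpf_prog_free() on early error paths,
 * i.e. before the program has been published or JITed.
 */
static __maybe_unused struct bpf_prog *bpf_prog_alloc_example(u32 insn_cnt)
{
	struct bpf_prog *fp;

	fp = bpf_prog_alloc(bpf_prog_size(insn_cnt), GFP_USER);
	if (!fp)
		return NULL;

	fp->len = insn_cnt;
	/* ... copy insn_cnt instructions into fp->insnsi here ... */

	if (bpf_prog_calc_tag(fp)) {
		/* undo the allocation; aux, stats and active are freed too */
		__bpf_prog_free(fp);
		return NULL;
	}
	return fp;
}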

int bpf_prog_calc_tag(struct bpf_prog *fp)
{
	const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
	u32 raw_size = bpf_prog_tag_scratch_size(fp);
	u32 digest[SHA1_DIGEST_WORDS];
	u32 ws[SHA1_WORKSPACE_WORDS];
	u32 i, bsize, psize, blocks;
	struct bpf_insn *dst;
	bool was_ld_map;
	u8 *raw, *todo;
	__be32 *result;
	__be64 *bits;

	raw = vmalloc(raw_size);
	if (!raw)
		return -ENOMEM;

	sha1_init(digest);
	memset(ws, 0, sizeof(ws));

	/* We need to take out the map fds for the digest calculation
	 * since they are unstable from the user space side.
	 */
	dst = (void *)raw;
	for (i = 0, was_ld_map = false; i < fp->len; i++) {
		dst[i] = fp->insnsi[i];
		if (!was_ld_map &&
		    dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
		    (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
		     dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
			was_ld_map = true;
			dst[i].imm = 0;
		} else if (was_ld_map &&
			   dst[i].code == 0 &&
			   dst[i].dst_reg == 0 &&
			   dst[i].src_reg == 0 &&
			   dst[i].off == 0) {
			was_ld_map = false;
			dst[i].imm = 0;
		} else {
			was_ld_map = false;
		}
	}

	psize = bpf_prog_insn_size(fp);
	memset(&raw[psize], 0, raw_size - psize);
	raw[psize++] = 0x80;

	bsize = round_up(psize, SHA1_BLOCK_SIZE);
	blocks = bsize / SHA1_BLOCK_SIZE;
	todo = raw;
	if (bsize - psize >= sizeof(__be64)) {
		bits = (__be64 *)(todo + bsize - sizeof(__be64));
	} else {
		bits = (__be64 *)(todo + bsize + bits_offset);
		blocks++;
	}
	*bits = cpu_to_be64((psize - 1) << 3);

	while (blocks--) {
		sha1_transform(digest, todo, ws);
		todo += SHA1_BLOCK_SIZE;
	}

	result = (__force __be32 *)digest;
	for (i = 0; i < SHA1_DIGEST_WORDS; i++)
		result[i] = cpu_to_be32(digest[i]);
	memcpy(fp->tag, result, sizeof(fp->tag));

	vfree(raw);
	return 0;
}
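
/* Illustration: because the immediates of the BPF_LD | BPF_IMM | BPF_DW
 * pairs referencing maps are zeroed above before hashing, loading the same
 * program text twice with different map fds yields the same fp->tag; the
 * tag therefore identifies the program, not its particular map references.
 */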

static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
				s32 end_new, s32 curr, const bool probe_pass)
{
	const s64 imm_min = S32_MIN, imm_max = S32_MAX;
	s32 delta = end_new - end_old;
	s64 imm = insn->imm;

	if (curr < pos && curr + imm + 1 >= end_old)
		imm += delta;
	else if (curr >= end_new && curr + imm + 1 < end_new)
		imm -= delta;
	if (imm < imm_min || imm > imm_max)
		return -ERANGE;
	if (!probe_pass)
		insn->imm = imm;
	return 0;
}

static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
				s32 end_new, s32 curr, const bool probe_pass)
{
	const s32 off_min = S16_MIN, off_max = S16_MAX;
	s32 delta = end_new - end_old;
	s32 off = insn->off;

	if (curr < pos && curr + off + 1 >= end_old)
		off += delta;
	else if (curr >= end_new && curr + off + 1 < end_new)
		off -= delta;
	if (off < off_min || off > off_max)
		return -ERANGE;
	if (!probe_pass)
		insn->off = off;
	return 0;
}

static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
			    s32 end_new, const bool probe_pass)
{
	u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
	struct bpf_insn *insn = prog->insnsi;
	int ret = 0;

	for (i = 0; i < insn_cnt; i++, insn++) {
		u8 code;

		/* In the probing pass we still operate on the original,
		 * unpatched image in order to check overflows before we
		 * do any other adjustments. Therefore skip the patchlet.
		 */
		if (probe_pass && i == pos) {
			i = end_new;
			insn = prog->insnsi + end_old;
		}
		if (bpf_pseudo_func(insn)) {
			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
						   end_new, i, probe_pass);
			if (ret)
				return ret;
			continue;
		}
		code = insn->code;
		if ((BPF_CLASS(code) != BPF_JMP &&
		     BPF_CLASS(code) != BPF_JMP32) ||
		    BPF_OP(code) == BPF_EXIT)
			continue;
		/* Adjust offset of jmps if we cross patch boundaries. */
		if (BPF_OP(code) == BPF_CALL) {
			if (insn->src_reg != BPF_PSEUDO_CALL)
				continue;
			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
						   end_new, i, probe_pass);
		} else {
			ret = bpf_adj_delta_to_off(insn, pos, end_old,
						   end_new, i, probe_pass);
		}
		if (ret)
			break;
	}

	return ret;
}
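
/* Worked example for the adjustment above (hypothetical numbers): if the
 * single insn at position 5 is replaced by a 3-insn patchlet, then pos = 5,
 * end_old = 6, end_new = 8 and delta = 2. A forward jump at curr = 2 with
 * off = 4 used to land on insn 7, which now lives at insn 9, so
 * bpf_adj_delta_to_off() bumps off to 6 (curr < pos &&
 * curr + off + 1 >= end_old). Conversely, a jump located at or behind
 * end_new that targets an insn in front of the patched region is shrunk
 * by delta.
 */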

static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
{
	struct bpf_line_info *linfo;
	u32 i, nr_linfo;

	nr_linfo = prog->aux->nr_linfo;
	if (!nr_linfo || !delta)
		return;

	linfo = prog->aux->linfo;

	for (i = 0; i < nr_linfo; i++)
		if (off < linfo[i].insn_off)
			break;

	/* Push all off < linfo[i].insn_off by delta */
	for (; i < nr_linfo; i++)
		linfo[i].insn_off += delta;
}

struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
				       const struct bpf_insn *patch, u32 len)
{
	u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
	const u32 cnt_max = S16_MAX;
	struct bpf_prog *prog_adj;
	int err;

	/* Since our patchlet doesn't expand the image, we're done. */
	if (insn_delta == 0) {
		memcpy(prog->insnsi + off, patch, sizeof(*patch));
		return prog;
	}

	insn_adj_cnt = prog->len + insn_delta;

	/* Reject anything that would potentially let the insn->off
	 * target overflow when we have excessive program expansions.
	 * We need to probe here before we do any reallocation where
	 * we afterwards may not fail anymore.
	 */
	if (insn_adj_cnt > cnt_max &&
	    (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
		return ERR_PTR(err);

	/* Several new instructions need to be inserted. Make room
	 * for them. Likely, there's no need for a new allocation as
	 * last page could have large enough tailroom.
	 */
	prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
				    GFP_USER);
	if (!prog_adj)
		return ERR_PTR(-ENOMEM);

	prog_adj->len = insn_adj_cnt;

	/* Patching happens in 3 steps:
	 *
	 * 1) Move over tail of insnsi from next instruction onwards,
	 *    so we can patch the single target insn with one or more
	 *    new ones (patching is always from 1 to n insns, n > 0).
	 * 2) Inject new instructions at the target location.
	 * 3) Adjust branch offsets if necessary.
	 */
	insn_rest = insn_adj_cnt - off - len;

	memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
		sizeof(*patch) * insn_rest);
	memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);

	/* We are guaranteed to not fail at this point; otherwise the ship
	 * has sailed and there is no way back to the original state. An
	 * overflow cannot happen at this point.
	 */
	BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));

	bpf_adj_linfo(prog_adj, off, insn_delta);

	return prog_adj;
}
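
/* Minimal usage sketch (hypothetical rewrite, mirroring what the verifier's
 * fixup passes do): replace the single instruction at @off with a two-insn
 * sequence. On success the old prog pointer must not be used any more since
 * the image may have been reallocated.
 */
static __maybe_unused struct bpf_prog *bpf_patch_example(struct bpf_prog *prog,
							 u32 off)
{
	struct bpf_insn patch[] = {
		BPF_MOV64_IMM(BPF_REG_AX, 42),
		BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_AX),
	};
	struct bpf_prog *new_prog;

	new_prog = bpf_patch_insn_single(prog, off, patch, ARRAY_SIZE(patch));
	if (IS_ERR(new_prog))
		return new_prog;
	/* branch offsets and line info were adjusted for us */
	return new_prog;
}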

int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
{
	/* Branch offsets can't overflow when program is shrinking, no need
	 * to call bpf_adj_branches(..., true) here
	 */
	memmove(prog->insnsi + off, prog->insnsi + off + cnt,
		sizeof(struct bpf_insn) * (prog->len - off - cnt));
	prog->len -= cnt;

	return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
}

static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
{
	int i;

	for (i = 0; i < fp->aux->func_cnt; i++)
		bpf_prog_kallsyms_del(fp->aux->func[i]);
}

void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
{
	bpf_prog_kallsyms_del_subprogs(fp);
	bpf_prog_kallsyms_del(fp);
}

#ifdef CONFIG_BPF_JIT
/* All BPF JIT sysctl knobs here. */
int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
int bpf_jit_harden   __read_mostly;
long bpf_jit_limit   __read_mostly;
long bpf_jit_limit_max __read_mostly;

static void
bpf_prog_ksym_set_addr(struct bpf_prog *prog)
{
	WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));

	prog->aux->ksym.start = (unsigned long) prog->bpf_func;
	prog->aux->ksym.end   = prog->aux->ksym.start + prog->jited_len;
}

static void
bpf_prog_ksym_set_name(struct bpf_prog *prog)
{
	char *sym = prog->aux->ksym.name;
	const char *end = sym + KSYM_NAME_LEN;
	const struct btf_type *type;
	const char *func_name;

	BUILD_BUG_ON(sizeof("bpf_prog_") +
		     sizeof(prog->tag) * 2 +
		     /* name has been null terminated.
		      * We would need +1 for the '_' preceding
		      * the name. However, the null character
		      * is double counted between the name and the
		      * sizeof("bpf_prog_") above, so we omit
		      * the +1 here.
		      */
		     sizeof(prog->aux->name) > KSYM_NAME_LEN);

	sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
	sym  = bin2hex(sym, prog->tag, sizeof(prog->tag));

	/* prog->aux->name will be ignored if full btf name is available */
	if (prog->aux->func_info_cnt) {
		type = btf_type_by_id(prog->aux->btf,
				      prog->aux->func_info[prog->aux->func_idx].type_id);
		func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
		snprintf(sym, (size_t)(end - sym), "_%s", func_name);
		return;
	}

	if (prog->aux->name[0])
		snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
	else
		*sym = 0;
}
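
/* Resulting symbol layout, e.g. as listed in /proc/kallsyms (the tag value
 * shown here is hypothetical):
 *
 *	bpf_prog_8937c89b1e9f3a4b		(no BTF func info and no name)
 *	bpf_prog_8937c89b1e9f3a4b_my_prog	(BTF func name or prog->aux->name)
 */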

static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
{
	return container_of(n, struct bpf_ksym, tnode)->start;
}

static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
					  struct latch_tree_node *b)
{
	return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
}

static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
{
	unsigned long val = (unsigned long)key;
	const struct bpf_ksym *ksym;

	ksym = container_of(n, struct bpf_ksym, tnode);

	if (val < ksym->start)
		return -1;
	if (val >= ksym->end)
		return  1;

	return 0;
}

static const struct latch_tree_ops bpf_tree_ops = {
	.less	= bpf_tree_less,
	.comp	= bpf_tree_comp,
};

static DEFINE_SPINLOCK(bpf_lock);
static LIST_HEAD(bpf_kallsyms);
static struct latch_tree_root bpf_tree __cacheline_aligned;

void bpf_ksym_add(struct bpf_ksym *ksym)
{
	spin_lock_bh(&bpf_lock);
	WARN_ON_ONCE(!list_empty(&ksym->lnode));
	list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
	latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
	spin_unlock_bh(&bpf_lock);
}

static void __bpf_ksym_del(struct bpf_ksym *ksym)
{
	if (list_empty(&ksym->lnode))
		return;

	latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
	list_del_rcu(&ksym->lnode);
}

void bpf_ksym_del(struct bpf_ksym *ksym)
{
	spin_lock_bh(&bpf_lock);
	__bpf_ksym_del(ksym);
	spin_unlock_bh(&bpf_lock);
}

static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
{
	return fp->jited && !bpf_prog_was_classic(fp);
}

static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
{
	return list_empty(&fp->aux->ksym.lnode) ||
	       fp->aux->ksym.lnode.prev == LIST_POISON2;
}

void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
	if (!bpf_prog_kallsyms_candidate(fp) ||
	    !bpf_capable())
		return;

	bpf_prog_ksym_set_addr(fp);
	bpf_prog_ksym_set_name(fp);
	fp->aux->ksym.prog = true;

	bpf_ksym_add(&fp->aux->ksym);
}

void bpf_prog_kallsyms_del(struct bpf_prog *fp)
{
	if (!bpf_prog_kallsyms_candidate(fp))
		return;

	bpf_ksym_del(&fp->aux->ksym);
}

static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
{
	struct latch_tree_node *n;

	n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
	return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
}

const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
				 unsigned long *off, char *sym)
{
	struct bpf_ksym *ksym;
	char *ret = NULL;

	rcu_read_lock();
	ksym = bpf_ksym_find(addr);
	if (ksym) {
		unsigned long symbol_start = ksym->start;
		unsigned long symbol_end = ksym->end;

		strncpy(sym, ksym->name, KSYM_NAME_LEN);

		ret = sym;
		if (size)
			*size = symbol_end - symbol_start;
		if (off)
			*off  = addr - symbol_start;
	}
	rcu_read_unlock();

	return ret;
}

bool is_bpf_text_address(unsigned long addr)
{
	bool ret;

	rcu_read_lock();
	ret = bpf_ksym_find(addr) != NULL;
	rcu_read_unlock();

	return ret;
}

static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
{
	struct bpf_ksym *ksym = bpf_ksym_find(addr);

	return ksym && ksym->prog ?
	       container_of(ksym, struct bpf_prog_aux, ksym)->prog :
	       NULL;
}

const struct exception_table_entry *search_bpf_extables(unsigned long addr)
{
	const struct exception_table_entry *e = NULL;
	struct bpf_prog *prog;

	rcu_read_lock();
	prog = bpf_prog_ksym_find(addr);
	if (!prog)
		goto out;
	if (!prog->aux->num_exentries)
		goto out;

	e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
out:
	rcu_read_unlock();
	return e;
}

int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
		    char *sym)
{
	struct bpf_ksym *ksym;
	unsigned int it = 0;
	int ret = -ERANGE;

	if (!bpf_jit_kallsyms_enabled())
		return ret;

	rcu_read_lock();
	list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
		if (it++ != symnum)
			continue;

		strncpy(sym, ksym->name, KSYM_NAME_LEN);

		*value = ksym->start;
		*type  = BPF_SYM_ELF_TYPE;

		ret = 0;
		break;
	}
	rcu_read_unlock();

	return ret;
}

int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
				struct bpf_jit_poke_descriptor *poke)
{
	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
	static const u32 poke_tab_max = 1024;
	u32 slot = prog->aux->size_poke_tab;
	u32 size = slot + 1;

	if (size > poke_tab_max)
		return -ENOSPC;
	if (poke->tailcall_target || poke->tailcall_target_stable ||
	    poke->tailcall_bypass || poke->adj_off || poke->bypass_addr)
		return -EINVAL;

	switch (poke->reason) {
	case BPF_POKE_REASON_TAIL_CALL:
		if (!poke->tail_call.map)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	tab = krealloc(tab, size * sizeof(*poke), GFP_KERNEL);
	if (!tab)
		return -ENOMEM;

	memcpy(&tab[slot], poke, sizeof(*poke));
	prog->aux->size_poke_tab = size;
	prog->aux->poke_tab = tab;

	return slot;
}

/*
 * BPF program pack allocator.
 *
 * Most BPF programs are pretty small. Allocating a whole page for each
 * program is sometimes a waste. Many small bpf programs also add pressure
 * to the instruction TLB. To solve this issue, we introduce a BPF program
 * pack allocator. The prog_pack allocator uses HPAGE_PMD_SIZE pages
 * (2MB on x86) to host BPF programs.
 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define BPF_PROG_PACK_SIZE	HPAGE_PMD_SIZE
#else
#define BPF_PROG_PACK_SIZE	PAGE_SIZE
#endif
#define BPF_PROG_CHUNK_SHIFT	6
#define BPF_PROG_CHUNK_SIZE	(1 << BPF_PROG_CHUNK_SHIFT)
#define BPF_PROG_CHUNK_MASK	(~(BPF_PROG_CHUNK_SIZE - 1))
#define BPF_PROG_CHUNK_COUNT	(BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)

struct bpf_prog_pack {
	struct list_head list;
	void *ptr;
	unsigned long bitmap[];
};

#define BPF_PROG_MAX_PACK_PROG_SIZE	BPF_PROG_PACK_SIZE
#define BPF_PROG_SIZE_TO_NBITS(size)	(round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)

static DEFINE_MUTEX(pack_mutex);
static LIST_HEAD(pack_list);

static struct bpf_prog_pack *alloc_new_pack(void)
{
	struct bpf_prog_pack *pack;

	pack = kzalloc(sizeof(*pack) + BITS_TO_BYTES(BPF_PROG_CHUNK_COUNT), GFP_KERNEL);
	if (!pack)
		return NULL;
	pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
	if (!pack->ptr) {
		kfree(pack);
		return NULL;
	}
	bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
	list_add_tail(&pack->list, &pack_list);

	set_vm_flush_reset_perms(pack->ptr);
	set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
	set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
	return pack;
}

static void *bpf_prog_pack_alloc(u32 size)
{
	unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
	struct bpf_prog_pack *pack;
	unsigned long pos;
	void *ptr = NULL;

	if (size > BPF_PROG_MAX_PACK_PROG_SIZE) {
		size = round_up(size, PAGE_SIZE);
		ptr = module_alloc(size);
		if (ptr) {
			set_vm_flush_reset_perms(ptr);
			set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
			set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
		}
		return ptr;
	}
	mutex_lock(&pack_mutex);
	list_for_each_entry(pack, &pack_list, list) {
		pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
						 nbits, 0);
		if (pos < BPF_PROG_CHUNK_COUNT)
			goto found_free_area;
	}

	pack = alloc_new_pack();
	if (!pack)
		goto out;

	pos = 0;

found_free_area:
	bitmap_set(pack->bitmap, pos, nbits);
	ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);

out:
	mutex_unlock(&pack_mutex);
	return ptr;
}

static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
{
	struct bpf_prog_pack *pack = NULL, *tmp;
	unsigned int nbits;
	unsigned long pos;
	void *pack_ptr;

	if (hdr->size > BPF_PROG_MAX_PACK_PROG_SIZE) {
		module_memfree(hdr);
		return;
	}

	pack_ptr = (void *)((unsigned long)hdr & ~(BPF_PROG_PACK_SIZE - 1));
	mutex_lock(&pack_mutex);

	list_for_each_entry(tmp, &pack_list, list) {
		if (tmp->ptr == pack_ptr) {
			pack = tmp;
			break;
		}
	}

	if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
		goto out;

	nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
	pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;

	bitmap_clear(pack->bitmap, pos, nbits);
	if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
				       BPF_PROG_CHUNK_COUNT, 0) == 0) {
		list_del(&pack->list);
		module_memfree(pack->ptr);
		kfree(pack);
	}
out:
	mutex_unlock(&pack_mutex);
}
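
/* Worked example of the chunk accounting above (x86 with THP enabled:
 * 2 MiB packs and 64-byte chunks, i.e. BPF_PROG_CHUNK_COUNT == 32768):
 * a 1000-byte image needs BPF_PROG_SIZE_TO_NBITS(1000) ==
 * round_up(1000, 64) / 64 == 16 chunks, and its address becomes
 * pack->ptr + (pos << BPF_PROG_CHUNK_SHIFT) for the first free run of
 * 16 bits found in pack->bitmap.
 */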

static atomic_long_t bpf_jit_current;

/* Can be overridden by an arch's JIT compiler if it has a custom,
 * dedicated BPF backend memory area, or if neither of the two
 * below apply.
 */
u64 __weak bpf_jit_alloc_exec_limit(void)
{
#if defined(MODULES_VADDR)
	return MODULES_END - MODULES_VADDR;
#else
	return VMALLOC_END - VMALLOC_START;
#endif
}

static int __init bpf_jit_charge_init(void)
{
	/* Only used as heuristic here to derive limit. */
	bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
	bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
					    PAGE_SIZE), LONG_MAX);
	return 0;
}
pure_initcall(bpf_jit_charge_init);

int bpf_jit_charge_modmem(u32 size)
{
	if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
		if (!bpf_capable()) {
			atomic_long_sub(size, &bpf_jit_current);
			return -EPERM;
		}
	}

	return 0;
}

void bpf_jit_uncharge_modmem(u32 size)
{
	atomic_long_sub(size, &bpf_jit_current);
}

void *__weak bpf_jit_alloc_exec(unsigned long size)
{
	return module_alloc(size);
}

void __weak bpf_jit_free_exec(void *addr)
{
	module_memfree(addr);
}

struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
		     unsigned int alignment,
		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	struct bpf_binary_header *hdr;
	u32 size, hole, start;

	WARN_ON_ONCE(!is_power_of_2(alignment) ||
		     alignment > BPF_IMAGE_ALIGNMENT);

	/* Most of BPF filters are really small, but if some of them
	 * fill a page, allow at least 128 extra bytes to insert a
	 * random section of illegal instructions.
	 */
	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);

	if (bpf_jit_charge_modmem(size))
		return NULL;
	hdr = bpf_jit_alloc_exec(size);
	if (!hdr) {
		bpf_jit_uncharge_modmem(size);
		return NULL;
	}

	/* Fill space with illegal/arch-dep instructions. */
	bpf_fill_ill_insns(hdr, size);

	hdr->size = size;
	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
		     PAGE_SIZE - sizeof(*hdr));
	start = (get_random_int() % hole) & ~(alignment - 1);

	/* Leave a random number of instructions before BPF code. */
	*image_ptr = &hdr->image[start];

	return hdr;
}

void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
	u32 size = hdr->size;

	bpf_jit_free_exec(hdr);
	bpf_jit_uncharge_modmem(size);
}
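
/* Sketch of how an arch JIT typically drives the plain (non-pack) allocator
 * above; the fill callback and the requested 4-byte alignment are
 * hypothetical stand-ins for what a real backend would use.
 */
static __maybe_unused void bpf_jit_fill_example(void *area, unsigned int size)
{
	/* a real arch writes trapping instructions here; zeroing is a stand-in */
	memset(area, 0, size);
}

static __maybe_unused struct bpf_binary_header *
bpf_jit_alloc_example(unsigned int proglen, u8 **image)
{
	struct bpf_binary_header *hdr;

	hdr = bpf_jit_binary_alloc(proglen, image, 4, bpf_jit_fill_example);
	if (!hdr)
		return NULL;
	/* ... emit instructions into *image, then set the image read-only ... */
	return hdr;
}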

/* Allocate jit binary from bpf_prog_pack allocator.
 * Since the allocated memory is RO+X, the JIT engine cannot write directly
 * to the memory. To solve this problem, a RW buffer is also allocated at
 * the same time. The JIT engine should calculate offsets based on the
 * RO memory address, but write the JITed program to the RW buffer. Once the
 * JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
 * the JITed program to the RO memory.
 */
struct bpf_binary_header *
bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
			  unsigned int alignment,
			  struct bpf_binary_header **rw_header,
			  u8 **rw_image,
			  bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	struct bpf_binary_header *ro_header;
	u32 size, hole, start;

	WARN_ON_ONCE(!is_power_of_2(alignment) ||
		     alignment > BPF_IMAGE_ALIGNMENT);

	/* add 16 bytes for a random section of illegal instructions */
	size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);

	if (bpf_jit_charge_modmem(size))
		return NULL;
	ro_header = bpf_prog_pack_alloc(size);
	if (!ro_header) {
		bpf_jit_uncharge_modmem(size);
		return NULL;
	}

	*rw_header = kvmalloc(size, GFP_KERNEL);
	if (!*rw_header) {
		bpf_arch_text_copy(&ro_header->size, &size, sizeof(size));
		bpf_prog_pack_free(ro_header);
		bpf_jit_uncharge_modmem(size);
		return NULL;
	}

	/* Fill space with illegal/arch-dep instructions. */
	bpf_fill_ill_insns(*rw_header, size);
	(*rw_header)->size = size;

	hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
		     BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
	start = (get_random_int() % hole) & ~(alignment - 1);

	*image_ptr = &ro_header->image[start];
	*rw_image = &(*rw_header)->image[start];

	return ro_header;
}

/* Copy JITed text from rw_header to its final location, the ro_header. */
int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
				 struct bpf_binary_header *ro_header,
				 struct bpf_binary_header *rw_header)
{
	void *ptr;

	ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);

	kvfree(rw_header);

	if (IS_ERR(ptr)) {
		bpf_prog_pack_free(ro_header);
		return PTR_ERR(ptr);
	}
	prog->aux->use_bpf_prog_pack = true;
	return 0;
}

/* bpf_jit_binary_pack_free is called in two different scenarios:
 *   1) when the program is freed after JITing completed (the normal case);
 *   2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
 * For case 2), we need to free both the RO memory and the RW buffer.
 *
 * bpf_jit_binary_pack_free requires proper ro_header->size. However,
 * bpf_jit_binary_pack_alloc does not set it. Therefore, ro_header->size
 * must be set with either bpf_jit_binary_pack_finalize (normal path) or
 * bpf_arch_text_copy (when jit fails).
 */
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
			      struct bpf_binary_header *rw_header)
{
	u32 size = ro_header->size;

	bpf_prog_pack_free(ro_header);
	kvfree(rw_header);
	bpf_jit_uncharge_modmem(size);
}

static inline struct bpf_binary_header *
bpf_jit_binary_hdr(const struct bpf_prog *fp)
{
	unsigned long real_start = (unsigned long)fp->bpf_func;
	unsigned long addr;

	if (fp->aux->use_bpf_prog_pack)
		addr = real_start & BPF_PROG_CHUNK_MASK;
	else
		addr = real_start & PAGE_MASK;

	return (void *)addr;
}

/* This symbol is only overridden by archs that have different
 * requirements than the usual eBPF JITs, f.e. when they only
 * implement cBPF JIT, do not set images read-only, etc.
 */
void __weak bpf_jit_free(struct bpf_prog *fp)
{
	if (fp->jited) {
		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);

		if (fp->aux->use_bpf_prog_pack)
			bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */);
		else
			bpf_jit_binary_free(hdr);

		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
	}

	bpf_prog_unlock_free(fp);
}

int bpf_jit_get_func_addr(const struct bpf_prog *prog,
			  const struct bpf_insn *insn, bool extra_pass,
			  u64 *func_addr, bool *func_addr_fixed)
{
	s16 off = insn->off;
	s32 imm = insn->imm;
	u8 *addr;

	*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
	if (!*func_addr_fixed) {
		/* Place-holder address till the last pass has collected
		 * all addresses for JITed subprograms in which case we
		 * can pick them up from prog->aux.
		 */
		if (!extra_pass)
			addr = NULL;
		else if (prog->aux->func &&
			 off >= 0 && off < prog->aux->func_cnt)
			addr = (u8 *)prog->aux->func[off]->bpf_func;
		else
			return -EINVAL;
	} else {
		/* Address of a BPF helper call. Since part of the core
		 * kernel, it's always at a fixed location. __bpf_call_base
		 * and the helper with imm relative to it are both in core
		 * kernel.
		 */
		addr = (u8 *)__bpf_call_base + imm;
	}

	*func_addr = (unsigned long)addr;
	return 0;
}

static int bpf_jit_blind_insn(const struct bpf_insn *from,
			      const struct bpf_insn *aux,
			      struct bpf_insn *to_buff,
			      bool emit_zext)
{
	struct bpf_insn *to = to_buff;
	u32 imm_rnd = get_random_int();
	s16 off;

	BUILD_BUG_ON(BPF_REG_AX  + 1 != MAX_BPF_JIT_REG);
	BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);

	/* Constraints on AX register:
	 *
	 * AX register is inaccessible from user space. It is mapped in
	 * all JITs, and used here for constant blinding rewrites. It is
	 * typically "stateless" meaning its contents are only valid within
	 * the executed instruction, but not across several instructions.
	 * There are a few exceptions however which are further detailed
	 * below.
	 *
	 * Constant blinding is only used by JITs, not in the interpreter.
	 * The interpreter uses AX in some occasions as a local temporary
	 * register e.g. in DIV or MOD instructions.
	 *
	 * In restricted circumstances, the verifier can also use the AX
	 * register for rewrites as long as they do not interfere with
	 * the above cases!
	 */
	if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
		goto out;

	if (from->imm == 0 &&
	    (from->code == (BPF_ALU   | BPF_MOV | BPF_K) ||
	     from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
		*to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
		goto out;
	}

	switch (from->code) {
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR  | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_K:
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
		break;

	case BPF_ALU64 | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR  | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
		break;

	case BPF_JMP | BPF_JEQ  | BPF_K:
	case BPF_JMP | BPF_JNE  | BPF_K:
	case BPF_JMP | BPF_JGT  | BPF_K:
	case BPF_JMP | BPF_JLT  | BPF_K:
	case BPF_JMP | BPF_JGE  | BPF_K:
	case BPF_JMP | BPF_JLE  | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP | BPF_JSET | BPF_K:
		/* Accommodate for extra offset in case of a backjump. */
		off = from->off;
		if (off < 0)
			off -= 2;
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
		break;

	case BPF_JMP32 | BPF_JEQ  | BPF_K:
	case BPF_JMP32 | BPF_JNE  | BPF_K:
	case BPF_JMP32 | BPF_JGT  | BPF_K:
	case BPF_JMP32 | BPF_JLT  | BPF_K:
	case BPF_JMP32 | BPF_JGE  | BPF_K:
	case BPF_JMP32 | BPF_JLE  | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		/* Accommodate for extra offset in case of a backjump. */
		off = from->off;
		if (off < 0)
			off -= 2;
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
				      off);
		break;

	case BPF_LD | BPF_IMM | BPF_DW:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
		*to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
		break;
	case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		if (emit_zext)
			*to++ = BPF_ZEXT_REG(BPF_REG_AX);
		*to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX);
		break;

	case BPF_ST | BPF_MEM | BPF_DW:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
		break;
	}
out:
	return to - to_buff;
}
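
/* Example of the rewrite above for a single ALU immediate (the constant
 * 0x1234 and register R1 are hypothetical; imm_rnd is the per-insn random
 * value):
 *
 *	BPF_ALU64_IMM(BPF_ADD, R1, 0x1234)
 *
 * becomes
 *
 *	BPF_ALU64_IMM(BPF_MOV, AX, imm_rnd ^ 0x1234)
 *	BPF_ALU64_IMM(BPF_XOR, AX, imm_rnd)
 *	BPF_ALU64_REG(BPF_ADD, R1, AX)
 *
 * so the attacker-chosen constant never appears verbatim in the JITed image.
 */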

static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
					      gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
	struct bpf_prog *fp;

	fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
	if (fp != NULL) {
		/* aux->prog still points to the fp_other one, so
		 * when promoting the clone to the real program,
		 * this still needs to be adapted.
		 */
		memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
	}

	return fp;
}

static void bpf_prog_clone_free(struct bpf_prog *fp)
{
	/* aux was stolen by the other clone, so we cannot free
	 * it from this path! It will be freed eventually by the
	 * other program on release.
	 *
	 * At this point, we don't need a deferred release since
	 * clone is guaranteed to not be locked.
	 */
	fp->aux = NULL;
	fp->stats = NULL;
	fp->active = NULL;
	__bpf_prog_free(fp);
}

void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
{
	/* We have to repoint aux->prog to self, as we don't
	 * know whether fp here is the clone or the original.
	 */
	fp->aux->prog = fp;
	bpf_prog_clone_free(fp_other);
}

struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
{
	struct bpf_insn insn_buff[16], aux[2];
	struct bpf_prog *clone, *tmp;
	int insn_delta, insn_cnt;
	struct bpf_insn *insn;
	int i, rewritten;

	if (!prog->blinding_requested || prog->blinded)
		return prog;

	clone = bpf_prog_clone_create(prog, GFP_USER);
	if (!clone)
		return ERR_PTR(-ENOMEM);

	insn_cnt = clone->len;
	insn = clone->insnsi;

	for (i = 0; i < insn_cnt; i++, insn++) {
		/* We temporarily need to hold the original ld64 insn
		 * so that we can still access the first part in the
		 * second blinding run.
		 */
		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
		    insn[1].code == 0)
			memcpy(aux, insn, sizeof(aux));

		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
					       clone->aux->verifier_zext);
		if (!rewritten)
			continue;

		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
		if (IS_ERR(tmp)) {
			/* Patching may have repointed aux->prog during
			 * realloc from the original one, so we need to
			 * fix it up here on error.
			 */
			bpf_jit_prog_release_other(prog, clone);
			return tmp;
		}

		clone = tmp;
		insn_delta = rewritten - 1;

		/* Walk new program and skip insns we just inserted. */
		insn = clone->insnsi + i + insn_delta;
		insn_cnt += insn_delta;
		i += insn_delta;
	}

	clone->blinded = 1;
	return clone;
}
#endif /* CONFIG_BPF_JIT */

/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it. This also needs
 * to go into kallsyms for correlation from e.g. bpftool, so naming
 * must not change.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__bpf_call_base);

/* All UAPI available opcodes. */
#define BPF_INSN_MAP(INSN_2, INSN_3)		\
	/* 32 bit ALU operations. */		\
	/* Register based. */			\
	INSN_3(ALU, ADD, X),			\
	INSN_3(ALU, SUB, X),			\
	INSN_3(ALU, AND, X),			\
	INSN_3(ALU, OR, X),			\
	INSN_3(ALU, LSH, X),			\
	INSN_3(ALU, RSH, X),			\
	INSN_3(ALU, XOR, X),			\
	INSN_3(ALU, MUL, X),			\
	INSN_3(ALU, MOV, X),			\
	INSN_3(ALU, ARSH, X),			\
	INSN_3(ALU, DIV, X),			\
	INSN_3(ALU, MOD, X),			\
	INSN_2(ALU, NEG),			\
	INSN_3(ALU, END, TO_BE),		\
	INSN_3(ALU, END, TO_LE),		\
	/* Immediate based. */			\
	INSN_3(ALU, ADD, K),			\
	INSN_3(ALU, SUB, K),			\
	INSN_3(ALU, AND, K),			\
	INSN_3(ALU, OR, K),			\
	INSN_3(ALU, LSH, K),			\
	INSN_3(ALU, RSH, K),			\
	INSN_3(ALU, XOR, K),			\
	INSN_3(ALU, MUL, K),			\
	INSN_3(ALU, MOV, K),			\
	INSN_3(ALU, ARSH, K),			\
	INSN_3(ALU, DIV, K),			\
	INSN_3(ALU, MOD, K),			\
	/* 64 bit ALU operations. */		\
	/* Register based. */			\
	INSN_3(ALU64, ADD, X),			\
	INSN_3(ALU64, SUB, X),			\
	INSN_3(ALU64, AND, X),			\
	INSN_3(ALU64, OR, X),			\
	INSN_3(ALU64, LSH, X),			\
	INSN_3(ALU64, RSH, X),			\
	INSN_3(ALU64, XOR, X),			\
	INSN_3(ALU64, MUL, X),			\
	INSN_3(ALU64, MOV, X),			\
	INSN_3(ALU64, ARSH, X),			\
	INSN_3(ALU64, DIV, X),			\
	INSN_3(ALU64, MOD, X),			\
	INSN_2(ALU64, NEG),			\
	/* Immediate based. */			\
	INSN_3(ALU64, ADD, K),			\
	INSN_3(ALU64, SUB, K),			\
	INSN_3(ALU64, AND, K),			\
	INSN_3(ALU64, OR, K),			\
	INSN_3(ALU64, LSH, K),			\
	INSN_3(ALU64, RSH, K),			\
	INSN_3(ALU64, XOR, K),			\
	INSN_3(ALU64, MUL, K),			\
	INSN_3(ALU64, MOV, K),			\
	INSN_3(ALU64, ARSH, K),			\
	INSN_3(ALU64, DIV, K),			\
	INSN_3(ALU64, MOD, K),			\
	/* Call instruction. */			\
	INSN_2(JMP, CALL),			\
	/* Exit instruction. */			\
	INSN_2(JMP, EXIT),			\
	/* 32-bit Jump instructions. */		\
	/* Register based. */			\
	INSN_3(JMP32, JEQ, X),			\
	INSN_3(JMP32, JNE, X),			\
	INSN_3(JMP32, JGT, X),			\
	INSN_3(JMP32, JLT, X),			\
	INSN_3(JMP32, JGE, X),			\
	INSN_3(JMP32, JLE, X),			\
	INSN_3(JMP32, JSGT, X),			\
	INSN_3(JMP32, JSLT, X),			\
	INSN_3(JMP32, JSGE, X),			\
	INSN_3(JMP32, JSLE, X),			\
	INSN_3(JMP32, JSET, X),			\
	/* Immediate based. */			\
	INSN_3(JMP32, JEQ, K),			\
	INSN_3(JMP32, JNE, K),			\
	INSN_3(JMP32, JGT, K),			\
	INSN_3(JMP32, JLT, K),			\
	INSN_3(JMP32, JGE, K),			\
	INSN_3(JMP32, JLE, K),			\
	INSN_3(JMP32, JSGT, K),			\
	INSN_3(JMP32, JSLT, K),			\
	INSN_3(JMP32, JSGE, K),			\
	INSN_3(JMP32, JSLE, K),			\
	INSN_3(JMP32, JSET, K),			\
	/* Jump instructions. */		\
	/* Register based. */			\
	INSN_3(JMP, JEQ, X),			\
	INSN_3(JMP, JNE, X),			\
	INSN_3(JMP, JGT, X),			\
	INSN_3(JMP, JLT, X),			\
	INSN_3(JMP, JGE, X),			\
	INSN_3(JMP, JLE, X),			\
	INSN_3(JMP, JSGT, X),			\
	INSN_3(JMP, JSLT, X),			\
	INSN_3(JMP, JSGE, X),			\
	INSN_3(JMP, JSLE, X),			\
	INSN_3(JMP, JSET, X),			\
	/* Immediate based. */			\
	INSN_3(JMP, JEQ, K),			\
	INSN_3(JMP, JNE, K),			\
	INSN_3(JMP, JGT, K),			\
	INSN_3(JMP, JLT, K),			\
	INSN_3(JMP, JGE, K),			\
	INSN_3(JMP, JLE, K),			\
	INSN_3(JMP, JSGT, K),			\
	INSN_3(JMP, JSLT, K),			\
	INSN_3(JMP, JSGE, K),			\
	INSN_3(JMP, JSLE, K),			\
	INSN_3(JMP, JSET, K),			\
	INSN_2(JMP, JA),			\
	/* Store instructions. */		\
	/* Register based. */			\
	INSN_3(STX, MEM, B),			\
	INSN_3(STX, MEM, H),			\
	INSN_3(STX, MEM, W),			\
	INSN_3(STX, MEM, DW),			\
	INSN_3(STX, ATOMIC, W),			\
	INSN_3(STX, ATOMIC, DW),		\
	/* Immediate based. */			\
	INSN_3(ST, MEM, B),			\
	INSN_3(ST, MEM, H),			\
	INSN_3(ST, MEM, W),			\
	INSN_3(ST, MEM, DW),			\
	/* Load instructions. */		\
	/* Register based. */			\
	INSN_3(LDX, MEM, B),			\
	INSN_3(LDX, MEM, H),			\
	INSN_3(LDX, MEM, W),			\
	INSN_3(LDX, MEM, DW),			\
	/* Immediate based. */			\
	INSN_3(LD, IMM, DW)

bool bpf_opcode_in_insntable(u8 code)
{
#define BPF_INSN_2_TBL(x, y)    [BPF_##x | BPF_##y] = true
#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
	static const bool public_insntable[256] = {
		[0 ... 255] = false,
		/* Now overwrite non-defaults ... */
		BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
		/* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
		[BPF_LD | BPF_ABS | BPF_B] = true,
		[BPF_LD | BPF_ABS | BPF_H] = true,
		[BPF_LD | BPF_ABS | BPF_W] = true,
		[BPF_LD | BPF_IND | BPF_B] = true,
		[BPF_LD | BPF_IND | BPF_H] = true,
		[BPF_LD | BPF_IND | BPF_W] = true,
	};
#undef BPF_INSN_3_TBL
#undef BPF_INSN_2_TBL
	return public_insntable[code];
}
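
/* Minimal sketch of how the table above can be consulted (the verifier does
 * an equivalent opcode sanity pass; this helper itself is hypothetical):
 */
static __maybe_unused bool bpf_insns_all_known(const struct bpf_insn *insns,
					       u32 len)
{
	u32 i;

	for (i = 0; i < len; i++) {
		if (!bpf_opcode_in_insntable(insns[i].code))
			return false;
	}
	return true;
}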

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
{
	memset(dst, 0, size);
	return -EFAULT;
}

/**
 * ___bpf_prog_run - run eBPF program on a given context
 * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
 * @insn: is the array of eBPF instructions
 *
 * Decode and execute eBPF instructions.
 *
 * Return: whatever value is in %BPF_R0 at program exit
 */
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
{
#define BPF_INSN_2_LBL(x, y)    [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
	static const void * const jumptable[256] __annotate_jump_table = {
		[0 ... 255] = &&default_label,
		/* Now overwrite non-defaults ... */
		BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
		/* Non-UAPI available opcodes. */
		[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
		[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
		[BPF_ST  | BPF_NOSPEC] = &&ST_NOSPEC,
		[BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
		[BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
		[BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
		[BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
	};
#undef BPF_INSN_3_LBL
#undef BPF_INSN_2_LBL
	u32 tail_call_cnt = 0;

#define CONT	 ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })

select_insn:
	goto *jumptable[insn->code];

	/* Explicitly mask the register-based shift amounts with 63 or 31
	 * to avoid undefined behavior. Normally this won't affect the
	 * generated code, for example, in case of native 64 bit archs such
	 * as x86-64 or arm64, the compiler is optimizing the AND away for
	 * the interpreter. In case of JITs, each of the JIT backends compiles
	 * the BPF shift operations to machine instructions which produce
	 * implementation-defined results in such a case; the resulting
	 * contents of the register may be arbitrary, but program behaviour
	 * as a whole remains defined. In other words, in case of JIT backends,
	 * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
	 */
	/* ALU (shifts) */
#define SHT(OPCODE, OP)					\
	ALU64_##OPCODE##_X:				\
		DST = DST OP (SRC & 63);		\
		CONT;					\
	ALU_##OPCODE##_X:				\
		DST = (u32) DST OP ((u32) SRC & 31);	\
		CONT;					\
	ALU64_##OPCODE##_K:				\
		DST = DST OP IMM;			\
		CONT;					\
	ALU_##OPCODE##_K:				\
		DST = (u32) DST OP (u32) IMM;		\
		CONT;
	/* ALU (rest) */
#define ALU(OPCODE, OP)					\
	ALU64_##OPCODE##_X:				\
		DST = DST OP SRC;			\
		CONT;					\
	ALU_##OPCODE##_X:				\
		DST = (u32) DST OP (u32) SRC;		\
		CONT;					\
	ALU64_##OPCODE##_K:				\
		DST = DST OP IMM;			\
		CONT;					\
	ALU_##OPCODE##_K:				\
		DST = (u32) DST OP (u32) IMM;		\
		CONT;
	ALU(ADD, +)
	ALU(SUB, -)
	ALU(AND, &)
	ALU(OR, |)
	ALU(XOR, ^)
	ALU(MUL, *)
	SHT(LSH, <<)
	SHT(RSH, >>)
#undef SHT
#undef ALU
	ALU_NEG:
		DST = (u32) -DST;
		CONT;
	ALU64_NEG:
		DST = -DST;
		CONT;
	ALU_MOV_X:
		DST = (u32) SRC;
		CONT;
	ALU_MOV_K:
		DST = (u32) IMM;
		CONT;
	ALU64_MOV_X:
		DST = SRC;
		CONT;
	ALU64_MOV_K:
		DST = IMM;
		CONT;
	LD_IMM_DW:
		DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
		insn++;
		CONT;
	ALU_ARSH_X:
		DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
		CONT;
	ALU_ARSH_K:
		DST = (u64) (u32) (((s32) DST) >> IMM);
		CONT;
	ALU64_ARSH_X:
		(*(s64 *) &DST) >>= (SRC & 63);
		CONT;
	ALU64_ARSH_K:
		(*(s64 *) &DST) >>= IMM;
		CONT;
	ALU64_MOD_X:
		div64_u64_rem(DST, SRC, &AX);
		DST = AX;
		CONT;
	ALU_MOD_X:
		AX = (u32) DST;
		DST = do_div(AX, (u32) SRC);
		CONT;
	ALU64_MOD_K:
		div64_u64_rem(DST, IMM, &AX);
		DST = AX;
		CONT;
	ALU_MOD_K:
		AX = (u32) DST;
		DST = do_div(AX, (u32) IMM);
		CONT;
	ALU64_DIV_X:
		DST = div64_u64(DST, SRC);
		CONT;
	ALU_DIV_X:
		AX = (u32) DST;
		do_div(AX, (u32) SRC);
		DST = (u32) AX;
		CONT;
	ALU64_DIV_K:
		DST = div64_u64(DST, IMM);
		CONT;
	ALU_DIV_K:
		AX = (u32) DST;
		do_div(AX, (u32) IMM);
		DST = (u32) AX;
		CONT;
	ALU_END_TO_BE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_be16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_be32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_be64(DST);
			break;
		}
		CONT;
	ALU_END_TO_LE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_le16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_le32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_le64(DST);
			break;
		}
		CONT;

	/* CALL */
	JMP_CALL:
		/* Function call scratches BPF_R1-BPF_R5 registers,
		 * preserves BPF_R6-BPF_R9, and stores return value
		 * into BPF_R0.
		 */
		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
						       BPF_R4, BPF_R5);
		CONT;

	JMP_CALL_ARGS:
		BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
							    BPF_R3, BPF_R4,
							    BPF_R5,
							    insn + insn->off + 1);
		CONT;

	JMP_TAIL_CALL: {
		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
		struct bpf_array *array = container_of(map, struct bpf_array, map);
		struct bpf_prog *prog;
		u32 index = BPF_R3;

		if (unlikely(index >= array->map.max_entries))
			goto out;

		if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
			goto out;

		tail_call_cnt++;

		prog = READ_ONCE(array->ptrs[index]);
		if (!prog)
			goto out;

		/* ARG1 at this point is guaranteed to point to CTX from
		 * the verifier side due to the fact that the tail call is
		 * handled like a helper, that is, bpf_tail_call_proto,
		 * where arg1_type is ARG_PTR_TO_CTX.
		 */
		insn = prog->insnsi;
		goto select_insn;
out:
		CONT;
	}
	JMP_JA:
		insn += insn->off;
		CONT;
	JMP_EXIT:
		return BPF_R0;
	/* JMP */
#define COND_JMP(SIGN, OPCODE, CMP_OP)				\
	JMP_##OPCODE##_X:					\
		if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) {	\
			insn += insn->off;			\
			CONT_JMP;				\
		}						\
		CONT;						\
	JMP32_##OPCODE##_X:					\
		if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) {	\
			insn += insn->off;			\
			CONT_JMP;				\
		}						\
		CONT;						\
	JMP_##OPCODE##_K:					\
		if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) {	\
			insn += insn->off;			\
			CONT_JMP;				\
		}						\
		CONT;						\
	JMP32_##OPCODE##_K:					\
		if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) {	\
			insn += insn->off;			\
			CONT_JMP;				\
		}						\
		CONT;
	COND_JMP(u, JEQ, ==)
	COND_JMP(u, JNE, !=)
	COND_JMP(u, JGT, >)
	COND_JMP(u, JLT, <)
	COND_JMP(u, JGE, >=)
	COND_JMP(u, JLE, <=)
	COND_JMP(u, JSET, &)
	COND_JMP(s, JSGT, >)
	COND_JMP(s, JSLT, <)
	COND_JMP(s, JSGE, >=)
	COND_JMP(s, JSLE, <=)
#undef COND_JMP
	/* ST, STX and LDX */
	ST_NOSPEC:
		/* Speculation barrier for mitigating Speculative Store Bypass.
		 * In case of arm64, we rely on the firmware mitigation as
		 * controlled via the ssbd kernel parameter. Whenever the
		 * mitigation is enabled, it works for all of the kernel code
		 * with no need to provide any additional instructions here.
		 * In case of x86, we use 'lfence' insn for mitigation. We
		 * reuse preexisting logic from Spectre v1 mitigation that
		 * happens to produce the required code on x86 for v4 as well.
		 */
#ifdef CONFIG_X86
		barrier_nospec();
#endif
		CONT;
#define LDST(SIZEOP, SIZE)						\
	STX_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
		CONT;							\
	ST_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
		CONT;							\
	LDX_MEM_##SIZEOP:						\
		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
		CONT;

	LDST(B, u8)
	LDST(H, u16)
	LDST(W, u32)
	LDST(DW, u64)
#undef LDST
#define LDX_PROBE(SIZEOP, SIZE)							\
	LDX_PROBE_MEM_##SIZEOP:							\
		bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off));	\
		CONT;
	LDX_PROBE(B, 1)
	LDX_PROBE(H, 2)
	LDX_PROBE(W, 4)
	LDX_PROBE(DW, 8)
#undef LDX_PROBE
1979 */ 1980 pr_warn("BPF interpreter: unknown opcode %02x (imm: 0x%x)\n", 1981 insn->code, insn->imm); 1982 BUG_ON(1); 1983 return 0; 1984 } 1985 1986 #define PROG_NAME(stack_size) __bpf_prog_run##stack_size 1987 #define DEFINE_BPF_PROG_RUN(stack_size) \ 1988 static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ 1989 { \ 1990 u64 stack[stack_size / sizeof(u64)]; \ 1991 u64 regs[MAX_BPF_EXT_REG]; \ 1992 \ 1993 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ 1994 ARG1 = (u64) (unsigned long) ctx; \ 1995 return ___bpf_prog_run(regs, insn); \ 1996 } 1997 1998 #define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size 1999 #define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \ 2000 static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ 2001 const struct bpf_insn *insn) \ 2002 { \ 2003 u64 stack[stack_size / sizeof(u64)]; \ 2004 u64 regs[MAX_BPF_EXT_REG]; \ 2005 \ 2006 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ 2007 BPF_R1 = r1; \ 2008 BPF_R2 = r2; \ 2009 BPF_R3 = r3; \ 2010 BPF_R4 = r4; \ 2011 BPF_R5 = r5; \ 2012 return ___bpf_prog_run(regs, insn); \ 2013 } 2014 2015 #define EVAL1(FN, X) FN(X) 2016 #define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y) 2017 #define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y) 2018 #define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y) 2019 #define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y) 2020 #define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y) 2021 2022 EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192); 2023 EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384); 2024 EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512); 2025 2026 EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192); 2027 EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384); 2028 EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512); 2029 2030 #define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size), 2031 2032 static unsigned int (*interpreters[])(const void *ctx, 2033 const struct bpf_insn *insn) = { 2034 EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) 2035 EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) 2036 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) 2037 }; 2038 #undef PROG_NAME_LIST 2039 #define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size), 2040 static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, 2041 const struct bpf_insn *insn) = { 2042 EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) 2043 EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) 2044 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) 2045 }; 2046 #undef PROG_NAME_LIST 2047 2048 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) 2049 { 2050 stack_depth = max_t(u32, stack_depth, 1); 2051 insn->off = (s16) insn->imm; 2052 insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] - 2053 __bpf_call_base_args; 2054 insn->code = BPF_JMP | BPF_CALL_ARGS; 2055 } 2056 2057 #else 2058 static unsigned int __bpf_prog_ret0_warn(const void *ctx, 2059 const struct bpf_insn *insn) 2060 { 2061 /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON 2062 * is not working properly, so warn about it! 2063 */ 2064 WARN_ON_ONCE(1); 2065 return 0; 2066 } 2067 #endif 2068 2069 bool bpf_prog_map_compatible(struct bpf_map *map, 2070 const struct bpf_prog *fp) 2071 { 2072 bool ret; 2073 2074 if (fp->kprobe_override) 2075 return false; 2076 2077 spin_lock(&map->owner.lock); 2078 if (!map->owner.type) { 2079 /* There's no owner yet where we could check for 2080 * compatibility. 
2081 */ 2082 map->owner.type = fp->type; 2083 map->owner.jited = fp->jited; 2084 map->owner.xdp_has_frags = fp->aux->xdp_has_frags; 2085 ret = true; 2086 } else { 2087 ret = map->owner.type == fp->type && 2088 map->owner.jited == fp->jited && 2089 map->owner.xdp_has_frags == fp->aux->xdp_has_frags; 2090 } 2091 spin_unlock(&map->owner.lock); 2092 2093 return ret; 2094 } 2095 2096 static int bpf_check_tail_call(const struct bpf_prog *fp) 2097 { 2098 struct bpf_prog_aux *aux = fp->aux; 2099 int i, ret = 0; 2100 2101 mutex_lock(&aux->used_maps_mutex); 2102 for (i = 0; i < aux->used_map_cnt; i++) { 2103 struct bpf_map *map = aux->used_maps[i]; 2104 2105 if (!map_type_contains_progs(map)) 2106 continue; 2107 2108 if (!bpf_prog_map_compatible(map, fp)) { 2109 ret = -EINVAL; 2110 goto out; 2111 } 2112 } 2113 2114 out: 2115 mutex_unlock(&aux->used_maps_mutex); 2116 return ret; 2117 } 2118 2119 static void bpf_prog_select_func(struct bpf_prog *fp) 2120 { 2121 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 2122 u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); 2123 2124 fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; 2125 #else 2126 fp->bpf_func = __bpf_prog_ret0_warn; 2127 #endif 2128 } 2129 2130 /** 2131 * bpf_prog_select_runtime - select exec runtime for BPF program 2132 * @fp: bpf_prog populated with BPF program 2133 * @err: pointer to error variable 2134 * 2135 * Try to JIT eBPF program, if JIT is not available, use interpreter. 2136 * The BPF program will be executed via bpf_prog_run() function. 2137 * 2138 * Return: the &fp argument along with &err set to 0 for success or 2139 * a negative errno code on failure 2140 */ 2141 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) 2142 { 2143 /* In case of BPF to BPF calls, verifier did all the prep 2144 * work with regards to JITing, etc. 2145 */ 2146 bool jit_needed = false; 2147 2148 if (fp->bpf_func) 2149 goto finalize; 2150 2151 if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) || 2152 bpf_prog_has_kfunc_call(fp)) 2153 jit_needed = true; 2154 2155 bpf_prog_select_func(fp); 2156 2157 /* eBPF JITs can rewrite the program in case constant 2158 * blinding is active. However, in case of error during 2159 * blinding, bpf_int_jit_compile() must always return a 2160 * valid program, which in this case would simply not 2161 * be JITed, but falls back to the interpreter. 2162 */ 2163 if (!bpf_prog_is_dev_bound(fp->aux)) { 2164 *err = bpf_prog_alloc_jited_linfo(fp); 2165 if (*err) 2166 return fp; 2167 2168 fp = bpf_int_jit_compile(fp); 2169 bpf_prog_jit_attempt_done(fp); 2170 if (!fp->jited && jit_needed) { 2171 *err = -ENOTSUPP; 2172 return fp; 2173 } 2174 } else { 2175 *err = bpf_prog_offload_compile(fp); 2176 if (*err) 2177 return fp; 2178 } 2179 2180 finalize: 2181 bpf_prog_lock_ro(fp); 2182 2183 /* The tail call compatibility check can only be done at 2184 * this late stage as we need to determine, if we deal 2185 * with JITed or non JITed program concatenations and not 2186 * all eBPF JITs might immediately support all features. 
2187 */ 2188 *err = bpf_check_tail_call(fp); 2189 2190 return fp; 2191 } 2192 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); 2193 2194 static unsigned int __bpf_prog_ret1(const void *ctx, 2195 const struct bpf_insn *insn) 2196 { 2197 return 1; 2198 } 2199 2200 static struct bpf_prog_dummy { 2201 struct bpf_prog prog; 2202 } dummy_bpf_prog = { 2203 .prog = { 2204 .bpf_func = __bpf_prog_ret1, 2205 }, 2206 }; 2207 2208 struct bpf_empty_prog_array bpf_empty_prog_array = { 2209 .null_prog = NULL, 2210 }; 2211 EXPORT_SYMBOL(bpf_empty_prog_array); 2212 2213 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) 2214 { 2215 if (prog_cnt) 2216 return kzalloc(sizeof(struct bpf_prog_array) + 2217 sizeof(struct bpf_prog_array_item) * 2218 (prog_cnt + 1), 2219 flags); 2220 2221 return &bpf_empty_prog_array.hdr; 2222 } 2223 2224 void bpf_prog_array_free(struct bpf_prog_array *progs) 2225 { 2226 if (!progs || progs == &bpf_empty_prog_array.hdr) 2227 return; 2228 kfree_rcu(progs, rcu); 2229 } 2230 2231 int bpf_prog_array_length(struct bpf_prog_array *array) 2232 { 2233 struct bpf_prog_array_item *item; 2234 u32 cnt = 0; 2235 2236 for (item = array->items; item->prog; item++) 2237 if (item->prog != &dummy_bpf_prog.prog) 2238 cnt++; 2239 return cnt; 2240 } 2241 2242 bool bpf_prog_array_is_empty(struct bpf_prog_array *array) 2243 { 2244 struct bpf_prog_array_item *item; 2245 2246 for (item = array->items; item->prog; item++) 2247 if (item->prog != &dummy_bpf_prog.prog) 2248 return false; 2249 return true; 2250 } 2251 2252 static bool bpf_prog_array_copy_core(struct bpf_prog_array *array, 2253 u32 *prog_ids, 2254 u32 request_cnt) 2255 { 2256 struct bpf_prog_array_item *item; 2257 int i = 0; 2258 2259 for (item = array->items; item->prog; item++) { 2260 if (item->prog == &dummy_bpf_prog.prog) 2261 continue; 2262 prog_ids[i] = item->prog->aux->id; 2263 if (++i == request_cnt) { 2264 item++; 2265 break; 2266 } 2267 } 2268 2269 return !!(item->prog); 2270 } 2271 2272 int bpf_prog_array_copy_to_user(struct bpf_prog_array *array, 2273 __u32 __user *prog_ids, u32 cnt) 2274 { 2275 unsigned long err = 0; 2276 bool nospc; 2277 u32 *ids; 2278 2279 /* users of this function are doing: 2280 * cnt = bpf_prog_array_length(); 2281 * if (cnt > 0) 2282 * bpf_prog_array_copy_to_user(..., cnt); 2283 * so below kcalloc doesn't need extra cnt > 0 check. 2284 */ 2285 ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); 2286 if (!ids) 2287 return -ENOMEM; 2288 nospc = bpf_prog_array_copy_core(array, ids, cnt); 2289 err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); 2290 kfree(ids); 2291 if (err) 2292 return -EFAULT; 2293 if (nospc) 2294 return -ENOSPC; 2295 return 0; 2296 } 2297 2298 void bpf_prog_array_delete_safe(struct bpf_prog_array *array, 2299 struct bpf_prog *old_prog) 2300 { 2301 struct bpf_prog_array_item *item; 2302 2303 for (item = array->items; item->prog; item++) 2304 if (item->prog == old_prog) { 2305 WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); 2306 break; 2307 } 2308 } 2309 2310 /** 2311 * bpf_prog_array_delete_safe_at() - Replaces the program at the given 2312 * index into the program array with 2313 * a dummy no-op program. 2314 * @array: a bpf_prog_array 2315 * @index: the index of the program to replace 2316 * 2317 * Skips over dummy programs, by not counting them, when calculating 2318 * the position of the program to replace. 2319 * 2320 * Return: 2321 * * 0 - Success 2322 * * -EINVAL - Invalid index value. Must be a non-negative integer. 
/**
 * bpf_prog_array_delete_safe_at() - Replaces the program at the given
 *                                   index into the program array with
 *                                   a dummy no-op program.
 * @array: a bpf_prog_array
 * @index: the index of the program to replace
 *
 * Skips over dummy programs, by not counting them, when calculating
 * the position of the program to replace.
 *
 * Return:
 * * 0		- Success
 * * -EINVAL	- Invalid index value. Must be a non-negative integer.
 * * -ENOENT	- Index out of range
 */
int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
{
	return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
}

/**
 * bpf_prog_array_update_at() - Updates the program at the given index
 *                              into the program array.
 * @array: a bpf_prog_array
 * @index: the index of the program to update
 * @prog: the program to insert into the array
 *
 * Skips over dummy programs, by not counting them, when calculating
 * the position of the program to update.
 *
 * Return:
 * * 0		- Success
 * * -EINVAL	- Invalid index value. Must be a non-negative integer.
 * * -ENOENT	- Index out of range
 */
int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
			     struct bpf_prog *prog)
{
	struct bpf_prog_array_item *item;

	if (unlikely(index < 0))
		return -EINVAL;

	for (item = array->items; item->prog; item++) {
		if (item->prog == &dummy_bpf_prog.prog)
			continue;
		if (!index) {
			WRITE_ONCE(item->prog, prog);
			return 0;
		}
		index--;
	}
	return -ENOENT;
}

int bpf_prog_array_copy(struct bpf_prog_array *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			u64 bpf_cookie,
			struct bpf_prog_array **new_array)
{
	int new_prog_cnt, carry_prog_cnt = 0;
	struct bpf_prog_array_item *existing, *new;
	struct bpf_prog_array *array;
	bool found_exclude = false;

	/* Figure out how many existing progs we need to carry over to
	 * the new array.
	 */
	if (old_array) {
		existing = old_array->items;
		for (; existing->prog; existing++) {
			if (existing->prog == exclude_prog) {
				found_exclude = true;
				continue;
			}
			if (existing->prog != &dummy_bpf_prog.prog)
				carry_prog_cnt++;
			if (existing->prog == include_prog)
				return -EEXIST;
		}
	}

	if (exclude_prog && !found_exclude)
		return -ENOENT;

	/* How many progs (not NULL) will be in the new array? */
	new_prog_cnt = carry_prog_cnt;
	if (include_prog)
		new_prog_cnt += 1;

	/* Do we have any prog (not NULL) in the new array? */
	if (!new_prog_cnt) {
		*new_array = NULL;
		return 0;
	}

	/* +1 as the end of prog_array is marked with NULL */
	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
	if (!array)
		return -ENOMEM;
	new = array->items;

	/* Fill in the new prog array */
	if (carry_prog_cnt) {
		existing = old_array->items;
		for (; existing->prog; existing++) {
			if (existing->prog == exclude_prog ||
			    existing->prog == &dummy_bpf_prog.prog)
				continue;

			new->prog = existing->prog;
			new->bpf_cookie = existing->bpf_cookie;
			new++;
		}
	}
	if (include_prog) {
		new->prog = include_prog;
		new->bpf_cookie = bpf_cookie;
		new++;
	}
	new->prog = NULL;
	*new_array = array;
	return 0;
}

int bpf_prog_array_copy_info(struct bpf_prog_array *array,
			     u32 *prog_ids, u32 request_cnt,
			     u32 *prog_cnt)
{
	u32 cnt = 0;

	if (array)
		cnt = bpf_prog_array_length(array);

	*prog_cnt = cnt;

	/* return early if user requested only program count or nothing to copy */
	if (!request_cnt || !cnt)
		return 0;

	/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
	return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
								      : 0;
}

void __bpf_free_used_maps(struct bpf_prog_aux *aux,
			  struct bpf_map **used_maps, u32 len)
{
	struct bpf_map *map;
	u32 i;

	for (i = 0; i < len; i++) {
		map = used_maps[i];
		if (map->ops->map_poke_untrack)
			map->ops->map_poke_untrack(map, aux);
		bpf_map_put(map);
	}
}

static void bpf_free_used_maps(struct bpf_prog_aux *aux)
{
	__bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt);
	kfree(aux->used_maps);
}

void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
			  struct btf_mod_pair *used_btfs, u32 len)
{
#ifdef CONFIG_BPF_SYSCALL
	struct btf_mod_pair *btf_mod;
	u32 i;

	for (i = 0; i < len; i++) {
		btf_mod = &used_btfs[i];
		if (btf_mod->module)
			module_put(btf_mod->module);
		btf_put(btf_mod->btf);
	}
#endif
}

static void bpf_free_used_btfs(struct bpf_prog_aux *aux)
{
	__bpf_free_used_btfs(aux, aux->used_btfs, aux->used_btf_cnt);
	kfree(aux->used_btfs);
}

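/* Editor's illustrative sketch, not part of the original file: how an
 * attach path can use bpf_prog_array_copy() above to build a new array
 * with one additional program and publish it under RCU. The function name
 * bpf_example_attach, the update-protection condition passed to
 * rcu_dereference_protected() and the absence of any attach-side locking
 * are simplifications for illustration only.
 */
static int __maybe_unused
bpf_example_attach(struct bpf_prog_array __rcu **progs,
		   struct bpf_prog *prog, u64 bpf_cookie)
{
	struct bpf_prog_array *old_array, *new_array;
	int err;

	old_array = rcu_dereference_protected(*progs, 1);

	/* NULL exclude_prog: nothing is removed, @prog is appended. */
	err = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie,
				  &new_array);
	if (err)
		return err;

	rcu_assign_pointer(*progs, new_array);

	/* Safe right away: bpf_prog_array_free() defers via kfree_rcu(). */
	bpf_prog_array_free(old_array);
	return 0;
}
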
2521 */ 2522 aux->func[i]->aux->poke_tab = NULL; 2523 bpf_jit_free(aux->func[i]); 2524 } 2525 if (aux->func_cnt) { 2526 kfree(aux->func); 2527 bpf_prog_unlock_free(aux->prog); 2528 } else { 2529 bpf_jit_free(aux->prog); 2530 } 2531 } 2532 2533 void bpf_prog_free(struct bpf_prog *fp) 2534 { 2535 struct bpf_prog_aux *aux = fp->aux; 2536 2537 if (aux->dst_prog) 2538 bpf_prog_put(aux->dst_prog); 2539 INIT_WORK(&aux->work, bpf_prog_free_deferred); 2540 schedule_work(&aux->work); 2541 } 2542 EXPORT_SYMBOL_GPL(bpf_prog_free); 2543 2544 /* RNG for unpriviledged user space with separated state from prandom_u32(). */ 2545 static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state); 2546 2547 void bpf_user_rnd_init_once(void) 2548 { 2549 prandom_init_once(&bpf_user_rnd_state); 2550 } 2551 2552 BPF_CALL_0(bpf_user_rnd_u32) 2553 { 2554 /* Should someone ever have the rather unwise idea to use some 2555 * of the registers passed into this function, then note that 2556 * this function is called from native eBPF and classic-to-eBPF 2557 * transformations. Register assignments from both sides are 2558 * different, f.e. classic always sets fn(ctx, A, X) here. 2559 */ 2560 struct rnd_state *state; 2561 u32 res; 2562 2563 state = &get_cpu_var(bpf_user_rnd_state); 2564 res = prandom_u32_state(state); 2565 put_cpu_var(bpf_user_rnd_state); 2566 2567 return res; 2568 } 2569 2570 BPF_CALL_0(bpf_get_raw_cpu_id) 2571 { 2572 return raw_smp_processor_id(); 2573 } 2574 2575 /* Weak definitions of helper functions in case we don't have bpf syscall. */ 2576 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; 2577 const struct bpf_func_proto bpf_map_update_elem_proto __weak; 2578 const struct bpf_func_proto bpf_map_delete_elem_proto __weak; 2579 const struct bpf_func_proto bpf_map_push_elem_proto __weak; 2580 const struct bpf_func_proto bpf_map_pop_elem_proto __weak; 2581 const struct bpf_func_proto bpf_map_peek_elem_proto __weak; 2582 const struct bpf_func_proto bpf_spin_lock_proto __weak; 2583 const struct bpf_func_proto bpf_spin_unlock_proto __weak; 2584 const struct bpf_func_proto bpf_jiffies64_proto __weak; 2585 2586 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; 2587 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; 2588 const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; 2589 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; 2590 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak; 2591 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak; 2592 2593 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; 2594 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; 2595 const struct bpf_func_proto bpf_get_current_comm_proto __weak; 2596 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; 2597 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak; 2598 const struct bpf_func_proto bpf_get_local_storage_proto __weak; 2599 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; 2600 const struct bpf_func_proto bpf_snprintf_btf_proto __weak; 2601 const struct bpf_func_proto bpf_seq_printf_btf_proto __weak; 2602 2603 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) 2604 { 2605 return NULL; 2606 } 2607 2608 const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void) 2609 { 2610 return NULL; 2611 } 2612 2613 u64 __weak 2614 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 2615 void *ctx, u64 ctx_size, 
u64 __weak
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	return -ENOTSUPP;
}
EXPORT_SYMBOL_GPL(bpf_event_output);

/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
	.func		= NULL,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

/* Stub for JITs that only support cBPF. eBPF programs are interpreted.
 * It is encouraged to implement bpf_int_jit_compile() instead, so that
 * eBPF and implicitly also cBPF can get JITed!
 */
struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
{
	return prog;
}

/* Stub for JITs that support eBPF. All cBPF code gets transformed into
 * eBPF by the kernel and is later compiled by bpf_int_jit_compile().
 */
void __weak bpf_jit_compile(struct bpf_prog *prog)
{
}

bool __weak bpf_helper_changes_pkt_data(void *func)
{
	return false;
}

/* Return TRUE if the JIT backend wants verifier to enable sub-register usage
 * analysis code and wants explicit zero extension inserted by verifier.
 * Otherwise, return FALSE.
 *
 * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if
 * you don't override this. JITs that don't want these extra insns can detect
 * them using insn_is_zext.
 */
bool __weak bpf_jit_needs_zext(void)
{
	return false;
}

bool __weak bpf_jit_supports_kfunc_call(void)
{
	return false;
}

/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
 */
int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
			 int len)
{
	return -EFAULT;
}

int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
			      void *addr1, void *addr2)
{
	return -ENOTSUPP;
}

void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	return ERR_PTR(-ENOTSUPP);
}

DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);

/* All definitions of tracepoints related to BPF. */
#define CREATE_TRACE_POINTS
#include <linux/bpf_trace.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);
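
/* Editor's illustrative sketch, not part of the original file:
 * bpf_stats_enabled_key above is a static key, so per-run statistics
 * collection costs only a patched-out branch while it is disabled. The
 * hypothetical helper below just shows how such a key is consulted with
 * static_branch_unlikely(); the program run wrappers perform this check
 * (plus the actual timestamping) inline.
 */
static bool __maybe_unused bpf_example_stats_enabled(void)
{
	return static_branch_unlikely(&bpf_stats_enabled_key);
}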