1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ 3 #include <linux/bpf.h> 4 #include <linux/bpf_verifier.h> 5 #include <linux/filter.h> 6 #include <linux/sort.h> 7 8 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) 9 10 /* non-recursive DFS pseudo code 11 * 1 procedure DFS-iterative(G,v): 12 * 2 label v as discovered 13 * 3 let S be a stack 14 * 4 S.push(v) 15 * 5 while S is not empty 16 * 6 t <- S.peek() 17 * 7 if t is what we're looking for: 18 * 8 return t 19 * 9 for all edges e in G.adjacentEdges(t) do 20 * 10 if edge e is already labelled 21 * 11 continue with the next edge 22 * 12 w <- G.adjacentVertex(t,e) 23 * 13 if vertex w is not discovered and not explored 24 * 14 label e as tree-edge 25 * 15 label w as discovered 26 * 16 S.push(w) 27 * 17 continue at 5 28 * 18 else if vertex w is discovered 29 * 19 label e as back-edge 30 * 20 else 31 * 21 // vertex w is explored 32 * 22 label e as forward- or cross-edge 33 * 23 label t as explored 34 * 24 S.pop() 35 * 36 * convention: 37 * 0x10 - discovered 38 * 0x11 - discovered and fall-through edge labelled 39 * 0x12 - discovered and fall-through and branch edges labelled 40 * 0x20 - explored 41 */ 42 43 enum { 44 DISCOVERED = 0x10, 45 EXPLORED = 0x20, 46 FALLTHROUGH = 1, 47 BRANCH = 2, 48 }; 49 50 51 static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off) 52 { 53 struct bpf_subprog_info *subprog; 54 55 subprog = bpf_find_containing_subprog(env, off); 56 subprog->changes_pkt_data = true; 57 } 58 59 static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off) 60 { 61 struct bpf_subprog_info *subprog; 62 63 subprog = bpf_find_containing_subprog(env, off); 64 subprog->might_sleep = true; 65 } 66 67 static void mark_subprog_might_throw(struct bpf_verifier_env *env, int off) 68 { 69 struct bpf_subprog_info *subprog; 70 71 subprog = bpf_find_containing_subprog(env, off); 72 subprog->might_throw = true; 73 } 74 75 /* 't' is an index of a call-site. 76 * 'w' is a callee entry point. 77 * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. 78 * Rely on DFS traversal order and absence of recursive calls to guarantee that 79 * callee's effect marks would be correct at that moment. 80 */ 81 static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) 82 { 83 struct bpf_subprog_info *caller, *callee; 84 85 caller = bpf_find_containing_subprog(env, t); 86 callee = bpf_find_containing_subprog(env, w); 87 caller->changes_pkt_data |= callee->changes_pkt_data; 88 caller->might_sleep |= callee->might_sleep; 89 caller->might_throw |= callee->might_throw; 90 } 91 92 enum { 93 DONE_EXPLORING = 0, 94 KEEP_EXPLORING = 1, 95 }; 96 97 /* t, w, e - match pseudo-code above: 98 * t - index of current instruction 99 * w - next instruction 100 * e - edge 101 */ 102 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) 103 { 104 int *insn_stack = env->cfg.insn_stack; 105 int *insn_state = env->cfg.insn_state; 106 107 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) 108 return DONE_EXPLORING; 109 110 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) 111 return DONE_EXPLORING; 112 113 if (w < 0 || w >= env->prog->len) { 114 verbose_linfo(env, t, "%d: ", t); 115 verbose(env, "jump out of range from insn %d to %d\n", t, w); 116 return -EINVAL; 117 } 118 119 if (e == BRANCH) { 120 /* mark branch target for state pruning */ 121 mark_prune_point(env, w); 122 mark_jmp_point(env, w); 123 } 124 125 if (insn_state[w] == 0) { 126 /* tree-edge */ 127 insn_state[t] = DISCOVERED | e; 128 insn_state[w] = DISCOVERED; 129 if (env->cfg.cur_stack >= env->prog->len) 130 return -E2BIG; 131 insn_stack[env->cfg.cur_stack++] = w; 132 return KEEP_EXPLORING; 133 } else if ((insn_state[w] & 0xF0) == DISCOVERED) { 134 if (env->bpf_capable) 135 return DONE_EXPLORING; 136 verbose_linfo(env, t, "%d: ", t); 137 verbose_linfo(env, w, "%d: ", w); 138 verbose(env, "back-edge from insn %d to %d\n", t, w); 139 return -EINVAL; 140 } else if (insn_state[w] == EXPLORED) { 141 /* forward- or cross-edge */ 142 insn_state[t] = DISCOVERED | e; 143 } else { 144 verifier_bug(env, "insn state internal bug"); 145 return -EFAULT; 146 } 147 return DONE_EXPLORING; 148 } 149 150 static int visit_func_call_insn(int t, struct bpf_insn *insns, 151 struct bpf_verifier_env *env, 152 bool visit_callee) 153 { 154 int ret, insn_sz; 155 int w; 156 157 insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; 158 ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); 159 if (ret) 160 return ret; 161 162 mark_prune_point(env, t + insn_sz); 163 /* when we exit from subprog, we need to record non-linear history */ 164 mark_jmp_point(env, t + insn_sz); 165 166 if (visit_callee) { 167 w = t + insns[t].imm + 1; 168 mark_prune_point(env, t); 169 merge_callee_effects(env, t, w); 170 ret = push_insn(t, w, BRANCH, env); 171 } 172 return ret; 173 } 174 175 struct bpf_iarray *bpf_iarray_realloc(struct bpf_iarray *old, size_t n_elem) 176 { 177 size_t new_size = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]); 178 struct bpf_iarray *new; 179 180 new = kvrealloc(old, new_size, GFP_KERNEL_ACCOUNT); 181 if (!new) { 182 /* this is what callers always want, so simplify the call site */ 183 kvfree(old); 184 return NULL; 185 } 186 187 new->cnt = n_elem; 188 return new; 189 } 190 191 static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items) 192 { 193 struct bpf_insn_array_value *value; 194 u32 i; 195 196 for (i = start; i <= end; i++) { 197 value = map->ops->map_lookup_elem(map, &i); 198 /* 199 * map_lookup_elem of an array map will never return an error, 200 * but not checking it makes some static analysers to worry 201 */ 202 if (IS_ERR(value)) 203 return PTR_ERR(value); 204 else if (!value) 205 return -EINVAL; 206 items[i - start] = value->xlated_off; 207 } 208 return 0; 209 } 210 211 static int cmp_ptr_to_u32(const void *a, const void *b) 212 { 213 return *(u32 *)a - *(u32 *)b; 214 } 215 216 static int sort_insn_array_uniq(u32 *items, int cnt) 217 { 218 int unique = 1; 219 int i; 220 221 sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL); 222 223 for (i = 1; i < cnt; i++) 224 if (items[i] != items[unique - 1]) 225 items[unique++] = items[i]; 226 227 return unique; 228 } 229 230 /* 231 * sort_unique({map[start], ..., map[end]}) into off 232 */ 233 int bpf_copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off) 234 { 235 u32 n = end - start + 1; 236 int err; 237 238 err = copy_insn_array(map, start, end, off); 239 if (err) 240 return err; 241 242 return sort_insn_array_uniq(off, n); 243 } 244 245 /* 246 * Copy all unique offsets from the map 247 */ 248 static struct bpf_iarray *jt_from_map(struct bpf_map *map) 249 { 250 struct bpf_iarray *jt; 251 int err; 252 int n; 253 254 jt = bpf_iarray_realloc(NULL, map->max_entries); 255 if (!jt) 256 return ERR_PTR(-ENOMEM); 257 258 n = bpf_copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items); 259 if (n < 0) { 260 err = n; 261 goto err_free; 262 } 263 if (n == 0) { 264 err = -EINVAL; 265 goto err_free; 266 } 267 jt->cnt = n; 268 return jt; 269 270 err_free: 271 kvfree(jt); 272 return ERR_PTR(err); 273 } 274 275 /* 276 * Find and collect all maps which fit in the subprog. Return the result as one 277 * combined jump table in jt->items (allocated with kvcalloc) 278 */ 279 static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env, 280 int subprog_start, int subprog_end) 281 { 282 struct bpf_iarray *jt = NULL; 283 struct bpf_map *map; 284 struct bpf_iarray *jt_cur; 285 int i; 286 287 for (i = 0; i < env->insn_array_map_cnt; i++) { 288 /* 289 * TODO (when needed): collect only jump tables, not static keys 290 * or maps for indirect calls 291 */ 292 map = env->insn_array_maps[i]; 293 294 jt_cur = jt_from_map(map); 295 if (IS_ERR(jt_cur)) { 296 kvfree(jt); 297 return jt_cur; 298 } 299 300 /* 301 * This is enough to check one element. The full table is 302 * checked to fit inside the subprog later in create_jt() 303 */ 304 if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) { 305 u32 old_cnt = jt ? jt->cnt : 0; 306 jt = bpf_iarray_realloc(jt, old_cnt + jt_cur->cnt); 307 if (!jt) { 308 kvfree(jt_cur); 309 return ERR_PTR(-ENOMEM); 310 } 311 memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2); 312 } 313 314 kvfree(jt_cur); 315 } 316 317 if (!jt) { 318 verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start); 319 return ERR_PTR(-EINVAL); 320 } 321 322 jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt); 323 return jt; 324 } 325 326 static struct bpf_iarray * 327 create_jt(int t, struct bpf_verifier_env *env) 328 { 329 struct bpf_subprog_info *subprog; 330 int subprog_start, subprog_end; 331 struct bpf_iarray *jt; 332 int i; 333 334 subprog = bpf_find_containing_subprog(env, t); 335 subprog_start = subprog->start; 336 subprog_end = (subprog + 1)->start; 337 jt = jt_from_subprog(env, subprog_start, subprog_end); 338 if (IS_ERR(jt)) 339 return jt; 340 341 /* Check that the every element of the jump table fits within the given subprogram */ 342 for (i = 0; i < jt->cnt; i++) { 343 if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) { 344 verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]\n", 345 t, subprog_start, subprog_end); 346 kvfree(jt); 347 return ERR_PTR(-EINVAL); 348 } 349 } 350 351 return jt; 352 } 353 354 /* "conditional jump with N edges" */ 355 static int visit_gotox_insn(int t, struct bpf_verifier_env *env) 356 { 357 int *insn_stack = env->cfg.insn_stack; 358 int *insn_state = env->cfg.insn_state; 359 bool keep_exploring = false; 360 struct bpf_iarray *jt; 361 int i, w; 362 363 jt = env->insn_aux_data[t].jt; 364 if (!jt) { 365 jt = create_jt(t, env); 366 if (IS_ERR(jt)) 367 return PTR_ERR(jt); 368 369 env->insn_aux_data[t].jt = jt; 370 } 371 372 mark_prune_point(env, t); 373 for (i = 0; i < jt->cnt; i++) { 374 w = jt->items[i]; 375 if (w < 0 || w >= env->prog->len) { 376 verbose(env, "indirect jump out of range from insn %d to %d\n", t, w); 377 return -EINVAL; 378 } 379 380 mark_jmp_point(env, w); 381 382 /* EXPLORED || DISCOVERED */ 383 if (insn_state[w]) 384 continue; 385 386 if (env->cfg.cur_stack >= env->prog->len) 387 return -E2BIG; 388 389 insn_stack[env->cfg.cur_stack++] = w; 390 insn_state[w] |= DISCOVERED; 391 keep_exploring = true; 392 } 393 394 return keep_exploring ? KEEP_EXPLORING : DONE_EXPLORING; 395 } 396 397 /* 398 * Instructions that can abnormally return from a subprog (tail_call 399 * upon success, ld_{abs,ind} upon load failure) have a hidden exit 400 * that the verifier must account for. 401 */ 402 static int visit_abnormal_return_insn(struct bpf_verifier_env *env, int t) 403 { 404 struct bpf_subprog_info *subprog; 405 struct bpf_iarray *jt; 406 407 if (env->insn_aux_data[t].jt) 408 return 0; 409 410 jt = bpf_iarray_realloc(NULL, 2); 411 if (!jt) 412 return -ENOMEM; 413 414 subprog = bpf_find_containing_subprog(env, t); 415 jt->items[0] = t + 1; 416 jt->items[1] = subprog->exit_idx; 417 env->insn_aux_data[t].jt = jt; 418 return 0; 419 } 420 421 /* Visits the instruction at index t and returns one of the following: 422 * < 0 - an error occurred 423 * DONE_EXPLORING - the instruction was fully explored 424 * KEEP_EXPLORING - there is still work to be done before it is fully explored 425 */ 426 static int visit_insn(int t, struct bpf_verifier_env *env) 427 { 428 struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t]; 429 int ret, off, insn_sz; 430 431 if (bpf_pseudo_func(insn)) 432 return visit_func_call_insn(t, insns, env, true); 433 434 /* All non-branch instructions have a single fall-through edge. */ 435 if (BPF_CLASS(insn->code) != BPF_JMP && 436 BPF_CLASS(insn->code) != BPF_JMP32) { 437 if (BPF_CLASS(insn->code) == BPF_LD && 438 (BPF_MODE(insn->code) == BPF_ABS || 439 BPF_MODE(insn->code) == BPF_IND)) { 440 ret = visit_abnormal_return_insn(env, t); 441 if (ret) 442 return ret; 443 } 444 insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; 445 return push_insn(t, t + insn_sz, FALLTHROUGH, env); 446 } 447 448 switch (BPF_OP(insn->code)) { 449 case BPF_EXIT: 450 return DONE_EXPLORING; 451 452 case BPF_CALL: 453 if (bpf_is_async_callback_calling_insn(insn)) 454 /* Mark this call insn as a prune point to trigger 455 * is_state_visited() check before call itself is 456 * processed by __check_func_call(). Otherwise new 457 * async state will be pushed for further exploration. 458 */ 459 mark_prune_point(env, t); 460 /* For functions that invoke callbacks it is not known how many times 461 * callback would be called. Verifier models callback calling functions 462 * by repeatedly visiting callback bodies and returning to origin call 463 * instruction. 464 * In order to stop such iteration verifier needs to identify when a 465 * state identical some state from a previous iteration is reached. 466 * Check below forces creation of checkpoint before callback calling 467 * instruction to allow search for such identical states. 468 */ 469 if (bpf_is_sync_callback_calling_insn(insn)) { 470 mark_calls_callback(env, t); 471 mark_force_checkpoint(env, t); 472 mark_prune_point(env, t); 473 mark_jmp_point(env, t); 474 } 475 if (bpf_helper_call(insn)) { 476 const struct bpf_func_proto *fp; 477 478 ret = bpf_get_helper_proto(env, insn->imm, &fp); 479 /* If called in a non-sleepable context program will be 480 * rejected anyway, so we should end up with precise 481 * sleepable marks on subprogs, except for dead code 482 * elimination. 483 */ 484 if (ret == 0 && fp->might_sleep) 485 mark_subprog_might_sleep(env, t); 486 if (bpf_helper_changes_pkt_data(insn->imm)) 487 mark_subprog_changes_pkt_data(env, t); 488 if (insn->imm == BPF_FUNC_tail_call) { 489 ret = visit_abnormal_return_insn(env, t); 490 if (ret) 491 return ret; 492 } 493 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 494 struct bpf_kfunc_call_arg_meta meta; 495 496 ret = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta); 497 if (ret == 0 && bpf_is_iter_next_kfunc(&meta)) { 498 mark_prune_point(env, t); 499 /* Checking and saving state checkpoints at iter_next() call 500 * is crucial for fast convergence of open-coded iterator loop 501 * logic, so we need to force it. If we don't do that, 502 * is_state_visited() might skip saving a checkpoint, causing 503 * unnecessarily long sequence of not checkpointed 504 * instructions and jumps, leading to exhaustion of jump 505 * history buffer, and potentially other undesired outcomes. 506 * It is expected that with correct open-coded iterators 507 * convergence will happen quickly, so we don't run a risk of 508 * exhausting memory. 509 */ 510 mark_force_checkpoint(env, t); 511 } 512 /* Same as helpers, if called in a non-sleepable context 513 * program will be rejected anyway, so we should end up 514 * with precise sleepable marks on subprogs, except for 515 * dead code elimination. 516 */ 517 if (ret == 0 && bpf_is_kfunc_sleepable(&meta)) 518 mark_subprog_might_sleep(env, t); 519 if (ret == 0 && bpf_is_kfunc_pkt_changing(&meta)) 520 mark_subprog_changes_pkt_data(env, t); 521 if (ret == 0 && bpf_is_throw_kfunc(insn)) 522 mark_subprog_might_throw(env, t); 523 } 524 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); 525 526 case BPF_JA: 527 if (BPF_SRC(insn->code) == BPF_X) 528 return visit_gotox_insn(t, env); 529 530 if (BPF_CLASS(insn->code) == BPF_JMP) 531 off = insn->off; 532 else 533 off = insn->imm; 534 535 /* unconditional jump with single edge */ 536 ret = push_insn(t, t + off + 1, FALLTHROUGH, env); 537 if (ret) 538 return ret; 539 540 mark_prune_point(env, t + off + 1); 541 mark_jmp_point(env, t + off + 1); 542 543 return ret; 544 545 default: 546 /* conditional jump with two edges */ 547 mark_prune_point(env, t); 548 if (bpf_is_may_goto_insn(insn)) 549 mark_force_checkpoint(env, t); 550 551 ret = push_insn(t, t + 1, FALLTHROUGH, env); 552 if (ret) 553 return ret; 554 555 return push_insn(t, t + insn->off + 1, BRANCH, env); 556 } 557 } 558 559 /* non-recursive depth-first-search to detect loops in BPF program 560 * loop == back-edge in directed graph 561 */ 562 int bpf_check_cfg(struct bpf_verifier_env *env) 563 { 564 int insn_cnt = env->prog->len; 565 int *insn_stack, *insn_state; 566 int ex_insn_beg, i, ret = 0; 567 568 insn_state = env->cfg.insn_state = kvzalloc_objs(int, insn_cnt, 569 GFP_KERNEL_ACCOUNT); 570 if (!insn_state) 571 return -ENOMEM; 572 573 insn_stack = env->cfg.insn_stack = kvzalloc_objs(int, insn_cnt, 574 GFP_KERNEL_ACCOUNT); 575 if (!insn_stack) { 576 kvfree(insn_state); 577 return -ENOMEM; 578 } 579 580 ex_insn_beg = env->exception_callback_subprog 581 ? env->subprog_info[env->exception_callback_subprog].start 582 : 0; 583 584 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ 585 insn_stack[0] = 0; /* 0 is the first instruction */ 586 env->cfg.cur_stack = 1; 587 588 walk_cfg: 589 while (env->cfg.cur_stack > 0) { 590 int t = insn_stack[env->cfg.cur_stack - 1]; 591 592 ret = visit_insn(t, env); 593 switch (ret) { 594 case DONE_EXPLORING: 595 insn_state[t] = EXPLORED; 596 env->cfg.cur_stack--; 597 break; 598 case KEEP_EXPLORING: 599 break; 600 default: 601 if (ret > 0) { 602 verifier_bug(env, "visit_insn internal bug"); 603 ret = -EFAULT; 604 } 605 goto err_free; 606 } 607 } 608 609 if (env->cfg.cur_stack < 0) { 610 verifier_bug(env, "pop stack internal bug"); 611 ret = -EFAULT; 612 goto err_free; 613 } 614 615 if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) { 616 insn_state[ex_insn_beg] = DISCOVERED; 617 insn_stack[0] = ex_insn_beg; 618 env->cfg.cur_stack = 1; 619 goto walk_cfg; 620 } 621 622 for (i = 0; i < insn_cnt; i++) { 623 struct bpf_insn *insn = &env->prog->insnsi[i]; 624 625 if (insn_state[i] != EXPLORED) { 626 verbose(env, "unreachable insn %d\n", i); 627 ret = -EINVAL; 628 goto err_free; 629 } 630 if (bpf_is_ldimm64(insn)) { 631 if (insn_state[i + 1] != 0) { 632 verbose(env, "jump into the middle of ldimm64 insn %d\n", i); 633 ret = -EINVAL; 634 goto err_free; 635 } 636 i++; /* skip second half of ldimm64 */ 637 } 638 } 639 ret = 0; /* cfg looks good */ 640 env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data; 641 env->prog->aux->might_sleep = env->subprog_info[0].might_sleep; 642 643 err_free: 644 kvfree(insn_state); 645 kvfree(insn_stack); 646 env->cfg.insn_state = env->cfg.insn_stack = NULL; 647 return ret; 648 } 649 650 /* 651 * For each subprogram 'i' fill array env->cfg.insn_subprogram sub-range 652 * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start) 653 * with indices of 'i' instructions in postorder. 654 */ 655 int bpf_compute_postorder(struct bpf_verifier_env *env) 656 { 657 u32 cur_postorder, i, top, stack_sz, s; 658 int *stack = NULL, *postorder = NULL, *state = NULL; 659 struct bpf_iarray *succ; 660 661 postorder = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 662 state = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 663 stack = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 664 if (!postorder || !state || !stack) { 665 kvfree(postorder); 666 kvfree(state); 667 kvfree(stack); 668 return -ENOMEM; 669 } 670 cur_postorder = 0; 671 for (i = 0; i < env->subprog_cnt; i++) { 672 env->subprog_info[i].postorder_start = cur_postorder; 673 stack[0] = env->subprog_info[i].start; 674 stack_sz = 1; 675 do { 676 top = stack[stack_sz - 1]; 677 state[top] |= DISCOVERED; 678 if (state[top] & EXPLORED) { 679 postorder[cur_postorder++] = top; 680 stack_sz--; 681 continue; 682 } 683 succ = bpf_insn_successors(env, top); 684 for (s = 0; s < succ->cnt; ++s) { 685 if (!state[succ->items[s]]) { 686 stack[stack_sz++] = succ->items[s]; 687 state[succ->items[s]] |= DISCOVERED; 688 } 689 } 690 state[top] |= EXPLORED; 691 } while (stack_sz); 692 } 693 env->subprog_info[i].postorder_start = cur_postorder; 694 env->cfg.insn_postorder = postorder; 695 env->cfg.cur_postorder = cur_postorder; 696 kvfree(stack); 697 kvfree(state); 698 return 0; 699 } 700 701 /* 702 * Compute strongly connected components (SCCs) on the CFG. 703 * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc. 704 * If instruction is a sole member of its SCC and there are no self edges, 705 * assign it SCC number of zero. 706 * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation. 707 */ 708 int bpf_compute_scc(struct bpf_verifier_env *env) 709 { 710 const u32 NOT_ON_STACK = U32_MAX; 711 712 struct bpf_insn_aux_data *aux = env->insn_aux_data; 713 const u32 insn_cnt = env->prog->len; 714 int stack_sz, dfs_sz, err = 0; 715 u32 *stack, *pre, *low, *dfs; 716 u32 i, j, t, w; 717 u32 next_preorder_num; 718 u32 next_scc_id; 719 bool assign_scc; 720 struct bpf_iarray *succ; 721 722 next_preorder_num = 1; 723 next_scc_id = 1; 724 /* 725 * - 'stack' accumulates vertices in DFS order, see invariant comment below; 726 * - 'pre[t] == p' => preorder number of vertex 't' is 'p'; 727 * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n'; 728 * - 'dfs' DFS traversal stack, used to emulate explicit recursion. 729 */ 730 stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 731 pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 732 low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 733 dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT); 734 if (!stack || !pre || !low || !dfs) { 735 err = -ENOMEM; 736 goto exit; 737 } 738 /* 739 * References: 740 * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms" 741 * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components" 742 * 743 * The algorithm maintains the following invariant: 744 * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]'; 745 * - then, vertex 'u' remains on stack while vertex 'v' is on stack. 746 * 747 * Consequently: 748 * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u', 749 * such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack, 750 * and thus there is an SCC (loop) containing both 'u' and 'v'. 751 * - If 'low[v] == pre[v]', loops containing 'v' have been explored, 752 * and 'v' can be considered the root of some SCC. 753 * 754 * Here is a pseudo-code for an explicitly recursive version of the algorithm: 755 * 756 * NOT_ON_STACK = insn_cnt + 1 757 * pre = [0] * insn_cnt 758 * low = [0] * insn_cnt 759 * scc = [0] * insn_cnt 760 * stack = [] 761 * 762 * next_preorder_num = 1 763 * next_scc_id = 1 764 * 765 * def recur(w): 766 * nonlocal next_preorder_num 767 * nonlocal next_scc_id 768 * 769 * pre[w] = next_preorder_num 770 * low[w] = next_preorder_num 771 * next_preorder_num += 1 772 * stack.append(w) 773 * for s in successors(w): 774 * # Note: for classic algorithm the block below should look as: 775 * # 776 * # if pre[s] == 0: 777 * # recur(s) 778 * # low[w] = min(low[w], low[s]) 779 * # elif low[s] != NOT_ON_STACK: 780 * # low[w] = min(low[w], pre[s]) 781 * # 782 * # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])' 783 * # does not break the invariant and makes iterative version of the algorithm 784 * # simpler. See 'Algorithm #3' from [2]. 785 * 786 * # 's' not yet visited 787 * if pre[s] == 0: 788 * recur(s) 789 * # if 's' is on stack, pick lowest reachable preorder number from it; 790 * # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]', 791 * # so 'min' would be a noop. 792 * low[w] = min(low[w], low[s]) 793 * 794 * if low[w] == pre[w]: 795 * # 'w' is the root of an SCC, pop all vertices 796 * # below 'w' on stack and assign same SCC to them. 797 * while True: 798 * t = stack.pop() 799 * low[t] = NOT_ON_STACK 800 * scc[t] = next_scc_id 801 * if t == w: 802 * break 803 * next_scc_id += 1 804 * 805 * for i in range(0, insn_cnt): 806 * if pre[i] == 0: 807 * recur(i) 808 * 809 * Below implementation replaces explicit recursion with array 'dfs'. 810 */ 811 for (i = 0; i < insn_cnt; i++) { 812 if (pre[i]) 813 continue; 814 stack_sz = 0; 815 dfs_sz = 1; 816 dfs[0] = i; 817 dfs_continue: 818 while (dfs_sz) { 819 w = dfs[dfs_sz - 1]; 820 if (pre[w] == 0) { 821 low[w] = next_preorder_num; 822 pre[w] = next_preorder_num; 823 next_preorder_num++; 824 stack[stack_sz++] = w; 825 } 826 /* Visit 'w' successors */ 827 succ = bpf_insn_successors(env, w); 828 for (j = 0; j < succ->cnt; ++j) { 829 if (pre[succ->items[j]]) { 830 low[w] = min(low[w], low[succ->items[j]]); 831 } else { 832 dfs[dfs_sz++] = succ->items[j]; 833 goto dfs_continue; 834 } 835 } 836 /* 837 * Preserve the invariant: if some vertex above in the stack 838 * is reachable from 'w', keep 'w' on the stack. 839 */ 840 if (low[w] < pre[w]) { 841 dfs_sz--; 842 goto dfs_continue; 843 } 844 /* 845 * Assign SCC number only if component has two or more elements, 846 * or if component has a self reference, or if instruction is a 847 * callback calling function (implicit loop). 848 */ 849 assign_scc = stack[stack_sz - 1] != w; /* two or more elements? */ 850 for (j = 0; j < succ->cnt; ++j) { /* self reference? */ 851 if (succ->items[j] == w) { 852 assign_scc = true; 853 break; 854 } 855 } 856 if (bpf_calls_callback(env, w)) /* implicit loop? */ 857 assign_scc = true; 858 /* Pop component elements from stack */ 859 do { 860 t = stack[--stack_sz]; 861 low[t] = NOT_ON_STACK; 862 if (assign_scc) 863 aux[t].scc = next_scc_id; 864 } while (t != w); 865 if (assign_scc) 866 next_scc_id++; 867 dfs_sz--; 868 } 869 } 870 env->scc_info = kvzalloc_objs(*env->scc_info, next_scc_id, 871 GFP_KERNEL_ACCOUNT); 872 if (!env->scc_info) { 873 err = -ENOMEM; 874 goto exit; 875 } 876 env->scc_cnt = next_scc_id; 877 exit: 878 kvfree(stack); 879 kvfree(pre); 880 kvfree(low); 881 kvfree(dfs); 882 return err; 883 } 884