1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ 3 #include <linux/bpf.h> 4 #include <linux/bpf_verifier.h> 5 #include <linux/filter.h> 6 #include <linux/sort.h> 7 8 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) 9 10 /* non-recursive DFS pseudo code 11 * 1 procedure DFS-iterative(G,v): 12 * 2 label v as discovered 13 * 3 let S be a stack 14 * 4 S.push(v) 15 * 5 while S is not empty 16 * 6 t <- S.peek() 17 * 7 if t is what we're looking for: 18 * 8 return t 19 * 9 for all edges e in G.adjacentEdges(t) do 20 * 10 if edge e is already labelled 21 * 11 continue with the next edge 22 * 12 w <- G.adjacentVertex(t,e) 23 * 13 if vertex w is not discovered and not explored 24 * 14 label e as tree-edge 25 * 15 label w as discovered 26 * 16 S.push(w) 27 * 17 continue at 5 28 * 18 else if vertex w is discovered 29 * 19 label e as back-edge 30 * 20 else 31 * 21 // vertex w is explored 32 * 22 label e as forward- or cross-edge 33 * 23 label t as explored 34 * 24 S.pop() 35 * 36 * convention: 37 * 0x10 - discovered 38 * 0x11 - discovered and fall-through edge labelled 39 * 0x12 - discovered and fall-through and branch edges labelled 40 * 0x20 - explored 41 */ 42 43 enum { 44 DISCOVERED = 0x10, 45 EXPLORED = 0x20, 46 FALLTHROUGH = 1, 47 BRANCH = 2, 48 }; 49 50 51 static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off) 52 { 53 struct bpf_subprog_info *subprog; 54 55 subprog = bpf_find_containing_subprog(env, off); 56 subprog->changes_pkt_data = true; 57 } 58 59 static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off) 60 { 61 struct bpf_subprog_info *subprog; 62 63 subprog = bpf_find_containing_subprog(env, off); 64 subprog->might_sleep = true; 65 } 66 67 /* 't' is an index of a call-site. 68 * 'w' is a callee entry point. 69 * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. 70 * Rely on DFS traversal order and absence of recursive calls to guarantee that 71 * callee's change_pkt_data marks would be correct at that moment. 72 */ 73 static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) 74 { 75 struct bpf_subprog_info *caller, *callee; 76 77 caller = bpf_find_containing_subprog(env, t); 78 callee = bpf_find_containing_subprog(env, w); 79 caller->changes_pkt_data |= callee->changes_pkt_data; 80 caller->might_sleep |= callee->might_sleep; 81 } 82 83 enum { 84 DONE_EXPLORING = 0, 85 KEEP_EXPLORING = 1, 86 }; 87 88 /* t, w, e - match pseudo-code above: 89 * t - index of current instruction 90 * w - next instruction 91 * e - edge 92 */ 93 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) 94 { 95 int *insn_stack = env->cfg.insn_stack; 96 int *insn_state = env->cfg.insn_state; 97 98 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) 99 return DONE_EXPLORING; 100 101 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) 102 return DONE_EXPLORING; 103 104 if (w < 0 || w >= env->prog->len) { 105 verbose_linfo(env, t, "%d: ", t); 106 verbose(env, "jump out of range from insn %d to %d\n", t, w); 107 return -EINVAL; 108 } 109 110 if (e == BRANCH) { 111 /* mark branch target for state pruning */ 112 mark_prune_point(env, w); 113 mark_jmp_point(env, w); 114 } 115 116 if (insn_state[w] == 0) { 117 /* tree-edge */ 118 insn_state[t] = DISCOVERED | e; 119 insn_state[w] = DISCOVERED; 120 if (env->cfg.cur_stack >= env->prog->len) 121 return -E2BIG; 122 insn_stack[env->cfg.cur_stack++] = w; 123 return KEEP_EXPLORING; 124 } else if ((insn_state[w] & 0xF0) == DISCOVERED) { 125 if (env->bpf_capable) 126 return DONE_EXPLORING; 127 verbose_linfo(env, t, "%d: ", t); 128 verbose_linfo(env, w, "%d: ", w); 129 verbose(env, "back-edge from insn %d to %d\n", t, w); 130 return -EINVAL; 131 } else if (insn_state[w] == EXPLORED) { 132 /* forward- or cross-edge */ 133 insn_state[t] = DISCOVERED | e; 134 } else { 135 verifier_bug(env, "insn state internal bug"); 136 return -EFAULT; 137 } 138 return DONE_EXPLORING; 139 } 140 141 static int visit_func_call_insn(int t, struct bpf_insn *insns, 142 struct bpf_verifier_env *env, 143 bool visit_callee) 144 { 145 int ret, insn_sz; 146 int w; 147 148 insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; 149 ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); 150 if (ret) 151 return ret; 152 153 mark_prune_point(env, t + insn_sz); 154 /* when we exit from subprog, we need to record non-linear history */ 155 mark_jmp_point(env, t + insn_sz); 156 157 if (visit_callee) { 158 w = t + insns[t].imm + 1; 159 mark_prune_point(env, t); 160 merge_callee_effects(env, t, w); 161 ret = push_insn(t, w, BRANCH, env); 162 } 163 return ret; 164 } 165 166 struct bpf_iarray *bpf_iarray_realloc(struct bpf_iarray *old, size_t n_elem) 167 { 168 size_t new_size = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]); 169 struct bpf_iarray *new; 170 171 new = kvrealloc(old, new_size, GFP_KERNEL_ACCOUNT); 172 if (!new) { 173 /* this is what callers always want, so simplify the call site */ 174 kvfree(old); 175 return NULL; 176 } 177 178 new->cnt = n_elem; 179 return new; 180 } 181 182 static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items) 183 { 184 struct bpf_insn_array_value *value; 185 u32 i; 186 187 for (i = start; i <= end; i++) { 188 value = map->ops->map_lookup_elem(map, &i); 189 /* 190 * map_lookup_elem of an array map will never return an error, 191 * but not checking it makes some static analysers to worry 192 */ 193 if (IS_ERR(value)) 194 return PTR_ERR(value); 195 else if (!value) 196 return -EINVAL; 197 items[i - start] = value->xlated_off; 198 } 199 return 0; 200 } 201 202 static int cmp_ptr_to_u32(const void *a, const void *b) 203 { 204 return *(u32 *)a - *(u32 *)b; 205 } 206 207 static int sort_insn_array_uniq(u32 *items, int cnt) 208 { 209 int unique = 1; 210 int i; 211 212 sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL); 213 214 for (i = 1; i < cnt; i++) 215 if (items[i] != items[unique - 1]) 216 items[unique++] = items[i]; 217 218 return unique; 219 } 220 221 /* 222 * sort_unique({map[start], ..., map[end]}) into off 223 */ 224 int bpf_copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off) 225 { 226 u32 n = end - start + 1; 227 int err; 228 229 err = copy_insn_array(map, start, end, off); 230 if (err) 231 return err; 232 233 return sort_insn_array_uniq(off, n); 234 } 235 236 /* 237 * Copy all unique offsets from the map 238 */ 239 static struct bpf_iarray *jt_from_map(struct bpf_map *map) 240 { 241 struct bpf_iarray *jt; 242 int err; 243 int n; 244 245 jt = bpf_iarray_realloc(NULL, map->max_entries); 246 if (!jt) 247 return ERR_PTR(-ENOMEM); 248 249 n = bpf_copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items); 250 if (n < 0) { 251 err = n; 252 goto err_free; 253 } 254 if (n == 0) { 255 err = -EINVAL; 256 goto err_free; 257 } 258 jt->cnt = n; 259 return jt; 260 261 err_free: 262 kvfree(jt); 263 return ERR_PTR(err); 264 } 265 266 /* 267 * Find and collect all maps which fit in the subprog. Return the result as one 268 * combined jump table in jt->items (allocated with kvcalloc) 269 */ 270 static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env, 271 int subprog_start, int subprog_end) 272 { 273 struct bpf_iarray *jt = NULL; 274 struct bpf_map *map; 275 struct bpf_iarray *jt_cur; 276 int i; 277 278 for (i = 0; i < env->insn_array_map_cnt; i++) { 279 /* 280 * TODO (when needed): collect only jump tables, not static keys 281 * or maps for indirect calls 282 */ 283 map = env->insn_array_maps[i]; 284 285 jt_cur = jt_from_map(map); 286 if (IS_ERR(jt_cur)) { 287 kvfree(jt); 288 return jt_cur; 289 } 290 291 /* 292 * This is enough to check one element. The full table is 293 * checked to fit inside the subprog later in create_jt() 294 */ 295 if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) { 296 u32 old_cnt = jt ? jt->cnt : 0; 297 jt = bpf_iarray_realloc(jt, old_cnt + jt_cur->cnt); 298 if (!jt) { 299 kvfree(jt_cur); 300 return ERR_PTR(-ENOMEM); 301 } 302 memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2); 303 } 304 305 kvfree(jt_cur); 306 } 307 308 if (!jt) { 309 verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start); 310 return ERR_PTR(-EINVAL); 311 } 312 313 jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt); 314 return jt; 315 } 316 317 static struct bpf_iarray * 318 create_jt(int t, struct bpf_verifier_env *env) 319 { 320 struct bpf_subprog_info *subprog; 321 int subprog_start, subprog_end; 322 struct bpf_iarray *jt; 323 int i; 324 325 subprog = bpf_find_containing_subprog(env, t); 326 subprog_start = subprog->start; 327 subprog_end = (subprog + 1)->start; 328 jt = jt_from_subprog(env, subprog_start, subprog_end); 329 if (IS_ERR(jt)) 330 return jt; 331 332 /* Check that the every element of the jump table fits within the given subprogram */ 333 for (i = 0; i < jt->cnt; i++) { 334 if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) { 335 verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]\n", 336 t, subprog_start, subprog_end); 337 kvfree(jt); 338 return ERR_PTR(-EINVAL); 339 } 340 } 341 342 return jt; 343 } 344 345 /* "conditional jump with N edges" */ 346 static int visit_gotox_insn(int t, struct bpf_verifier_env *env) 347 { 348 int *insn_stack = env->cfg.insn_stack; 349 int *insn_state = env->cfg.insn_state; 350 bool keep_exploring = false; 351 struct bpf_iarray *jt; 352 int i, w; 353 354 jt = env->insn_aux_data[t].jt; 355 if (!jt) { 356 jt = create_jt(t, env); 357 if (IS_ERR(jt)) 358 return PTR_ERR(jt); 359 360 env->insn_aux_data[t].jt = jt; 361 } 362 363 mark_prune_point(env, t); 364 for (i = 0; i < jt->cnt; i++) { 365 w = jt->items[i]; 366 if (w < 0 || w >= env->prog->len) { 367 verbose(env, "indirect jump out of range from insn %d to %d\n", t, w); 368 return -EINVAL; 369 } 370 371 mark_jmp_point(env, w); 372 373 /* EXPLORED || DISCOVERED */ 374 if (insn_state[w]) 375 continue; 376 377 if (env->cfg.cur_stack >= env->prog->len) 378 return -E2BIG; 379 380 insn_stack[env->cfg.cur_stack++] = w; 381 insn_state[w] |= DISCOVERED; 382 keep_exploring = true; 383 } 384 385 return keep_exploring ? KEEP_EXPLORING : DONE_EXPLORING; 386 } 387 388 /* 389 * Instructions that can abnormally return from a subprog (tail_call 390 * upon success, ld_{abs,ind} upon load failure) have a hidden exit 391 * that the verifier must account for. 392 */ 393 static int visit_abnormal_return_insn(struct bpf_verifier_env *env, int t) 394 { 395 struct bpf_subprog_info *subprog; 396 struct bpf_iarray *jt; 397 398 if (env->insn_aux_data[t].jt) 399 return 0; 400 401 jt = bpf_iarray_realloc(NULL, 2); 402 if (!jt) 403 return -ENOMEM; 404 405 subprog = bpf_find_containing_subprog(env, t); 406 jt->items[0] = t + 1; 407 jt->items[1] = subprog->exit_idx; 408 env->insn_aux_data[t].jt = jt; 409 return 0; 410 } 411 412 /* Visits the instruction at index t and returns one of the following: 413 * < 0 - an error occurred 414 * DONE_EXPLORING - the instruction was fully explored 415 * KEEP_EXPLORING - there is still work to be done before it is fully explored 416 */ 417 static int visit_insn(int t, struct bpf_verifier_env *env) 418 { 419 struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t]; 420 int ret, off, insn_sz; 421 422 if (bpf_pseudo_func(insn)) 423 return visit_func_call_insn(t, insns, env, true); 424 425 /* All non-branch instructions have a single fall-through edge. */ 426 if (BPF_CLASS(insn->code) != BPF_JMP && 427 BPF_CLASS(insn->code) != BPF_JMP32) { 428 if (BPF_CLASS(insn->code) == BPF_LD && 429 (BPF_MODE(insn->code) == BPF_ABS || 430 BPF_MODE(insn->code) == BPF_IND)) { 431 ret = visit_abnormal_return_insn(env, t); 432 if (ret) 433 return ret; 434 } 435 insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; 436 return push_insn(t, t + insn_sz, FALLTHROUGH, env); 437 } 438 439 switch (BPF_OP(insn->code)) { 440 case BPF_EXIT: 441 return DONE_EXPLORING; 442 443 case BPF_CALL: 444 if (bpf_is_async_callback_calling_insn(insn)) 445 /* Mark this call insn as a prune point to trigger 446 * is_state_visited() check before call itself is 447 * processed by __check_func_call(). Otherwise new 448 * async state will be pushed for further exploration. 449 */ 450 mark_prune_point(env, t); 451 /* For functions that invoke callbacks it is not known how many times 452 * callback would be called. Verifier models callback calling functions 453 * by repeatedly visiting callback bodies and returning to origin call 454 * instruction. 455 * In order to stop such iteration verifier needs to identify when a 456 * state identical some state from a previous iteration is reached. 457 * Check below forces creation of checkpoint before callback calling 458 * instruction to allow search for such identical states. 459 */ 460 if (bpf_is_sync_callback_calling_insn(insn)) { 461 mark_calls_callback(env, t); 462 mark_force_checkpoint(env, t); 463 mark_prune_point(env, t); 464 mark_jmp_point(env, t); 465 } 466 if (bpf_helper_call(insn)) { 467 const struct bpf_func_proto *fp; 468 469 ret = bpf_get_helper_proto(env, insn->imm, &fp); 470 /* If called in a non-sleepable context program will be 471 * rejected anyway, so we should end up with precise 472 * sleepable marks on subprogs, except for dead code 473 * elimination. 474 */ 475 if (ret == 0 && fp->might_sleep) 476 mark_subprog_might_sleep(env, t); 477 if (bpf_helper_changes_pkt_data(insn->imm)) 478 mark_subprog_changes_pkt_data(env, t); 479 if (insn->imm == BPF_FUNC_tail_call) { 480 ret = visit_abnormal_return_insn(env, t); 481 if (ret) 482 return ret; 483 } 484 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 485 struct bpf_kfunc_call_arg_meta meta; 486 487 ret = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta); 488 if (ret == 0 && bpf_is_iter_next_kfunc(&meta)) { 489 mark_prune_point(env, t); 490 /* Checking and saving state checkpoints at iter_next() call 491 * is crucial for fast convergence of open-coded iterator loop 492 * logic, so we need to force it. If we don't do that, 493 * is_state_visited() might skip saving a checkpoint, causing 494 * unnecessarily long sequence of not checkpointed 495 * instructions and jumps, leading to exhaustion of jump 496 * history buffer, and potentially other undesired outcomes. 497 * It is expected that with correct open-coded iterators 498 * convergence will happen quickly, so we don't run a risk of 499 * exhausting memory. 500 */ 501 mark_force_checkpoint(env, t); 502 } 503 /* Same as helpers, if called in a non-sleepable context 504 * program will be rejected anyway, so we should end up 505 * with precise sleepable marks on subprogs, except for 506 * dead code elimination. 507 */ 508 if (ret == 0 && bpf_is_kfunc_sleepable(&meta)) 509 mark_subprog_might_sleep(env, t); 510 if (ret == 0 && bpf_is_kfunc_pkt_changing(&meta)) 511 mark_subprog_changes_pkt_data(env, t); 512 } 513 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); 514 515 case BPF_JA: 516 if (BPF_SRC(insn->code) == BPF_X) 517 return visit_gotox_insn(t, env); 518 519 if (BPF_CLASS(insn->code) == BPF_JMP) 520 off = insn->off; 521 else 522 off = insn->imm; 523 524 /* unconditional jump with single edge */ 525 ret = push_insn(t, t + off + 1, FALLTHROUGH, env); 526 if (ret) 527 return ret; 528 529 mark_prune_point(env, t + off + 1); 530 mark_jmp_point(env, t + off + 1); 531 532 return ret; 533 534 default: 535 /* conditional jump with two edges */ 536 mark_prune_point(env, t); 537 if (bpf_is_may_goto_insn(insn)) 538 mark_force_checkpoint(env, t); 539 540 ret = push_insn(t, t + 1, FALLTHROUGH, env); 541 if (ret) 542 return ret; 543 544 return push_insn(t, t + insn->off + 1, BRANCH, env); 545 } 546 } 547 548 /* non-recursive depth-first-search to detect loops in BPF program 549 * loop == back-edge in directed graph 550 */ 551 int bpf_check_cfg(struct bpf_verifier_env *env) 552 { 553 int insn_cnt = env->prog->len; 554 int *insn_stack, *insn_state; 555 int ex_insn_beg, i, ret = 0; 556 557 insn_state = env->cfg.insn_state = kvzalloc_objs(int, insn_cnt, 558 GFP_KERNEL_ACCOUNT); 559 if (!insn_state) 560 return -ENOMEM; 561 562 insn_stack = env->cfg.insn_stack = kvzalloc_objs(int, insn_cnt, 563 GFP_KERNEL_ACCOUNT); 564 if (!insn_stack) { 565 kvfree(insn_state); 566 return -ENOMEM; 567 } 568 569 ex_insn_beg = env->exception_callback_subprog 570 ? env->subprog_info[env->exception_callback_subprog].start 571 : 0; 572 573 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ 574 insn_stack[0] = 0; /* 0 is the first instruction */ 575 env->cfg.cur_stack = 1; 576 577 walk_cfg: 578 while (env->cfg.cur_stack > 0) { 579 int t = insn_stack[env->cfg.cur_stack - 1]; 580 581 ret = visit_insn(t, env); 582 switch (ret) { 583 case DONE_EXPLORING: 584 insn_state[t] = EXPLORED; 585 env->cfg.cur_stack--; 586 break; 587 case KEEP_EXPLORING: 588 break; 589 default: 590 if (ret > 0) { 591 verifier_bug(env, "visit_insn internal bug"); 592 ret = -EFAULT; 593 } 594 goto err_free; 595 } 596 } 597 598 if (env->cfg.cur_stack < 0) { 599 verifier_bug(env, "pop stack internal bug"); 600 ret = -EFAULT; 601 goto err_free; 602 } 603 604 if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) { 605 insn_state[ex_insn_beg] = DISCOVERED; 606 insn_stack[0] = ex_insn_beg; 607 env->cfg.cur_stack = 1; 608 goto walk_cfg; 609 } 610 611 for (i = 0; i < insn_cnt; i++) { 612 struct bpf_insn *insn = &env->prog->insnsi[i]; 613 614 if (insn_state[i] != EXPLORED) { 615 verbose(env, "unreachable insn %d\n", i); 616 ret = -EINVAL; 617 goto err_free; 618 } 619 if (bpf_is_ldimm64(insn)) { 620 if (insn_state[i + 1] != 0) { 621 verbose(env, "jump into the middle of ldimm64 insn %d\n", i); 622 ret = -EINVAL; 623 goto err_free; 624 } 625 i++; /* skip second half of ldimm64 */ 626 } 627 } 628 ret = 0; /* cfg looks good */ 629 env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data; 630 env->prog->aux->might_sleep = env->subprog_info[0].might_sleep; 631 632 err_free: 633 kvfree(insn_state); 634 kvfree(insn_stack); 635 env->cfg.insn_state = env->cfg.insn_stack = NULL; 636 return ret; 637 } 638 639 /* 640 * For each subprogram 'i' fill array env->cfg.insn_subprogram sub-range 641 * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start) 642 * with indices of 'i' instructions in postorder. 643 */ 644 int bpf_compute_postorder(struct bpf_verifier_env *env) 645 { 646 u32 cur_postorder, i, top, stack_sz, s; 647 int *stack = NULL, *postorder = NULL, *state = NULL; 648 struct bpf_iarray *succ; 649 650 postorder = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 651 state = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 652 stack = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 653 if (!postorder || !state || !stack) { 654 kvfree(postorder); 655 kvfree(state); 656 kvfree(stack); 657 return -ENOMEM; 658 } 659 cur_postorder = 0; 660 for (i = 0; i < env->subprog_cnt; i++) { 661 env->subprog_info[i].postorder_start = cur_postorder; 662 stack[0] = env->subprog_info[i].start; 663 stack_sz = 1; 664 do { 665 top = stack[stack_sz - 1]; 666 state[top] |= DISCOVERED; 667 if (state[top] & EXPLORED) { 668 postorder[cur_postorder++] = top; 669 stack_sz--; 670 continue; 671 } 672 succ = bpf_insn_successors(env, top); 673 for (s = 0; s < succ->cnt; ++s) { 674 if (!state[succ->items[s]]) { 675 stack[stack_sz++] = succ->items[s]; 676 state[succ->items[s]] |= DISCOVERED; 677 } 678 } 679 state[top] |= EXPLORED; 680 } while (stack_sz); 681 } 682 env->subprog_info[i].postorder_start = cur_postorder; 683 env->cfg.insn_postorder = postorder; 684 env->cfg.cur_postorder = cur_postorder; 685 kvfree(stack); 686 kvfree(state); 687 return 0; 688 } 689 690 /* 691 * Compute strongly connected components (SCCs) on the CFG. 692 * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc. 693 * If instruction is a sole member of its SCC and there are no self edges, 694 * assign it SCC number of zero. 695 * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation. 696 */ 697 int bpf_compute_scc(struct bpf_verifier_env *env) 698 { 699 const u32 NOT_ON_STACK = U32_MAX; 700 701 struct bpf_insn_aux_data *aux = env->insn_aux_data; 702 const u32 insn_cnt = env->prog->len; 703 int stack_sz, dfs_sz, err = 0; 704 u32 *stack, *pre, *low, *dfs; 705 u32 i, j, t, w; 706 u32 next_preorder_num; 707 u32 next_scc_id; 708 bool assign_scc; 709 struct bpf_iarray *succ; 710 711 next_preorder_num = 1; 712 next_scc_id = 1; 713 /* 714 * - 'stack' accumulates vertices in DFS order, see invariant comment below; 715 * - 'pre[t] == p' => preorder number of vertex 't' is 'p'; 716 * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n'; 717 * - 'dfs' DFS traversal stack, used to emulate explicit recursion. 718 */ 719 stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 720 pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 721 low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 722 dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT); 723 if (!stack || !pre || !low || !dfs) { 724 err = -ENOMEM; 725 goto exit; 726 } 727 /* 728 * References: 729 * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms" 730 * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components" 731 * 732 * The algorithm maintains the following invariant: 733 * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]'; 734 * - then, vertex 'u' remains on stack while vertex 'v' is on stack. 735 * 736 * Consequently: 737 * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u', 738 * such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack, 739 * and thus there is an SCC (loop) containing both 'u' and 'v'. 740 * - If 'low[v] == pre[v]', loops containing 'v' have been explored, 741 * and 'v' can be considered the root of some SCC. 742 * 743 * Here is a pseudo-code for an explicitly recursive version of the algorithm: 744 * 745 * NOT_ON_STACK = insn_cnt + 1 746 * pre = [0] * insn_cnt 747 * low = [0] * insn_cnt 748 * scc = [0] * insn_cnt 749 * stack = [] 750 * 751 * next_preorder_num = 1 752 * next_scc_id = 1 753 * 754 * def recur(w): 755 * nonlocal next_preorder_num 756 * nonlocal next_scc_id 757 * 758 * pre[w] = next_preorder_num 759 * low[w] = next_preorder_num 760 * next_preorder_num += 1 761 * stack.append(w) 762 * for s in successors(w): 763 * # Note: for classic algorithm the block below should look as: 764 * # 765 * # if pre[s] == 0: 766 * # recur(s) 767 * # low[w] = min(low[w], low[s]) 768 * # elif low[s] != NOT_ON_STACK: 769 * # low[w] = min(low[w], pre[s]) 770 * # 771 * # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])' 772 * # does not break the invariant and makes iterative version of the algorithm 773 * # simpler. See 'Algorithm #3' from [2]. 774 * 775 * # 's' not yet visited 776 * if pre[s] == 0: 777 * recur(s) 778 * # if 's' is on stack, pick lowest reachable preorder number from it; 779 * # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]', 780 * # so 'min' would be a noop. 781 * low[w] = min(low[w], low[s]) 782 * 783 * if low[w] == pre[w]: 784 * # 'w' is the root of an SCC, pop all vertices 785 * # below 'w' on stack and assign same SCC to them. 786 * while True: 787 * t = stack.pop() 788 * low[t] = NOT_ON_STACK 789 * scc[t] = next_scc_id 790 * if t == w: 791 * break 792 * next_scc_id += 1 793 * 794 * for i in range(0, insn_cnt): 795 * if pre[i] == 0: 796 * recur(i) 797 * 798 * Below implementation replaces explicit recursion with array 'dfs'. 799 */ 800 for (i = 0; i < insn_cnt; i++) { 801 if (pre[i]) 802 continue; 803 stack_sz = 0; 804 dfs_sz = 1; 805 dfs[0] = i; 806 dfs_continue: 807 while (dfs_sz) { 808 w = dfs[dfs_sz - 1]; 809 if (pre[w] == 0) { 810 low[w] = next_preorder_num; 811 pre[w] = next_preorder_num; 812 next_preorder_num++; 813 stack[stack_sz++] = w; 814 } 815 /* Visit 'w' successors */ 816 succ = bpf_insn_successors(env, w); 817 for (j = 0; j < succ->cnt; ++j) { 818 if (pre[succ->items[j]]) { 819 low[w] = min(low[w], low[succ->items[j]]); 820 } else { 821 dfs[dfs_sz++] = succ->items[j]; 822 goto dfs_continue; 823 } 824 } 825 /* 826 * Preserve the invariant: if some vertex above in the stack 827 * is reachable from 'w', keep 'w' on the stack. 828 */ 829 if (low[w] < pre[w]) { 830 dfs_sz--; 831 goto dfs_continue; 832 } 833 /* 834 * Assign SCC number only if component has two or more elements, 835 * or if component has a self reference, or if instruction is a 836 * callback calling function (implicit loop). 837 */ 838 assign_scc = stack[stack_sz - 1] != w; /* two or more elements? */ 839 for (j = 0; j < succ->cnt; ++j) { /* self reference? */ 840 if (succ->items[j] == w) { 841 assign_scc = true; 842 break; 843 } 844 } 845 if (bpf_calls_callback(env, w)) /* implicit loop? */ 846 assign_scc = true; 847 /* Pop component elements from stack */ 848 do { 849 t = stack[--stack_sz]; 850 low[t] = NOT_ON_STACK; 851 if (assign_scc) 852 aux[t].scc = next_scc_id; 853 } while (t != w); 854 if (assign_scc) 855 next_scc_id++; 856 dfs_sz--; 857 } 858 } 859 env->scc_info = kvzalloc_objs(*env->scc_info, next_scc_id, 860 GFP_KERNEL_ACCOUNT); 861 if (!env->scc_info) { 862 err = -ENOMEM; 863 goto exit; 864 } 865 env->scc_cnt = next_scc_id; 866 exit: 867 kvfree(stack); 868 kvfree(pre); 869 kvfree(low); 870 kvfree(dfs); 871 return err; 872 } 873