1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ 3 #include <linux/bpf.h> 4 #include <linux/bpf_verifier.h> 5 #include <linux/filter.h> 6 #include <linux/bitmap.h> 7 8 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) 9 10 /* for any branch, call, exit record the history of jmps in the given state */ 11 int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, 12 int insn_flags, int spi, int frame, u64 linked_regs) 13 { 14 u32 cnt = cur->jmp_history_cnt; 15 struct bpf_jmp_history_entry *p; 16 size_t alloc_size; 17 18 /* combine instruction flags if we already recorded this instruction */ 19 if (env->cur_hist_ent) { 20 /* atomic instructions push insn_flags twice, for READ and 21 * WRITE sides, but they should agree on stack slot 22 */ 23 verifier_bug_if((env->cur_hist_ent->flags & insn_flags) && 24 (env->cur_hist_ent->flags & insn_flags) != insn_flags, 25 env, "insn history: insn_idx %d cur flags %x new flags %x", 26 env->insn_idx, env->cur_hist_ent->flags, insn_flags); 27 env->cur_hist_ent->flags |= insn_flags; 28 env->cur_hist_ent->spi = spi; 29 env->cur_hist_ent->frame = frame; 30 verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env, 31 "insn history: insn_idx %d linked_regs: %#llx", 32 env->insn_idx, env->cur_hist_ent->linked_regs); 33 env->cur_hist_ent->linked_regs = linked_regs; 34 return 0; 35 } 36 37 cnt++; 38 alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); 39 p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT); 40 if (!p) 41 return -ENOMEM; 42 cur->jmp_history = p; 43 44 p = &cur->jmp_history[cnt - 1]; 45 p->idx = env->insn_idx; 46 p->prev_idx = env->prev_insn_idx; 47 p->flags = insn_flags; 48 p->spi = spi; 49 p->frame = frame; 50 p->linked_regs = linked_regs; 51 cur->jmp_history_cnt = cnt; 52 env->cur_hist_ent = p; 53 54 return 0; 55 } 56 57 static bool is_atomic_load_insn(const struct bpf_insn *insn) 58 { 59 return BPF_CLASS(insn->code) == BPF_STX && 60 BPF_MODE(insn->code) == BPF_ATOMIC && 61 insn->imm == BPF_LOAD_ACQ; 62 } 63 64 static bool is_atomic_fetch_insn(const struct bpf_insn *insn) 65 { 66 return BPF_CLASS(insn->code) == BPF_STX && 67 BPF_MODE(insn->code) == BPF_ATOMIC && 68 (insn->imm & BPF_FETCH); 69 } 70 71 /* Backtrack one insn at a time. If idx is not at the top of recorded 72 * history then previous instruction came from straight line execution. 73 * Return -ENOENT if we exhausted all instructions within given state. 74 * 75 * It's legal to have a bit of a looping with the same starting and ending 76 * insn index within the same state, e.g.: 3->4->5->3, so just because current 77 * instruction index is the same as state's first_idx doesn't mean we are 78 * done. If there is still some jump history left, we should keep going. We 79 * need to take into account that we might have a jump history between given 80 * state's parent and itself, due to checkpointing. In this case, we'll have 81 * history entry recording a jump from last instruction of parent state and 82 * first instruction of given state. 83 */ 84 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, 85 u32 *history) 86 { 87 u32 cnt = *history; 88 89 if (i == st->first_insn_idx) { 90 if (cnt == 0) 91 return -ENOENT; 92 if (cnt == 1 && st->jmp_history[0].idx == i) 93 return -ENOENT; 94 } 95 96 if (cnt && st->jmp_history[cnt - 1].idx == i) { 97 i = st->jmp_history[cnt - 1].prev_idx; 98 (*history)--; 99 } else { 100 i--; 101 } 102 return i; 103 } 104 105 static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st, 106 u32 hist_end, int insn_idx) 107 { 108 if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx) 109 return &st->jmp_history[hist_end - 1]; 110 return NULL; 111 } 112 113 static inline void bt_init(struct backtrack_state *bt, u32 frame) 114 { 115 bt->frame = frame; 116 } 117 118 static inline void bt_reset(struct backtrack_state *bt) 119 { 120 struct bpf_verifier_env *env = bt->env; 121 122 memset(bt, 0, sizeof(*bt)); 123 bt->env = env; 124 } 125 126 static inline u32 bt_empty(struct backtrack_state *bt) 127 { 128 u64 mask = 0; 129 int i; 130 131 for (i = 0; i <= bt->frame; i++) 132 mask |= bt->reg_masks[i] | bt->stack_masks[i] | bt->stack_arg_masks[i]; 133 134 return mask == 0; 135 } 136 137 static inline void bt_clear_frame_stack_arg_slot(struct backtrack_state *bt, u32 frame, u32 slot) 138 { 139 bt->stack_arg_masks[frame] &= ~(1 << slot); 140 } 141 142 static inline bool bt_is_frame_stack_arg_slot_set(struct backtrack_state *bt, u32 frame, u32 slot) 143 { 144 return bt->stack_arg_masks[frame] & (1 << slot); 145 } 146 147 static inline int bt_subprog_enter(struct backtrack_state *bt) 148 { 149 if (bt->frame == MAX_CALL_FRAMES - 1) { 150 verifier_bug(bt->env, "subprog enter from frame %d", bt->frame); 151 return -EFAULT; 152 } 153 bt->frame++; 154 return 0; 155 } 156 157 static inline int bt_subprog_exit(struct backtrack_state *bt) 158 { 159 if (bt->frame == 0) { 160 verifier_bug(bt->env, "subprog exit from frame 0"); 161 return -EFAULT; 162 } 163 bt->frame--; 164 return 0; 165 } 166 167 static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg) 168 { 169 bt->reg_masks[frame] &= ~(1 << reg); 170 } 171 172 static inline void bt_set_reg(struct backtrack_state *bt, u32 reg) 173 { 174 bpf_bt_set_frame_reg(bt, bt->frame, reg); 175 } 176 177 static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg) 178 { 179 bt_clear_frame_reg(bt, bt->frame, reg); 180 } 181 182 static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot) 183 { 184 bt->stack_masks[frame] &= ~(1ull << slot); 185 } 186 187 static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame) 188 { 189 return bt->reg_masks[frame]; 190 } 191 192 static inline u32 bt_reg_mask(struct backtrack_state *bt) 193 { 194 return bt->reg_masks[bt->frame]; 195 } 196 197 static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame) 198 { 199 return bt->stack_masks[frame]; 200 } 201 202 static inline u64 bt_stack_mask(struct backtrack_state *bt) 203 { 204 return bt->stack_masks[bt->frame]; 205 } 206 207 static inline u8 bt_stack_arg_mask(struct backtrack_state *bt) 208 { 209 return bt->stack_arg_masks[bt->frame]; 210 } 211 212 static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg) 213 { 214 return bt->reg_masks[bt->frame] & (1 << reg); 215 } 216 217 218 /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */ 219 static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask) 220 { 221 DECLARE_BITMAP(mask, 64); 222 bool first = true; 223 int i, n; 224 225 buf[0] = '\0'; 226 227 bitmap_from_u64(mask, reg_mask); 228 for_each_set_bit(i, mask, 32) { 229 n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i); 230 first = false; 231 buf += n; 232 buf_sz -= n; 233 if (buf_sz < 0) 234 break; 235 } 236 } 237 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */ 238 void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) 239 { 240 DECLARE_BITMAP(mask, 64); 241 bool first = true; 242 int i, n; 243 244 buf[0] = '\0'; 245 246 bitmap_from_u64(mask, stack_mask); 247 for_each_set_bit(i, mask, 64) { 248 n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8); 249 first = false; 250 buf += n; 251 buf_sz -= n; 252 if (buf_sz < 0) 253 break; 254 } 255 } 256 257 258 /* For given verifier state backtrack_insn() is called from the last insn to 259 * the first insn. Its purpose is to compute a bitmask of registers and 260 * stack slots that needs precision in the parent verifier state. 261 * 262 * @idx is an index of the instruction we are currently processing; 263 * @subseq_idx is an index of the subsequent instruction that: 264 * - *would be* executed next, if jump history is viewed in forward order; 265 * - *was* processed previously during backtracking. 266 */ 267 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, 268 struct bpf_jmp_history_entry *hist, struct backtrack_state *bt) 269 { 270 struct bpf_insn *insn = env->prog->insnsi + idx; 271 u8 class = BPF_CLASS(insn->code); 272 u8 opcode = BPF_OP(insn->code); 273 u8 mode = BPF_MODE(insn->code); 274 u32 dreg = insn->dst_reg; 275 u32 sreg = insn->src_reg; 276 u32 spi, i, fr; 277 278 if (insn->code == 0) 279 return 0; 280 if (env->log.level & BPF_LOG_LEVEL2) { 281 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt)); 282 verbose(env, "mark_precise: frame%d: regs=%s ", 283 bt->frame, env->tmp_str_buf); 284 bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt)); 285 verbose(env, "stack=%s before ", env->tmp_str_buf); 286 verbose(env, "%d: ", idx); 287 bpf_verbose_insn(env, insn); 288 } 289 290 /* If there is a history record that some registers gained range at this insn, 291 * propagate precision marks to those registers, so that bt_is_reg_set() 292 * accounts for these registers. 293 */ 294 bpf_bt_sync_linked_regs(bt, hist); 295 296 if (class == BPF_ALU || class == BPF_ALU64) { 297 if (!bt_is_reg_set(bt, dreg)) 298 return 0; 299 if (opcode == BPF_END || opcode == BPF_NEG) { 300 /* sreg is reserved and unused 301 * dreg still need precision before this insn 302 */ 303 return 0; 304 } else if (opcode == BPF_MOV) { 305 if (BPF_SRC(insn->code) == BPF_X) { 306 /* dreg = sreg or dreg = (s8, s16, s32)sreg 307 * dreg needs precision after this insn 308 * sreg needs precision before this insn 309 */ 310 bt_clear_reg(bt, dreg); 311 if (sreg != BPF_REG_FP) 312 bt_set_reg(bt, sreg); 313 } else { 314 /* dreg = K 315 * dreg needs precision after this insn. 316 * Corresponding register is already marked 317 * as precise=true in this verifier state. 318 * No further markings in parent are necessary 319 */ 320 bt_clear_reg(bt, dreg); 321 } 322 } else { 323 if (BPF_SRC(insn->code) == BPF_X) { 324 /* dreg += sreg 325 * both dreg and sreg need precision 326 * before this insn 327 */ 328 if (sreg != BPF_REG_FP) 329 bt_set_reg(bt, sreg); 330 } /* else dreg += K 331 * dreg still needs precision before this insn 332 */ 333 } 334 } else if (class == BPF_LDX || 335 is_atomic_load_insn(insn) || 336 is_atomic_fetch_insn(insn)) { 337 u32 load_reg = dreg; 338 339 /* 340 * Atomic fetch operation writes the old value into 341 * a register (sreg or r0) and if it was tracked for 342 * precision, propagate to the stack slot like we do 343 * in regular ldx. 344 */ 345 if (is_atomic_fetch_insn(insn)) 346 load_reg = insn->imm == BPF_CMPXCHG ? 347 BPF_REG_0 : sreg; 348 349 if (!bt_is_reg_set(bt, load_reg)) 350 return 0; 351 bt_clear_reg(bt, load_reg); 352 353 if (hist && hist->flags & INSN_F_STACK_ARG_ACCESS) { 354 spi = hist->spi; 355 /* 356 * Stack arg read: callee reads from r11+off, but 357 * the data lives in the caller's stack_arg_regs. 358 * Set the mask in the caller frame so precision 359 * is marked in the caller's slot at the callee 360 * entry checkpoint. 361 */ 362 bt_set_frame_stack_arg_slot(bt, bt->frame - 1, spi); 363 return 0; 364 } 365 366 /* scalars can only be spilled into stack w/o losing precision. 367 * Load from any other memory can be zero extended. 368 * The desire to keep that precision is already indicated 369 * by 'precise' mark in corresponding register of this state. 370 * No further tracking necessary. 371 */ 372 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) 373 return 0; 374 /* dreg = *(u64 *)[fp - off] was a fill from the stack. 375 * that [fp - off] slot contains scalar that needs to be 376 * tracked with precision 377 */ 378 spi = hist->spi; 379 fr = hist->frame; 380 bpf_bt_set_frame_slot(bt, fr, spi); 381 } else if (class == BPF_STX || class == BPF_ST) { 382 if (bt_is_reg_set(bt, dreg)) 383 /* stx & st shouldn't be using _scalar_ dst_reg 384 * to access memory. It means backtracking 385 * encountered a case of pointer subtraction. 386 */ 387 return -ENOTSUPP; 388 389 if (hist && hist->flags & INSN_F_STACK_ARG_ACCESS) { 390 spi = hist->spi; 391 if (!bt_is_frame_stack_arg_slot_set(bt, bt->frame, spi)) 392 return 0; 393 bt_clear_frame_stack_arg_slot(bt, bt->frame, spi); 394 if (class == BPF_STX) 395 bt_set_reg(bt, sreg); 396 return 0; 397 } 398 399 /* scalars can only be spilled into stack */ 400 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) 401 return 0; 402 spi = hist->spi; 403 fr = hist->frame; 404 if (!bt_is_frame_slot_set(bt, fr, spi)) 405 return 0; 406 bt_clear_frame_slot(bt, fr, spi); 407 if (class == BPF_STX) 408 bt_set_reg(bt, sreg); 409 } else if (class == BPF_JMP || class == BPF_JMP32) { 410 if (bpf_pseudo_call(insn)) { 411 int subprog_insn_idx, subprog; 412 413 subprog_insn_idx = idx + insn->imm + 1; 414 subprog = bpf_find_subprog(env, subprog_insn_idx); 415 if (subprog < 0) 416 return -EFAULT; 417 418 if (bpf_subprog_is_global(env, subprog)) { 419 /* check that jump history doesn't have any 420 * extra instructions from subprog; the next 421 * instruction after call to global subprog 422 * should be literally next instruction in 423 * caller program 424 */ 425 verifier_bug_if(idx + 1 != subseq_idx, env, 426 "extra insn from subprog"); 427 /* r1-r5 are invalidated after subprog call, 428 * so for global func call it shouldn't be set 429 * anymore 430 */ 431 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { 432 verifier_bug(env, "global subprog unexpected regs %x", 433 bt_reg_mask(bt)); 434 return -EFAULT; 435 } 436 /* global subprog always sets R0 */ 437 bt_clear_reg(bt, BPF_REG_0); 438 return 0; 439 } else { 440 /* static subprog call instruction, which 441 * means that we are exiting current subprog, 442 * so only r1-r5 could be still requested as 443 * precise, r0 and r6-r10 or any stack slot in 444 * the current frame should be zero by now 445 */ 446 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { 447 verifier_bug(env, "static subprog unexpected regs %x", 448 bt_reg_mask(bt)); 449 return -EFAULT; 450 } 451 /* we are now tracking register spills correctly, 452 * so any instance of leftover slots is a bug 453 */ 454 if (bt_stack_mask(bt) != 0) { 455 verifier_bug(env, 456 "static subprog leftover stack slots %llx", 457 bt_stack_mask(bt)); 458 return -EFAULT; 459 } 460 /* propagate r1-r5 to the caller */ 461 for (i = BPF_REG_1; i <= BPF_REG_5; i++) { 462 if (bt_is_reg_set(bt, i)) { 463 bt_clear_reg(bt, i); 464 bpf_bt_set_frame_reg(bt, bt->frame - 1, i); 465 } 466 } 467 if (bt_stack_arg_mask(bt)) { 468 verifier_bug(env, 469 "static subprog leftover stack arg slots %x", 470 bt_stack_arg_mask(bt)); 471 return -EFAULT; 472 } 473 if (bt_subprog_exit(bt)) 474 return -EFAULT; 475 return 0; 476 } 477 } else if (bpf_is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) { 478 /* exit from callback subprog to callback-calling helper or 479 * kfunc call. Use idx/subseq_idx check to discern it from 480 * straight line code backtracking. 481 * Unlike the subprog call handling above, we shouldn't 482 * propagate precision of r1-r5 (if any requested), as they are 483 * not actually arguments passed directly to callback subprogs 484 */ 485 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { 486 verifier_bug(env, "callback unexpected regs %x", 487 bt_reg_mask(bt)); 488 return -EFAULT; 489 } 490 if (bt_stack_mask(bt) != 0) { 491 verifier_bug(env, "callback leftover stack slots %llx", 492 bt_stack_mask(bt)); 493 return -EFAULT; 494 } 495 /* clear r1-r5 in callback subprog's mask */ 496 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 497 bt_clear_reg(bt, i); 498 if (bt_subprog_exit(bt)) 499 return -EFAULT; 500 return 0; 501 } else if (opcode == BPF_CALL) { 502 /* kfunc with imm==0 is invalid and fixup_kfunc_call will 503 * catch this error later. Make backtracking conservative 504 * with ENOTSUPP. 505 */ 506 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0) 507 return -ENOTSUPP; 508 /* regular helper call sets R0 */ 509 bt_clear_reg(bt, BPF_REG_0); 510 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { 511 /* if backtracking was looking for registers R1-R5 512 * they should have been found already. 513 */ 514 verifier_bug(env, "backtracking call unexpected regs %x", 515 bt_reg_mask(bt)); 516 return -EFAULT; 517 } 518 if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call 519 && subseq_idx - idx != 1) { 520 if (bt_subprog_enter(bt)) 521 return -EFAULT; 522 } 523 } else if (opcode == BPF_EXIT) { 524 bool r0_precise; 525 526 /* Backtracking to a nested function call, 'idx' is a part of 527 * the inner frame 'subseq_idx' is a part of the outer frame. 528 * In case of a regular function call, instructions giving 529 * precision to registers R1-R5 should have been found already. 530 * In case of a callback, it is ok to have R1-R5 marked for 531 * backtracking, as these registers are set by the function 532 * invoking callback. 533 */ 534 if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx)) 535 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 536 bt_clear_reg(bt, i); 537 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { 538 verifier_bug(env, "backtracking exit unexpected regs %x", 539 bt_reg_mask(bt)); 540 return -EFAULT; 541 } 542 543 /* BPF_EXIT in subprog or callback always returns 544 * right after the call instruction, so by checking 545 * whether the instruction at subseq_idx-1 is subprog 546 * call or not we can distinguish actual exit from 547 * *subprog* from exit from *callback*. In the former 548 * case, we need to propagate r0 precision, if 549 * necessary. In the former we never do that. 550 */ 551 r0_precise = subseq_idx - 1 >= 0 && 552 bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) && 553 bt_is_reg_set(bt, BPF_REG_0); 554 555 bt_clear_reg(bt, BPF_REG_0); 556 if (bt_subprog_enter(bt)) 557 return -EFAULT; 558 559 if (r0_precise) 560 bt_set_reg(bt, BPF_REG_0); 561 /* r6-r9 and stack slots will stay set in caller frame 562 * bitmasks until we return back from callee(s) 563 */ 564 return 0; 565 } else if (BPF_SRC(insn->code) == BPF_X) { 566 if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg)) 567 return 0; 568 /* dreg <cond> sreg 569 * Both dreg and sreg need precision before 570 * this insn. If only sreg was marked precise 571 * before it would be equally necessary to 572 * propagate it to dreg. 573 */ 574 if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK)) 575 bt_set_reg(bt, sreg); 576 if (!hist || !(hist->flags & INSN_F_DST_REG_STACK)) 577 bt_set_reg(bt, dreg); 578 } else if (BPF_SRC(insn->code) == BPF_K) { 579 /* dreg <cond> K 580 * Only dreg still needs precision before 581 * this insn, so for the K-based conditional 582 * there is nothing new to be marked. 583 */ 584 } 585 } else if (class == BPF_LD) { 586 if (!bt_is_reg_set(bt, dreg)) 587 return 0; 588 bt_clear_reg(bt, dreg); 589 /* It's ld_imm64 or ld_abs or ld_ind. 590 * For ld_imm64 no further tracking of precision 591 * into parent is necessary 592 */ 593 if (mode == BPF_IND || mode == BPF_ABS) 594 /* to be analyzed */ 595 return -ENOTSUPP; 596 } 597 /* Propagate precision marks to linked registers, to account for 598 * registers marked as precise in this function. 599 */ 600 bpf_bt_sync_linked_regs(bt, hist); 601 return 0; 602 } 603 604 /* the scalar precision tracking algorithm: 605 * . at the start all registers have precise=false. 606 * . scalar ranges are tracked as normal through alu and jmp insns. 607 * . once precise value of the scalar register is used in: 608 * . ptr + scalar alu 609 * . if (scalar cond K|scalar) 610 * . helper_call(.., scalar, ...) where ARG_CONST is expected 611 * backtrack through the verifier states and mark all registers and 612 * stack slots with spilled constants that these scalar registers 613 * should be precise. 614 * . during state pruning two registers (or spilled stack slots) 615 * are equivalent if both are not precise. 616 * 617 * Note the verifier cannot simply walk register parentage chain, 618 * since many different registers and stack slots could have been 619 * used to compute single precise scalar. 620 * 621 * The approach of starting with precise=true for all registers and then 622 * backtrack to mark a register as not precise when the verifier detects 623 * that program doesn't care about specific value (e.g., when helper 624 * takes register as ARG_ANYTHING parameter) is not safe. 625 * 626 * It's ok to walk single parentage chain of the verifier states. 627 * It's possible that this backtracking will go all the way till 1st insn. 628 * All other branches will be explored for needing precision later. 629 * 630 * The backtracking needs to deal with cases like: 631 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) 632 * r9 -= r8 633 * r5 = r9 634 * if r5 > 0x79f goto pc+7 635 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) 636 * r5 += 1 637 * ... 638 * call bpf_perf_event_output#25 639 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO 640 * 641 * and this case: 642 * r6 = 1 643 * call foo // uses callee's r6 inside to compute r0 644 * r0 += r6 645 * if r0 == 0 goto 646 * 647 * to track above reg_mask/stack_mask needs to be independent for each frame. 648 * 649 * Also if parent's curframe > frame where backtracking started, 650 * the verifier need to mark registers in both frames, otherwise callees 651 * may incorrectly prune callers. This is similar to 652 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") 653 * 654 * For now backtracking falls back into conservative marking. 655 */ 656 void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env, 657 struct bpf_verifier_state *st) 658 { 659 struct bpf_func_state *func; 660 struct bpf_reg_state *reg; 661 int i, j; 662 663 if (env->log.level & BPF_LOG_LEVEL2) { 664 verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n", 665 st->curframe); 666 } 667 668 /* big hammer: mark all scalars precise in this path. 669 * pop_stack may still get !precise scalars. 670 * We also skip current state and go straight to first parent state, 671 * because precision markings in current non-checkpointed state are 672 * not needed. See why in the comment in __mark_chain_precision below. 673 */ 674 for (st = st->parent; st; st = st->parent) { 675 for (i = 0; i <= st->curframe; i++) { 676 func = st->frame[i]; 677 for (j = 0; j < BPF_REG_FP; j++) { 678 reg = &func->regs[j]; 679 if (reg->type != SCALAR_VALUE || reg->precise) 680 continue; 681 reg->precise = true; 682 if (env->log.level & BPF_LOG_LEVEL2) { 683 verbose(env, "force_precise: frame%d: forcing r%d to be precise\n", 684 i, j); 685 } 686 } 687 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { 688 if (!bpf_is_spilled_reg(&func->stack[j])) 689 continue; 690 reg = &func->stack[j].spilled_ptr; 691 if (reg->type != SCALAR_VALUE || reg->precise) 692 continue; 693 reg->precise = true; 694 if (env->log.level & BPF_LOG_LEVEL2) { 695 verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n", 696 i, -(j + 1) * 8); 697 } 698 } 699 } 700 } 701 } 702 703 /* 704 * bpf_mark_chain_precision() backtracks BPF program instruction sequence and 705 * chain of verifier states making sure that register *regno* (if regno >= 0) 706 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked 707 * SCALARS, as well as any other registers and slots that contribute to 708 * a tracked state of given registers/stack slots, depending on specific BPF 709 * assembly instructions (see backtrack_insns() for exact instruction handling 710 * logic). This backtracking relies on recorded jmp_history and is able to 711 * traverse entire chain of parent states. This process ends only when all the 712 * necessary registers/slots and their transitive dependencies are marked as 713 * precise. 714 * 715 * One important and subtle aspect is that precise marks *do not matter* in 716 * the currently verified state (current state). It is important to understand 717 * why this is the case. 718 * 719 * First, note that current state is the state that is not yet "checkpointed", 720 * i.e., it is not yet put into env->explored_states, and it has no children 721 * states as well. It's ephemeral, and can end up either a) being discarded if 722 * compatible explored state is found at some point or BPF_EXIT instruction is 723 * reached or b) checkpointed and put into env->explored_states, branching out 724 * into one or more children states. 725 * 726 * In the former case, precise markings in current state are completely 727 * ignored by state comparison code (see regsafe() for details). Only 728 * checkpointed ("old") state precise markings are important, and if old 729 * state's register/slot is precise, regsafe() assumes current state's 730 * register/slot as precise and checks value ranges exactly and precisely. If 731 * states turn out to be compatible, current state's necessary precise 732 * markings and any required parent states' precise markings are enforced 733 * after the fact with propagate_precision() logic, after the fact. But it's 734 * important to realize that in this case, even after marking current state 735 * registers/slots as precise, we immediately discard current state. So what 736 * actually matters is any of the precise markings propagated into current 737 * state's parent states, which are always checkpointed (due to b) case above). 738 * As such, for scenario a) it doesn't matter if current state has precise 739 * markings set or not. 740 * 741 * Now, for the scenario b), checkpointing and forking into child(ren) 742 * state(s). Note that before current state gets to checkpointing step, any 743 * processed instruction always assumes precise SCALAR register/slot 744 * knowledge: if precise value or range is useful to prune jump branch, BPF 745 * verifier takes this opportunity enthusiastically. Similarly, when 746 * register's value is used to calculate offset or memory address, exact 747 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to 748 * what we mentioned above about state comparison ignoring precise markings 749 * during state comparison, BPF verifier ignores and also assumes precise 750 * markings *at will* during instruction verification process. But as verifier 751 * assumes precision, it also propagates any precision dependencies across 752 * parent states, which are not yet finalized, so can be further restricted 753 * based on new knowledge gained from restrictions enforced by their children 754 * states. This is so that once those parent states are finalized, i.e., when 755 * they have no more active children state, state comparison logic in 756 * is_state_visited() would enforce strict and precise SCALAR ranges, if 757 * required for correctness. 758 * 759 * To build a bit more intuition, note also that once a state is checkpointed, 760 * the path we took to get to that state is not important. This is crucial 761 * property for state pruning. When state is checkpointed and finalized at 762 * some instruction index, it can be correctly and safely used to "short 763 * circuit" any *compatible* state that reaches exactly the same instruction 764 * index. I.e., if we jumped to that instruction from a completely different 765 * code path than original finalized state was derived from, it doesn't 766 * matter, current state can be discarded because from that instruction 767 * forward having a compatible state will ensure we will safely reach the 768 * exit. States describe preconditions for further exploration, but completely 769 * forget the history of how we got here. 770 * 771 * This also means that even if we needed precise SCALAR range to get to 772 * finalized state, but from that point forward *that same* SCALAR register is 773 * never used in a precise context (i.e., it's precise value is not needed for 774 * correctness), it's correct and safe to mark such register as "imprecise" 775 * (i.e., precise marking set to false). This is what we rely on when we do 776 * not set precise marking in current state. If no child state requires 777 * precision for any given SCALAR register, it's safe to dictate that it can 778 * be imprecise. If any child state does require this register to be precise, 779 * we'll mark it precise later retroactively during precise markings 780 * propagation from child state to parent states. 781 * 782 * Skipping precise marking setting in current state is a mild version of 783 * relying on the above observation. But we can utilize this property even 784 * more aggressively by proactively forgetting any precise marking in the 785 * current state (which we inherited from the parent state), right before we 786 * checkpoint it and branch off into new child state. This is done by 787 * mark_all_scalars_imprecise() to hopefully get more permissive and generic 788 * finalized states which help in short circuiting more future states. 789 */ 790 int bpf_mark_chain_precision(struct bpf_verifier_env *env, 791 struct bpf_verifier_state *starting_state, 792 int regno, 793 bool *changed) 794 { 795 struct bpf_verifier_state *st = starting_state; 796 struct backtrack_state *bt = &env->bt; 797 int first_idx = st->first_insn_idx; 798 int last_idx = starting_state->insn_idx; 799 int subseq_idx = -1; 800 struct bpf_func_state *func; 801 bool tmp, skip_first = true; 802 struct bpf_reg_state *reg; 803 int i, fr, err; 804 805 if (!env->bpf_capable) 806 return 0; 807 808 changed = changed ?: &tmp; 809 /* set frame number from which we are starting to backtrack */ 810 bt_init(bt, starting_state->curframe); 811 812 /* Do sanity checks against current state of register and/or stack 813 * slot, but don't set precise flag in current state, as precision 814 * tracking in the current state is unnecessary. 815 */ 816 func = st->frame[bt->frame]; 817 if (regno >= 0) { 818 reg = &func->regs[regno]; 819 if (reg->type != SCALAR_VALUE) { 820 verifier_bug(env, "backtracking misuse"); 821 return -EFAULT; 822 } 823 bt_set_reg(bt, regno); 824 } 825 826 if (bt_empty(bt)) 827 return 0; 828 829 for (;;) { 830 DECLARE_BITMAP(mask, 64); 831 u32 history = st->jmp_history_cnt; 832 struct bpf_jmp_history_entry *hist; 833 834 if (env->log.level & BPF_LOG_LEVEL2) { 835 verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", 836 bt->frame, last_idx, first_idx, subseq_idx); 837 } 838 839 if (last_idx < 0) { 840 /* we are at the entry into subprog, which 841 * is expected for global funcs, but only if 842 * requested precise registers are R1-R5 843 * (which are global func's input arguments) 844 */ 845 if (st->curframe == 0 && 846 st->frame[0]->subprogno > 0 && 847 st->frame[0]->callsite == BPF_MAIN_FUNC && 848 bt_stack_mask(bt) == 0 && 849 (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) { 850 bitmap_from_u64(mask, bt_reg_mask(bt)); 851 for_each_set_bit(i, mask, 32) { 852 reg = &st->frame[0]->regs[i]; 853 bt_clear_reg(bt, i); 854 if (reg->type == SCALAR_VALUE) { 855 reg->precise = true; 856 *changed = true; 857 } 858 } 859 return 0; 860 } 861 862 verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx", 863 st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt)); 864 return -EFAULT; 865 } 866 867 for (i = last_idx;;) { 868 if (skip_first) { 869 err = 0; 870 skip_first = false; 871 } else { 872 hist = get_jmp_hist_entry(st, history, i); 873 err = backtrack_insn(env, i, subseq_idx, hist, bt); 874 } 875 if (err == -ENOTSUPP) { 876 bpf_mark_all_scalars_precise(env, starting_state); 877 bt_reset(bt); 878 return 0; 879 } else if (err) { 880 return err; 881 } 882 if (bt_empty(bt)) 883 /* Found assignment(s) into tracked register in this state. 884 * Since this state is already marked, just return. 885 * Nothing to be tracked further in the parent state. 886 */ 887 return 0; 888 subseq_idx = i; 889 i = get_prev_insn_idx(st, i, &history); 890 if (i == -ENOENT) 891 break; 892 if (i >= env->prog->len) { 893 /* This can happen if backtracking reached insn 0 894 * and there are still reg_mask or stack_mask 895 * to backtrack. 896 * It means the backtracking missed the spot where 897 * particular register was initialized with a constant. 898 */ 899 verifier_bug(env, "backtracking idx %d", i); 900 return -EFAULT; 901 } 902 } 903 st = st->parent; 904 if (!st) 905 break; 906 907 for (fr = bt->frame; fr >= 0; fr--) { 908 func = st->frame[fr]; 909 bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr)); 910 for_each_set_bit(i, mask, 32) { 911 reg = &func->regs[i]; 912 if (reg->type != SCALAR_VALUE) { 913 bt_clear_frame_reg(bt, fr, i); 914 continue; 915 } 916 if (reg->precise) { 917 bt_clear_frame_reg(bt, fr, i); 918 } else { 919 reg->precise = true; 920 *changed = true; 921 } 922 } 923 924 bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); 925 for_each_set_bit(i, mask, 64) { 926 if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE, 927 env, "stack slot %d, total slots %d", 928 i, func->allocated_stack / BPF_REG_SIZE)) 929 return -EFAULT; 930 931 if (!bpf_is_spilled_scalar_reg(&func->stack[i])) { 932 bt_clear_frame_slot(bt, fr, i); 933 continue; 934 } 935 reg = &func->stack[i].spilled_ptr; 936 if (reg->precise) { 937 bt_clear_frame_slot(bt, fr, i); 938 } else { 939 reg->precise = true; 940 *changed = true; 941 } 942 } 943 for (i = 0; i < func->out_stack_arg_cnt; i++) { 944 if (!bt_is_frame_stack_arg_slot_set(bt, fr, i)) 945 continue; 946 reg = &func->stack_arg_regs[i]; 947 if (reg->type != SCALAR_VALUE || reg->precise) { 948 bt_clear_frame_stack_arg_slot(bt, fr, i); 949 } else { 950 reg->precise = true; 951 *changed = true; 952 } 953 } 954 if (env->log.level & BPF_LOG_LEVEL2) { 955 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, 956 bt_frame_reg_mask(bt, fr)); 957 verbose(env, "mark_precise: frame%d: parent state regs=%s ", 958 fr, env->tmp_str_buf); 959 bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, 960 bt_frame_stack_mask(bt, fr)); 961 verbose(env, "stack=%s: ", env->tmp_str_buf); 962 print_verifier_state(env, st, fr, true); 963 } 964 } 965 966 if (bt_empty(bt)) 967 return 0; 968 969 subseq_idx = first_idx; 970 last_idx = st->last_insn_idx; 971 first_idx = st->first_insn_idx; 972 } 973 974 /* if we still have requested precise regs or slots, we missed 975 * something (e.g., stack access through non-r10 register), so 976 * fallback to marking all precise 977 */ 978 if (!bt_empty(bt)) { 979 bpf_mark_all_scalars_precise(env, starting_state); 980 bt_reset(bt); 981 } 982 983 return 0; 984 } 985