1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016 Facebook
4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5 */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/bpf_mem_alloc.h>
30 #include <net/xdp.h>
31 #include <linux/trace_events.h>
32 #include <linux/kallsyms.h>
33
34 #include "disasm.h"
35
36 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
37 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
38 [_id] = & _name ## _verifier_ops,
39 #define BPF_MAP_TYPE(_id, _ops)
40 #define BPF_LINK_TYPE(_id, _name)
41 #include <linux/bpf_types.h>
42 #undef BPF_PROG_TYPE
43 #undef BPF_MAP_TYPE
44 #undef BPF_LINK_TYPE
45 };
46
47 struct bpf_mem_alloc bpf_global_percpu_ma;
48 static bool bpf_global_percpu_ma_set;
49
50 /* bpf_check() is a static code analyzer that walks eBPF program
51 * instruction by instruction and updates register/stack state.
52 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
53 *
54 * The first pass is depth-first-search to check that the program is a DAG.
55 * It rejects the following programs:
56 * - larger than BPF_MAXINSNS insns
57 * - if loop is present (detected via back-edge)
58 * - unreachable insns exist (shouldn't be a forest. program = one function)
59 * - out of bounds or malformed jumps
60 * The second pass is all possible path descent from the 1st insn.
61 * Since it's analyzing all paths through the program, the length of the
62 * analysis is limited to 64k insn, which may be hit even if total number of
63 * insn is less than 4K, but there are too many branches that change stack/regs.
64 * Number of 'branches to be analyzed' is limited to 1k
65 *
66 * On entry to each instruction, each register has a type, and the instruction
67 * changes the types of the registers depending on instruction semantics.
68 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
69 * copied to R1.
70 *
71 * All registers are 64-bit.
72 * R0 - return register
73 * R1-R5 argument passing registers
74 * R6-R9 callee saved registers
75 * R10 - frame pointer read-only
76 *
77 * At the start of BPF program the register R1 contains a pointer to bpf_context
78 * and has type PTR_TO_CTX.
79 *
80 * Verifier tracks arithmetic operations on pointers in case:
81 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
82 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
83 * 1st insn copies R10 (which has FRAME_PTR) type into R1
84 * and 2nd arithmetic instruction is pattern matched to recognize
85 * that it wants to construct a pointer to some element within stack.
86 * So after 2nd insn, the register R1 has type PTR_TO_STACK
87 * (and -20 constant is saved for further stack bounds checking).
88 * Meaning that this reg is a pointer to stack plus known immediate constant.
89 *
90 * Most of the time the registers have SCALAR_VALUE type, which
91 * means the register has some value, but it's not a valid pointer.
92 * (like pointer plus pointer becomes SCALAR_VALUE type)
93 *
94 * When verifier sees load or store instructions the type of base register
95 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
96 * four pointer types recognized by check_mem_access() function.
97 *
98 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
99 * and the range of [ptr, ptr + map's value_size) is accessible.
100 *
101 * registers used to pass values to function calls are checked against
102 * function argument constraints.
103 *
104 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
105 * It means that the register type passed to this function must be
106 * PTR_TO_STACK and it will be used inside the function as
107 * 'pointer to map element key'
108 *
109 * For example the argument constraints for bpf_map_lookup_elem():
110 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
111 * .arg1_type = ARG_CONST_MAP_PTR,
112 * .arg2_type = ARG_PTR_TO_MAP_KEY,
113 *
114 * ret_type says that this function returns 'pointer to map elem value or null'
115 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
116 * 2nd argument should be a pointer to stack, which will be used inside
117 * the helper function as a pointer to map element key.
118 *
119 * On the kernel side the helper function looks like:
120 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
121 * {
122 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
123 * void *key = (void *) (unsigned long) r2;
124 * void *value;
125 *
126 * here kernel can access 'key' and 'map' pointers safely, knowing that
127 * [key, key + map->key_size) bytes are valid and were initialized on
128 * the stack of eBPF program.
129 * }
130 *
131 * Corresponding eBPF program may look like:
132 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
133 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
134 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
135 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
136 * here verifier looks at prototype of map_lookup_elem() and sees:
137 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
138 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
139 *
140 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
141 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
142 * and were initialized prior to this call.
143 * If it's ok, then verifier allows this BPF_CALL insn and looks at
144 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
145 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
146 * returns either pointer to map value or NULL.
147 *
148 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
149 * insn, the register holding that pointer in the true branch changes state to
150 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
151 * branch. See check_cond_jmp_op().
152 *
153 * After the call R0 is set to return type of the function and registers R1-R5
154 * are set to NOT_INIT to indicate that they are no longer readable.
155 *
156 * The following reference types represent a potential reference to a kernel
157 * resource which, after first being allocated, must be checked and freed by
158 * the BPF program:
159 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
160 *
161 * When the verifier sees a helper call return a reference type, it allocates a
162 * pointer id for the reference and stores it in the current function state.
163 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
164 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
165 * passes through a NULL-check conditional. For the branch wherein the state is
166 * changed to CONST_IMM, the verifier releases the reference.
167 *
168 * For each helper function that allocates a reference, such as
169 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
170 * bpf_sk_release(). When a reference type passes into the release function,
171 * the verifier also releases the reference. If any unchecked or unreleased
172 * reference remains at the end of the program, the verifier rejects it.
173 */
174
175 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
176 struct bpf_verifier_stack_elem {
177 /* verifier state is 'st'
178 * before processing instruction 'insn_idx'
179 * and after processing instruction 'prev_insn_idx'
180 */
181 struct bpf_verifier_state st;
182 int insn_idx;
183 int prev_insn_idx;
184 struct bpf_verifier_stack_elem *next;
185 /* length of verifier log at the time this state was pushed on stack */
186 u32 log_pos;
187 };
188
189 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
190 #define BPF_COMPLEXITY_LIMIT_STATES 64
191
192 #define BPF_MAP_KEY_POISON (1ULL << 63)
193 #define BPF_MAP_KEY_SEEN (1ULL << 62)
194
195 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512
196
197 #define BPF_PRIV_STACK_MIN_SIZE 64
198
199 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx);
200 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id);
201 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
202 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
203 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
204 static int ref_set_non_owning(struct bpf_verifier_env *env,
205 struct bpf_reg_state *reg);
206 static void specialize_kfunc(struct bpf_verifier_env *env,
207 u32 func_id, u16 offset, unsigned long *addr);
208 static bool is_trusted_reg(const struct bpf_reg_state *reg);
209
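/* The map_ptr_state helpers below cache, in the per-instruction
 * bpf_insn_aux_data, which map (if any) an instruction has been seen to
 * operate on, together with 'unpriv' and 'poison' flags; the accessors
 * read those cached bits back during later verification passes.
 */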
210 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
211 {
212 return aux->map_ptr_state.poison;
213 }
214
215 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
216 {
217 return aux->map_ptr_state.unpriv;
218 }
219
220 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
221 struct bpf_map *map,
222 bool unpriv, bool poison)
223 {
224 unpriv |= bpf_map_ptr_unpriv(aux);
225 aux->map_ptr_state.unpriv = unpriv;
226 aux->map_ptr_state.poison = poison;
227 aux->map_ptr_state.map_ptr = map;
228 }
229
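/* map_key_state packs a cached constant map key value together with two
 * flag bits: BPF_MAP_KEY_SEEN (bit 62) records that a key has been cached
 * for this instruction, and BPF_MAP_KEY_POISON (bit 63) invalidates the
 * cached value (e.g. when the key is not constant across paths);
 * bpf_map_key_immediate() masks both flags off to recover the stored key.
 */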
230 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
231 {
232 return aux->map_key_state & BPF_MAP_KEY_POISON;
233 }
234
235 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
236 {
237 return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
238 }
239
240 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
241 {
242 return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
243 }
244
245 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
246 {
247 bool poisoned = bpf_map_key_poisoned(aux);
248
249 aux->map_key_state = state | BPF_MAP_KEY_SEEN |
250 (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
251 }
252
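/* All three call forms share the BPF_JMP | BPF_CALL opcode and are told
 * apart by src_reg: 0 for a helper call, BPF_PSEUDO_CALL for a BPF-to-BPF
 * subprogram call, and BPF_PSEUDO_KFUNC_CALL for a call into a kernel
 * function (kfunc).
 */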
253 static bool bpf_helper_call(const struct bpf_insn *insn)
254 {
255 return insn->code == (BPF_JMP | BPF_CALL) &&
256 insn->src_reg == 0;
257 }
258
259 static bool bpf_pseudo_call(const struct bpf_insn *insn)
260 {
261 return insn->code == (BPF_JMP | BPF_CALL) &&
262 insn->src_reg == BPF_PSEUDO_CALL;
263 }
264
265 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
266 {
267 return insn->code == (BPF_JMP | BPF_CALL) &&
268 insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
269 }
270
271 struct bpf_call_arg_meta {
272 struct bpf_map *map_ptr;
273 bool raw_mode;
274 bool pkt_access;
275 u8 release_regno;
276 int regno;
277 int access_size;
278 int mem_size;
279 u64 msize_max_value;
280 int ref_obj_id;
281 int dynptr_id;
282 int map_uid;
283 int func_id;
284 struct btf *btf;
285 u32 btf_id;
286 struct btf *ret_btf;
287 u32 ret_btf_id;
288 u32 subprogno;
289 struct btf_field *kptr_field;
290 s64 const_map_key;
291 };
292
293 struct bpf_kfunc_call_arg_meta {
294 /* In parameters */
295 struct btf *btf;
296 u32 func_id;
297 u32 kfunc_flags;
298 const struct btf_type *func_proto;
299 const char *func_name;
300 /* Out parameters */
301 u32 ref_obj_id;
302 u8 release_regno;
303 bool r0_rdonly;
304 u32 ret_btf_id;
305 u64 r0_size;
306 u32 subprogno;
307 struct {
308 u64 value;
309 bool found;
310 } arg_constant;
311
312 /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
313 * generally to pass info about user-defined local kptr types to later
314 * verification logic
315 * bpf_obj_drop/bpf_percpu_obj_drop
316 * Record the local kptr type to be drop'd
317 * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
318 * Record the local kptr type to be refcount_incr'd and use
319 * arg_owning_ref to determine whether refcount_acquire should be
320 * fallible
321 */
322 struct btf *arg_btf;
323 u32 arg_btf_id;
324 bool arg_owning_ref;
325
326 struct {
327 struct btf_field *field;
328 } arg_list_head;
329 struct {
330 struct btf_field *field;
331 } arg_rbtree_root;
332 struct {
333 enum bpf_dynptr_type type;
334 u32 id;
335 u32 ref_obj_id;
336 } initialized_dynptr;
337 struct {
338 u8 spi;
339 u8 frameno;
340 } iter;
341 struct {
342 struct bpf_map *ptr;
343 int uid;
344 } map;
345 u64 mem_size;
346 };
347
348 struct btf *btf_vmlinux;
349
350 static const char *btf_type_name(const struct btf *btf, u32 id)
351 {
352 return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
353 }
354
355 static DEFINE_MUTEX(bpf_verifier_lock);
356 static DEFINE_MUTEX(bpf_percpu_ma_lock);
357
358 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
359 {
360 struct bpf_verifier_env *env = private_data;
361 va_list args;
362
363 if (!bpf_verifier_log_needed(&env->log))
364 return;
365
366 va_start(args, fmt);
367 bpf_verifier_vlog(&env->log, fmt, args);
368 va_end(args);
369 }
370
371 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
372 struct bpf_reg_state *reg,
373 struct bpf_retval_range range, const char *ctx,
374 const char *reg_name)
375 {
376 bool unknown = true;
377
378 verbose(env, "%s the register %s has", ctx, reg_name);
379 if (reg->smin_value > S64_MIN) {
380 verbose(env, " smin=%lld", reg->smin_value);
381 unknown = false;
382 }
383 if (reg->smax_value < S64_MAX) {
384 verbose(env, " smax=%lld", reg->smax_value);
385 unknown = false;
386 }
387 if (unknown)
388 verbose(env, " unknown scalar value");
389 verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
390 }
391
392 static bool reg_not_null(const struct bpf_reg_state *reg)
393 {
394 enum bpf_reg_type type;
395
396 type = reg->type;
397 if (type_may_be_null(type))
398 return false;
399
400 type = base_type(type);
401 return type == PTR_TO_SOCKET ||
402 type == PTR_TO_TCP_SOCK ||
403 type == PTR_TO_MAP_VALUE ||
404 type == PTR_TO_MAP_KEY ||
405 type == PTR_TO_SOCK_COMMON ||
406 (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
407 type == PTR_TO_MEM;
408 }
409
410 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
411 {
412 struct btf_record *rec = NULL;
413 struct btf_struct_meta *meta;
414
415 if (reg->type == PTR_TO_MAP_VALUE) {
416 rec = reg->map_ptr->record;
417 } else if (type_is_ptr_alloc_obj(reg->type)) {
418 meta = btf_find_struct_meta(reg->btf, reg->btf_id);
419 if (meta)
420 rec = meta->record;
421 }
422 return rec;
423 }
424
425 static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
426 {
427 struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
428
429 return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
430 }
431
432 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
433 {
434 struct bpf_func_info *info;
435
436 if (!env->prog->aux->func_info)
437 return "";
438
439 info = &env->prog->aux->func_info[subprog];
440 return btf_type_name(env->prog->aux->btf, info->type_id);
441 }
442
443 static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
444 {
445 struct bpf_subprog_info *info = subprog_info(env, subprog);
446
447 info->is_cb = true;
448 info->is_async_cb = true;
449 info->is_exception_cb = true;
450 }
451
452 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
453 {
454 return subprog_info(env, subprog)->is_exception_cb;
455 }
456
457 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
458 {
459 return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK);
460 }
461
462 static bool type_is_rdonly_mem(u32 type)
463 {
464 return type & MEM_RDONLY;
465 }
466
467 static bool is_acquire_function(enum bpf_func_id func_id,
468 const struct bpf_map *map)
469 {
470 enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
471
472 if (func_id == BPF_FUNC_sk_lookup_tcp ||
473 func_id == BPF_FUNC_sk_lookup_udp ||
474 func_id == BPF_FUNC_skc_lookup_tcp ||
475 func_id == BPF_FUNC_ringbuf_reserve ||
476 func_id == BPF_FUNC_kptr_xchg)
477 return true;
478
479 if (func_id == BPF_FUNC_map_lookup_elem &&
480 (map_type == BPF_MAP_TYPE_SOCKMAP ||
481 map_type == BPF_MAP_TYPE_SOCKHASH))
482 return true;
483
484 return false;
485 }
486
487 static bool is_ptr_cast_function(enum bpf_func_id func_id)
488 {
489 return func_id == BPF_FUNC_tcp_sock ||
490 func_id == BPF_FUNC_sk_fullsock ||
491 func_id == BPF_FUNC_skc_to_tcp_sock ||
492 func_id == BPF_FUNC_skc_to_tcp6_sock ||
493 func_id == BPF_FUNC_skc_to_udp6_sock ||
494 func_id == BPF_FUNC_skc_to_mptcp_sock ||
495 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
496 func_id == BPF_FUNC_skc_to_tcp_request_sock;
497 }
498
499 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
500 {
501 return func_id == BPF_FUNC_dynptr_data;
502 }
503
504 static bool is_sync_callback_calling_kfunc(u32 btf_id);
505 static bool is_async_callback_calling_kfunc(u32 btf_id);
506 static bool is_callback_calling_kfunc(u32 btf_id);
507 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
508
509 static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id);
510
511 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
512 {
513 return func_id == BPF_FUNC_for_each_map_elem ||
514 func_id == BPF_FUNC_find_vma ||
515 func_id == BPF_FUNC_loop ||
516 func_id == BPF_FUNC_user_ringbuf_drain;
517 }
518
519 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
520 {
521 return func_id == BPF_FUNC_timer_set_callback;
522 }
523
524 static bool is_callback_calling_function(enum bpf_func_id func_id)
525 {
526 return is_sync_callback_calling_function(func_id) ||
527 is_async_callback_calling_function(func_id);
528 }
529
530 static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
531 {
532 return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
533 (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
534 }
535
536 static bool is_async_callback_calling_insn(struct bpf_insn *insn)
537 {
538 return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
539 (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
540 }
541
542 static bool is_may_goto_insn(struct bpf_insn *insn)
543 {
544 return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
545 }
546
547 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
548 {
549 return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
550 }
551
552 static bool is_storage_get_function(enum bpf_func_id func_id)
553 {
554 return func_id == BPF_FUNC_sk_storage_get ||
555 func_id == BPF_FUNC_inode_storage_get ||
556 func_id == BPF_FUNC_task_storage_get ||
557 func_id == BPF_FUNC_cgrp_storage_get;
558 }
559
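/* Sanity helper: a single helper call is expected to produce at most one
 * referenced object. Returns true if func_id falls into more than one of
 * the reference-producing categories above.
 */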
560 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
561 const struct bpf_map *map)
562 {
563 int ref_obj_uses = 0;
564
565 if (is_ptr_cast_function(func_id))
566 ref_obj_uses++;
567 if (is_acquire_function(func_id, map))
568 ref_obj_uses++;
569 if (is_dynptr_ref_function(func_id))
570 ref_obj_uses++;
571
572 return ref_obj_uses > 1;
573 }
574
575 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
576 {
577 return BPF_CLASS(insn->code) == BPF_STX &&
578 BPF_MODE(insn->code) == BPF_ATOMIC &&
579 insn->imm == BPF_CMPXCHG;
580 }
581
582 static bool is_atomic_load_insn(const struct bpf_insn *insn)
583 {
584 return BPF_CLASS(insn->code) == BPF_STX &&
585 BPF_MODE(insn->code) == BPF_ATOMIC &&
586 insn->imm == BPF_LOAD_ACQ;
587 }
588
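/* Convert a (negative) stack offset relative to the frame pointer into a
 * stack slot index: off in [-8, -1] maps to slot 0, [-16, -9] to slot 1,
 * and so on.
 */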
589 static int __get_spi(s32 off)
590 {
591 return (-off - 1) / BPF_REG_SIZE;
592 }
593
594 static struct bpf_func_state *func(struct bpf_verifier_env *env,
595 const struct bpf_reg_state *reg)
596 {
597 struct bpf_verifier_state *cur = env->cur_state;
598
599 return cur->frame[reg->frameno];
600 }
601
602 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
603 {
604 int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
605
606 /* We need to check that slots between [spi - nr_slots + 1, spi] are
607 * within [0, allocated_slots).
608 *
609 * Please note that the spi grows downwards. For example, a dynptr
610 * takes the size of two stack slots; the first slot will be at
611 * spi and the second slot will be at spi - 1.
612 */
613 return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
614 }
615
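/* Validate that reg points to an on-stack object (dynptr, iter, irq flag)
 * at a constant, BPF_REG_SIZE-aligned offset spanning nr_slots slots, and
 * return the slot index of its first (highest) slot; -EINVAL for bad
 * offsets, -ERANGE if outside the currently allocated stack.
 */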
616 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
617 const char *obj_kind, int nr_slots)
618 {
619 int off, spi;
620
621 if (!tnum_is_const(reg->var_off)) {
622 verbose(env, "%s has to be at a constant offset\n", obj_kind);
623 return -EINVAL;
624 }
625
626 off = reg->off + reg->var_off.value;
627 if (off % BPF_REG_SIZE) {
628 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
629 return -EINVAL;
630 }
631
632 spi = __get_spi(off);
633 if (spi + 1 < nr_slots) {
634 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
635 return -EINVAL;
636 }
637
638 if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
639 return -ERANGE;
640 return spi;
641 }
642
643 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
644 {
645 return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
646 }
647
648 static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
649 {
650 return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
651 }
652
653 static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
654 {
655 return stack_slot_obj_get_spi(env, reg, "irq_flag", 1);
656 }
657
658 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
659 {
660 switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
661 case DYNPTR_TYPE_LOCAL:
662 return BPF_DYNPTR_TYPE_LOCAL;
663 case DYNPTR_TYPE_RINGBUF:
664 return BPF_DYNPTR_TYPE_RINGBUF;
665 case DYNPTR_TYPE_SKB:
666 return BPF_DYNPTR_TYPE_SKB;
667 case DYNPTR_TYPE_XDP:
668 return BPF_DYNPTR_TYPE_XDP;
669 default:
670 return BPF_DYNPTR_TYPE_INVALID;
671 }
672 }
673
674 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
675 {
676 switch (type) {
677 case BPF_DYNPTR_TYPE_LOCAL:
678 return DYNPTR_TYPE_LOCAL;
679 case BPF_DYNPTR_TYPE_RINGBUF:
680 return DYNPTR_TYPE_RINGBUF;
681 case BPF_DYNPTR_TYPE_SKB:
682 return DYNPTR_TYPE_SKB;
683 case BPF_DYNPTR_TYPE_XDP:
684 return DYNPTR_TYPE_XDP;
685 default:
686 return 0;
687 }
688 }
689
690 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
691 {
692 return type == BPF_DYNPTR_TYPE_RINGBUF;
693 }
694
695 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
696 enum bpf_dynptr_type type,
697 bool first_slot, int dynptr_id);
698
699 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
700 struct bpf_reg_state *reg);
701
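/* A dynptr occupies two stack slots. Both slots' register states get the
 * same freshly generated id so they can later be recognized as halves of
 * one dynptr; only the first slot is marked as such.
 */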
702 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
703 struct bpf_reg_state *sreg1,
704 struct bpf_reg_state *sreg2,
705 enum bpf_dynptr_type type)
706 {
707 int id = ++env->id_gen;
708
709 __mark_dynptr_reg(sreg1, type, true, id);
710 __mark_dynptr_reg(sreg2, type, false, id);
711 }
712
713 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
714 struct bpf_reg_state *reg,
715 enum bpf_dynptr_type type)
716 {
717 __mark_dynptr_reg(reg, type, true, ++env->id_gen);
718 }
719
720 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
721 struct bpf_func_state *state, int spi);
722
723 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
724 enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
725 {
726 struct bpf_func_state *state = func(env, reg);
727 enum bpf_dynptr_type type;
728 int spi, i, err;
729
730 spi = dynptr_get_spi(env, reg);
731 if (spi < 0)
732 return spi;
733
734 /* We cannot assume both spi and spi - 1 belong to the same dynptr,
735 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
736 * to ensure that for the following example:
737 * [d1][d1][d2][d2]
738 * spi 3 2 1 0
739 * So marking spi = 2 should lead to destruction of both d1 and d2. In
740 * case they do belong to same dynptr, second call won't see slot_type
741 * as STACK_DYNPTR and will simply skip destruction.
742 */
743 err = destroy_if_dynptr_stack_slot(env, state, spi);
744 if (err)
745 return err;
746 err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
747 if (err)
748 return err;
749
750 for (i = 0; i < BPF_REG_SIZE; i++) {
751 state->stack[spi].slot_type[i] = STACK_DYNPTR;
752 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
753 }
754
755 type = arg_to_dynptr_type(arg_type);
756 if (type == BPF_DYNPTR_TYPE_INVALID)
757 return -EINVAL;
758
759 mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
760 &state->stack[spi - 1].spilled_ptr, type);
761
762 if (dynptr_type_refcounted(type)) {
763 /* The id is used to track proper releasing */
764 int id;
765
766 if (clone_ref_obj_id)
767 id = clone_ref_obj_id;
768 else
769 id = acquire_reference(env, insn_idx);
770
771 if (id < 0)
772 return id;
773
774 state->stack[spi].spilled_ptr.ref_obj_id = id;
775 state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
776 }
777
778 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
779 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
780
781 return 0;
782 }
783
784 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
785 {
786 int i;
787
788 for (i = 0; i < BPF_REG_SIZE; i++) {
789 state->stack[spi].slot_type[i] = STACK_INVALID;
790 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
791 }
792
793 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
794 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
795
796 /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
797 *
798 * While we don't allow reading STACK_INVALID, it is still possible to
799 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
800 * helpers or insns can do partial read of that part without failing,
801 * but check_stack_range_initialized, check_stack_read_var_off, and
802 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
803 * the slot conservatively. Hence we need to prevent those liveness
804 * marking walks.
805 *
806 * This was not a problem before because STACK_INVALID is only set by
807 * default (where the default reg state has its reg->parent as NULL), or
808 * in clean_live_states after REG_LIVE_DONE (at which point
809 * mark_reg_read won't walk reg->parent chain), but not randomly during
810 * verifier state exploration (like we did above). Hence, for our case
811 * parentage chain will still be live (i.e. reg->parent may be
812 * non-NULL), while earlier reg->parent was NULL, so we need
813 * REG_LIVE_WRITTEN to screen off read marker propagation when it is
814 * done later on reads or by mark_dynptr_read, so that we do not
815 * unnecessarily mark registers in the verifier state.
816 */
817 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
818 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
819 }
820
821 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
822 {
823 struct bpf_func_state *state = func(env, reg);
824 int spi, ref_obj_id, i;
825
826 spi = dynptr_get_spi(env, reg);
827 if (spi < 0)
828 return spi;
829
830 if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
831 invalidate_dynptr(env, state, spi);
832 return 0;
833 }
834
835 ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
836
837 /* If the dynptr has a ref_obj_id, then we need to invalidate
838 * two things:
839 *
840 * 1) Any dynptrs with a matching ref_obj_id (clones)
841 * 2) Any slices derived from this dynptr.
842 */
843
844 /* Invalidate any slices associated with this dynptr */
845 WARN_ON_ONCE(release_reference(env, ref_obj_id));
846
847 /* Invalidate any dynptr clones */
848 for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
849 if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
850 continue;
851
852 /* it should always be the case that if the ref obj id
853 * matches then the stack slot also belongs to a
854 * dynptr
855 */
856 if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
857 verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
858 return -EFAULT;
859 }
860 if (state->stack[i].spilled_ptr.dynptr.first_slot)
861 invalidate_dynptr(env, state, i);
862 }
863
864 return 0;
865 }
866
867 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
868 struct bpf_reg_state *reg);
869
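/* Invalidate a register: for unprivileged programs (no pointer leaks
 * allowed) mark it uninitialized so any further use is rejected, for
 * privileged ones downgrade it to an unknown scalar.
 */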
870 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
871 {
872 if (!env->allow_ptr_leaks)
873 __mark_reg_not_init(env, reg);
874 else
875 __mark_reg_unknown(env, reg);
876 }
877
878 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
879 struct bpf_func_state *state, int spi)
880 {
881 struct bpf_func_state *fstate;
882 struct bpf_reg_state *dreg;
883 int i, dynptr_id;
884
885 /* We always ensure that STACK_DYNPTR is never set partially,
886 * hence just checking for slot_type[0] is enough. This is
887 * different for STACK_SPILL, where it may be only set for
888 * 1 byte, so code has to use is_spilled_reg.
889 */
890 if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
891 return 0;
892
893 /* Reposition spi to first slot */
894 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
895 spi = spi + 1;
896
897 if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
898 verbose(env, "cannot overwrite referenced dynptr\n");
899 return -EINVAL;
900 }
901
902 mark_stack_slot_scratched(env, spi);
903 mark_stack_slot_scratched(env, spi - 1);
904
905 /* Writing partially to one dynptr stack slot destroys both. */
906 for (i = 0; i < BPF_REG_SIZE; i++) {
907 state->stack[spi].slot_type[i] = STACK_INVALID;
908 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
909 }
910
911 dynptr_id = state->stack[spi].spilled_ptr.id;
912 /* Invalidate any slices associated with this dynptr */
913 bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
914 /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
915 if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
916 continue;
917 if (dreg->dynptr_id == dynptr_id)
918 mark_reg_invalid(env, dreg);
919 }));
920
921 /* Do not release reference state, we are destroying dynptr on stack,
922 * not using some helper to release it. Just reset register.
923 */
924 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
925 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
926
927 /* Same reason as unmark_stack_slots_dynptr above */
928 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
929 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
930
931 return 0;
932 }
933
934 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
935 {
936 int spi;
937
938 if (reg->type == CONST_PTR_TO_DYNPTR)
939 return false;
940
941 spi = dynptr_get_spi(env, reg);
942
943 /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
944 * error because this just means the stack state hasn't been updated yet.
945 * We will do check_mem_access to check and update stack bounds later.
946 */
947 if (spi < 0 && spi != -ERANGE)
948 return false;
949
950 /* We don't need to check if the stack slots are marked by previous
951 * dynptr initializations because we allow overwriting existing unreferenced
952 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
953 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
954 * touching are completely destructed before we reinitialize them for a new
955 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
956 * instead of delaying it until the end where the user will get "Unreleased
957 * reference" error.
958 */
959 return true;
960 }
961
962 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
963 {
964 struct bpf_func_state *state = func(env, reg);
965 int i, spi;
966
967 /* This already represents first slot of initialized bpf_dynptr.
968 *
969 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
970 * check_func_arg_reg_off's logic, so we don't need to check its
971 * offset and alignment.
972 */
973 if (reg->type == CONST_PTR_TO_DYNPTR)
974 return true;
975
976 spi = dynptr_get_spi(env, reg);
977 if (spi < 0)
978 return false;
979 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
980 return false;
981
982 for (i = 0; i < BPF_REG_SIZE; i++) {
983 if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
984 state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
985 return false;
986 }
987
988 return true;
989 }
990
991 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
992 enum bpf_arg_type arg_type)
993 {
994 struct bpf_func_state *state = func(env, reg);
995 enum bpf_dynptr_type dynptr_type;
996 int spi;
997
998 /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
999 if (arg_type == ARG_PTR_TO_DYNPTR)
1000 return true;
1001
1002 dynptr_type = arg_to_dynptr_type(arg_type);
1003 if (reg->type == CONST_PTR_TO_DYNPTR) {
1004 return reg->dynptr.type == dynptr_type;
1005 } else {
1006 spi = dynptr_get_spi(env, reg);
1007 if (spi < 0)
1008 return false;
1009 return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1010 }
1011 }
1012
1013 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1014
1015 static bool in_rcu_cs(struct bpf_verifier_env *env);
1016
1017 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
1018
1019 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
1020 struct bpf_kfunc_call_arg_meta *meta,
1021 struct bpf_reg_state *reg, int insn_idx,
1022 struct btf *btf, u32 btf_id, int nr_slots)
1023 {
1024 struct bpf_func_state *state = func(env, reg);
1025 int spi, i, j, id;
1026
1027 spi = iter_get_spi(env, reg, nr_slots);
1028 if (spi < 0)
1029 return spi;
1030
1031 id = acquire_reference(env, insn_idx);
1032 if (id < 0)
1033 return id;
1034
1035 for (i = 0; i < nr_slots; i++) {
1036 struct bpf_stack_state *slot = &state->stack[spi - i];
1037 struct bpf_reg_state *st = &slot->spilled_ptr;
1038
1039 __mark_reg_known_zero(st);
1040 st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1041 if (is_kfunc_rcu_protected(meta)) {
1042 if (in_rcu_cs(env))
1043 st->type |= MEM_RCU;
1044 else
1045 st->type |= PTR_UNTRUSTED;
1046 }
1047 st->live |= REG_LIVE_WRITTEN;
1048 st->ref_obj_id = i == 0 ? id : 0;
1049 st->iter.btf = btf;
1050 st->iter.btf_id = btf_id;
1051 st->iter.state = BPF_ITER_STATE_ACTIVE;
1052 st->iter.depth = 0;
1053
1054 for (j = 0; j < BPF_REG_SIZE; j++)
1055 slot->slot_type[j] = STACK_ITER;
1056
1057 mark_stack_slot_scratched(env, spi - i);
1058 }
1059
1060 return 0;
1061 }
1062
1063 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1064 struct bpf_reg_state *reg, int nr_slots)
1065 {
1066 struct bpf_func_state *state = func(env, reg);
1067 int spi, i, j;
1068
1069 spi = iter_get_spi(env, reg, nr_slots);
1070 if (spi < 0)
1071 return spi;
1072
1073 for (i = 0; i < nr_slots; i++) {
1074 struct bpf_stack_state *slot = &state->stack[spi - i];
1075 struct bpf_reg_state *st = &slot->spilled_ptr;
1076
1077 if (i == 0)
1078 WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1079
1080 __mark_reg_not_init(env, st);
1081
1082 /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1083 st->live |= REG_LIVE_WRITTEN;
1084
1085 for (j = 0; j < BPF_REG_SIZE; j++)
1086 slot->slot_type[j] = STACK_INVALID;
1087
1088 mark_stack_slot_scratched(env, spi - i);
1089 }
1090
1091 return 0;
1092 }
1093
1094 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1095 struct bpf_reg_state *reg, int nr_slots)
1096 {
1097 struct bpf_func_state *state = func(env, reg);
1098 int spi, i, j;
1099
1100 /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1101 * will do check_mem_access to check and update stack bounds later, so
1102 * return true for that case.
1103 */
1104 spi = iter_get_spi(env, reg, nr_slots);
1105 if (spi == -ERANGE)
1106 return true;
1107 if (spi < 0)
1108 return false;
1109
1110 for (i = 0; i < nr_slots; i++) {
1111 struct bpf_stack_state *slot = &state->stack[spi - i];
1112
1113 for (j = 0; j < BPF_REG_SIZE; j++)
1114 if (slot->slot_type[j] == STACK_ITER)
1115 return false;
1116 }
1117
1118 return true;
1119 }
1120
1121 static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1122 struct btf *btf, u32 btf_id, int nr_slots)
1123 {
1124 struct bpf_func_state *state = func(env, reg);
1125 int spi, i, j;
1126
1127 spi = iter_get_spi(env, reg, nr_slots);
1128 if (spi < 0)
1129 return -EINVAL;
1130
1131 for (i = 0; i < nr_slots; i++) {
1132 struct bpf_stack_state *slot = &state->stack[spi - i];
1133 struct bpf_reg_state *st = &slot->spilled_ptr;
1134
1135 if (st->type & PTR_UNTRUSTED)
1136 return -EPROTO;
1137 /* only main (first) slot has ref_obj_id set */
1138 if (i == 0 && !st->ref_obj_id)
1139 return -EINVAL;
1140 if (i != 0 && st->ref_obj_id)
1141 return -EINVAL;
1142 if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1143 return -EINVAL;
1144
1145 for (j = 0; j < BPF_REG_SIZE; j++)
1146 if (slot->slot_type[j] != STACK_ITER)
1147 return -EINVAL;
1148 }
1149
1150 return 0;
1151 }
1152
1153 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1154 static int release_irq_state(struct bpf_verifier_state *state, int id);
1155
1156 static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
1157 struct bpf_kfunc_call_arg_meta *meta,
1158 struct bpf_reg_state *reg, int insn_idx,
1159 int kfunc_class)
1160 {
1161 struct bpf_func_state *state = func(env, reg);
1162 struct bpf_stack_state *slot;
1163 struct bpf_reg_state *st;
1164 int spi, i, id;
1165
1166 spi = irq_flag_get_spi(env, reg);
1167 if (spi < 0)
1168 return spi;
1169
1170 id = acquire_irq_state(env, insn_idx);
1171 if (id < 0)
1172 return id;
1173
1174 slot = &state->stack[spi];
1175 st = &slot->spilled_ptr;
1176
1177 __mark_reg_known_zero(st);
1178 st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1179 st->live |= REG_LIVE_WRITTEN;
1180 st->ref_obj_id = id;
1181 st->irq.kfunc_class = kfunc_class;
1182
1183 for (i = 0; i < BPF_REG_SIZE; i++)
1184 slot->slot_type[i] = STACK_IRQ_FLAG;
1185
1186 mark_stack_slot_scratched(env, spi);
1187 return 0;
1188 }
1189
1190 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1191 int kfunc_class)
1192 {
1193 struct bpf_func_state *state = func(env, reg);
1194 struct bpf_stack_state *slot;
1195 struct bpf_reg_state *st;
1196 int spi, i, err;
1197
1198 spi = irq_flag_get_spi(env, reg);
1199 if (spi < 0)
1200 return spi;
1201
1202 slot = &state->stack[spi];
1203 st = &slot->spilled_ptr;
1204
1205 if (st->irq.kfunc_class != kfunc_class) {
1206 const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1207 const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1208
1209 verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
1210 flag_kfunc, used_kfunc);
1211 return -EINVAL;
1212 }
1213
1214 err = release_irq_state(env->cur_state, st->ref_obj_id);
1215 WARN_ON_ONCE(err && err != -EACCES);
1216 if (err) {
1217 int insn_idx = 0;
1218
1219 for (int i = 0; i < env->cur_state->acquired_refs; i++) {
1220 if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
1221 insn_idx = env->cur_state->refs[i].insn_idx;
1222 break;
1223 }
1224 }
1225
1226 verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
1227 env->cur_state->active_irq_id, insn_idx);
1228 return err;
1229 }
1230
1231 __mark_reg_not_init(env, st);
1232
1233 /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1234 st->live |= REG_LIVE_WRITTEN;
1235
1236 for (i = 0; i < BPF_REG_SIZE; i++)
1237 slot->slot_type[i] = STACK_INVALID;
1238
1239 mark_stack_slot_scratched(env, spi);
1240 return 0;
1241 }
1242
1243 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1244 {
1245 struct bpf_func_state *state = func(env, reg);
1246 struct bpf_stack_state *slot;
1247 int spi, i;
1248
1249 /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1250 * will do check_mem_access to check and update stack bounds later, so
1251 * return true for that case.
1252 */
1253 spi = irq_flag_get_spi(env, reg);
1254 if (spi == -ERANGE)
1255 return true;
1256 if (spi < 0)
1257 return false;
1258
1259 slot = &state->stack[spi];
1260
1261 for (i = 0; i < BPF_REG_SIZE; i++)
1262 if (slot->slot_type[i] == STACK_IRQ_FLAG)
1263 return false;
1264 return true;
1265 }
1266
1267 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1268 {
1269 struct bpf_func_state *state = func(env, reg);
1270 struct bpf_stack_state *slot;
1271 struct bpf_reg_state *st;
1272 int spi, i;
1273
1274 spi = irq_flag_get_spi(env, reg);
1275 if (spi < 0)
1276 return -EINVAL;
1277
1278 slot = &state->stack[spi];
1279 st = &slot->spilled_ptr;
1280
1281 if (!st->ref_obj_id)
1282 return -EINVAL;
1283
1284 for (i = 0; i < BPF_REG_SIZE; i++)
1285 if (slot->slot_type[i] != STACK_IRQ_FLAG)
1286 return -EINVAL;
1287 return 0;
1288 }
1289
1290 /* Check if given stack slot is "special":
1291 * - spilled register state (STACK_SPILL);
1292 * - dynptr state (STACK_DYNPTR);
1293  * - iter state (STACK_ITER);
1294  * - irq flag state (STACK_IRQ_FLAG).
1295 */
1296 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1297 {
1298 enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1299
1300 switch (type) {
1301 case STACK_SPILL:
1302 case STACK_DYNPTR:
1303 case STACK_ITER:
1304 case STACK_IRQ_FLAG:
1305 return true;
1306 case STACK_INVALID:
1307 case STACK_MISC:
1308 case STACK_ZERO:
1309 return false;
1310 default:
1311 WARN_ONCE(1, "unknown stack slot type %d\n", type);
1312 return true;
1313 }
1314 }
1315
1316 /* The reg state of a pointer or a bounded scalar was saved when
1317 * it was spilled to the stack.
1318 */
1319 static bool is_spilled_reg(const struct bpf_stack_state *stack)
1320 {
1321 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1322 }
1323
1324 static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
1325 {
1326 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
1327 stack->spilled_ptr.type == SCALAR_VALUE;
1328 }
1329
1330 static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
1331 {
1332 return stack->slot_type[0] == STACK_SPILL &&
1333 stack->spilled_ptr.type == SCALAR_VALUE;
1334 }
1335
1336 /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
1337 * case they are equivalent, or it's STACK_ZERO, in which case we preserve
1338 * more precise STACK_ZERO.
1339 * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1340 * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
1341 * unnecessary as both are considered equivalent when loading data and pruning,
1342 * while in unprivileged mode it would be incorrect to allow reads of invalid
1343 * slots.
1344 */
1345 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1346 {
1347 if (*stype == STACK_ZERO)
1348 return;
1349 if (*stype == STACK_INVALID)
1350 return;
1351 *stype = STACK_MISC;
1352 }
1353
1354 static void scrub_spilled_slot(u8 *stype)
1355 {
1356 if (*stype != STACK_INVALID)
1357 *stype = STACK_MISC;
1358 }
1359
1360 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1361 * small to hold src. This is different from krealloc since we don't want to preserve
1362 * the contents of dst.
1363 *
1364 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1365 * not be allocated.
1366 */
1367 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1368 {
1369 size_t alloc_bytes;
1370 void *orig = dst;
1371 size_t bytes;
1372
1373 if (ZERO_OR_NULL_PTR(src))
1374 goto out;
1375
1376 if (unlikely(check_mul_overflow(n, size, &bytes)))
1377 return NULL;
1378
1379 alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1380 dst = krealloc(orig, alloc_bytes, flags);
1381 if (!dst) {
1382 kfree(orig);
1383 return NULL;
1384 }
1385
1386 memcpy(dst, src, bytes);
1387 out:
1388 return dst ? dst : ZERO_SIZE_PTR;
1389 }
1390
1391 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1392 * small to hold new_n items. new items are zeroed out if the array grows.
1393 *
1394 * Contrary to krealloc_array, does not free arr if new_n is zero.
1395 */
1396 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1397 {
1398 size_t alloc_size;
1399 void *new_arr;
1400
1401 if (!new_n || old_n == new_n)
1402 goto out;
1403
1404 alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1405 new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
1406 if (!new_arr) {
1407 kfree(arr);
1408 return NULL;
1409 }
1410 arr = new_arr;
1411
1412 if (new_n > old_n)
1413 memset(arr + old_n * size, 0, (new_n - old_n) * size);
1414
1415 out:
1416 return arr ? arr : ZERO_SIZE_PTR;
1417 }
1418
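/* Copy the acquired-reference array and the lock/RCU/preemption/IRQ
 * bookkeeping from src to dst when a verifier state is cloned.
 */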
1419 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
1420 {
1421 dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1422 sizeof(struct bpf_reference_state), GFP_KERNEL);
1423 if (!dst->refs)
1424 return -ENOMEM;
1425
1426 dst->acquired_refs = src->acquired_refs;
1427 dst->active_locks = src->active_locks;
1428 dst->active_preempt_locks = src->active_preempt_locks;
1429 dst->active_rcu_lock = src->active_rcu_lock;
1430 dst->active_irq_id = src->active_irq_id;
1431 dst->active_lock_id = src->active_lock_id;
1432 dst->active_lock_ptr = src->active_lock_ptr;
1433 return 0;
1434 }
1435
1436 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1437 {
1438 size_t n = src->allocated_stack / BPF_REG_SIZE;
1439
1440 dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1441 GFP_KERNEL);
1442 if (!dst->stack)
1443 return -ENOMEM;
1444
1445 dst->allocated_stack = src->allocated_stack;
1446 return 0;
1447 }
1448
1449 static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
1450 {
1451 state->refs = realloc_array(state->refs, state->acquired_refs, n,
1452 sizeof(struct bpf_reference_state));
1453 if (!state->refs)
1454 return -ENOMEM;
1455
1456 state->acquired_refs = n;
1457 return 0;
1458 }
1459
1460 /* Possibly update state->allocated_stack to be at least size bytes. Also
1461 * possibly update the function's high-water mark in its bpf_subprog_info.
1462 */
1463 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1464 {
1465 size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1466
1467 /* The stack size is always a multiple of BPF_REG_SIZE. */
1468 size = round_up(size, BPF_REG_SIZE);
1469 n = size / BPF_REG_SIZE;
1470
1471 if (old_n >= n)
1472 return 0;
1473
1474 state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1475 if (!state->stack)
1476 return -ENOMEM;
1477
1478 state->allocated_stack = size;
1479
1480 /* update known max for given subprogram */
1481 if (env->subprog_info[state->subprogno].stack_depth < size)
1482 env->subprog_info[state->subprogno].stack_depth = size;
1483
1484 return 0;
1485 }
1486
1487 /* Acquire a pointer id from the env and update the state->refs to include
1488 * this new pointer reference.
1489 * On success, returns a valid pointer id to associate with the register
1490 * On failure, returns a negative errno.
1491 */
1492 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1493 {
1494 struct bpf_verifier_state *state = env->cur_state;
1495 int new_ofs = state->acquired_refs;
1496 int err;
1497
1498 err = resize_reference_state(state, state->acquired_refs + 1);
1499 if (err)
1500 return NULL;
1501 state->refs[new_ofs].insn_idx = insn_idx;
1502
1503 return &state->refs[new_ofs];
1504 }
1505
1506 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
1507 {
1508 struct bpf_reference_state *s;
1509
1510 s = acquire_reference_state(env, insn_idx);
1511 if (!s)
1512 return -ENOMEM;
1513 s->type = REF_TYPE_PTR;
1514 s->id = ++env->id_gen;
1515 return s->id;
1516 }
1517
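/* Lock references share the refs array with other reference types;
 * active_lock_id/active_lock_ptr always describe the most recently
 * acquired lock so that release order can be validated.
 */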
1518 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
1519 int id, void *ptr)
1520 {
1521 struct bpf_verifier_state *state = env->cur_state;
1522 struct bpf_reference_state *s;
1523
1524 s = acquire_reference_state(env, insn_idx);
1525 if (!s)
1526 return -ENOMEM;
1527 s->type = type;
1528 s->id = id;
1529 s->ptr = ptr;
1530
1531 state->active_locks++;
1532 state->active_lock_id = id;
1533 state->active_lock_ptr = ptr;
1534 return 0;
1535 }
1536
1537 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
1538 {
1539 struct bpf_verifier_state *state = env->cur_state;
1540 struct bpf_reference_state *s;
1541
1542 s = acquire_reference_state(env, insn_idx);
1543 if (!s)
1544 return -ENOMEM;
1545 s->type = REF_TYPE_IRQ;
1546 s->id = ++env->id_gen;
1547
1548 state->active_irq_id = s->id;
1549 return s->id;
1550 }
1551
1552 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1553 {
1554 int last_idx;
1555 size_t rem;
1556
1557 /* IRQ state requires the relative ordering of elements remaining the
1558 * same, since it relies on the refs array to behave as a stack, so that
1559 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1560 * the array instead of swapping the final element into the deleted idx.
1561 */
1562 last_idx = state->acquired_refs - 1;
1563 rem = state->acquired_refs - idx - 1;
1564 if (last_idx && idx != last_idx)
1565 memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1566 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1567 state->acquired_refs--;
1568 return;
1569 }
1570
1571 static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
1572 {
1573 int i;
1574
1575 for (i = 0; i < state->acquired_refs; i++)
1576 if (state->refs[i].id == ptr_id)
1577 return true;
1578
1579 return false;
1580 }
1581
1582 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
1583 {
1584 void *prev_ptr = NULL;
1585 u32 prev_id = 0;
1586 int i;
1587
1588 for (i = 0; i < state->acquired_refs; i++) {
1589 if (state->refs[i].type == type && state->refs[i].id == id &&
1590 state->refs[i].ptr == ptr) {
1591 release_reference_state(state, i);
1592 state->active_locks--;
1593 /* Reassign active lock (id, ptr). */
1594 state->active_lock_id = prev_id;
1595 state->active_lock_ptr = prev_ptr;
1596 return 0;
1597 }
1598 if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
1599 prev_id = state->refs[i].id;
1600 prev_ptr = state->refs[i].ptr;
1601 }
1602 }
1603 return -EINVAL;
1604 }
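/* A concrete example of the reassignment above: say locks L1 and L2 were
 * acquired in that order, so refs[] holds L1 before L2 and (active_lock_id,
 * active_lock_ptr) describe L2. Releasing L2 scans refs[], remembers L1 as
 * the last preceding lock-typed entry, drops L2 and makes L1 the active
 * lock again.
 */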
1605
1606 static int release_irq_state(struct bpf_verifier_state *state, int id)
1607 {
1608 u32 prev_id = 0;
1609 int i;
1610
1611 if (id != state->active_irq_id)
1612 return -EACCES;
1613
1614 for (i = 0; i < state->acquired_refs; i++) {
1615 if (state->refs[i].type != REF_TYPE_IRQ)
1616 continue;
1617 if (state->refs[i].id == id) {
1618 release_reference_state(state, i);
1619 state->active_irq_id = prev_id;
1620 return 0;
1621 } else {
1622 prev_id = state->refs[i].id;
1623 }
1624 }
1625 return -EINVAL;
1626 }
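/* IRQ restores must happen in LIFO order. E.g., say irq states with ids 5
 * and then 7 were acquired, so active_irq_id == 7. Releasing id 5 first
 * fails with -EACCES; releasing id 7 succeeds and restores active_irq_id
 * back to 5.
 */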
1627
1628 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1629 int id, void *ptr)
1630 {
1631 int i;
1632
1633 for (i = 0; i < state->acquired_refs; i++) {
1634 struct bpf_reference_state *s = &state->refs[i];
1635
1636 if (!(s->type & type))
1637 continue;
1638
1639 if (s->id == id && s->ptr == ptr)
1640 return s;
1641 }
1642 return NULL;
1643 }
1644
1645 static void update_peak_states(struct bpf_verifier_env *env)
1646 {
1647 u32 cur_states;
1648
1649 cur_states = env->explored_states_size + env->free_list_size;
1650 env->peak_states = max(env->peak_states, cur_states);
1651 }
1652
1653 static void free_func_state(struct bpf_func_state *state)
1654 {
1655 if (!state)
1656 return;
1657 kfree(state->stack);
1658 kfree(state);
1659 }
1660
1661 static void free_verifier_state(struct bpf_verifier_state *state,
1662 bool free_self)
1663 {
1664 int i;
1665
1666 for (i = 0; i <= state->curframe; i++) {
1667 free_func_state(state->frame[i]);
1668 state->frame[i] = NULL;
1669 }
1670 kfree(state->refs);
1671 if (free_self)
1672 kfree(state);
1673 }
1674
1675 /* struct bpf_verifier_state->{parent,loop_entry} refer to states
1676 * that are in either of env->{explored_states,free_list}.
1677 * In both cases the state is contained in struct bpf_verifier_state_list.
1678 */
1679 static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
1680 {
1681 if (st->parent)
1682 return container_of(st->parent, struct bpf_verifier_state_list, state);
1683 return NULL;
1684 }
1685
1686 static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st)
1687 {
1688 if (st->loop_entry)
1689 return container_of(st->loop_entry, struct bpf_verifier_state_list, state);
1690 return NULL;
1691 }
1692
1693 /* A state can be freed if it is no longer referenced:
1694 * - is in the env->free_list;
1695 * - has no children states;
1696 * - is not used as loop_entry.
1697 *
1698 * Freeing a state can make its loop_entry free-able.
1699 */
1700 static void maybe_free_verifier_state(struct bpf_verifier_env *env,
1701 struct bpf_verifier_state_list *sl)
1702 {
1703 struct bpf_verifier_state_list *loop_entry_sl;
1704
1705 while (sl && sl->in_free_list &&
1706 sl->state.branches == 0 &&
1707 sl->state.used_as_loop_entry == 0) {
1708 loop_entry_sl = state_loop_entry_as_list(&sl->state);
1709 if (loop_entry_sl)
1710 loop_entry_sl->state.used_as_loop_entry--;
1711 list_del(&sl->node);
1712 free_verifier_state(&sl->state, false);
1713 kfree(sl);
1714 env->free_list_size--;
1715 sl = loop_entry_sl;
1716 }
1717 }
1718
1719 /* copy verifier state from src to dst growing dst stack space
1720 * when necessary to accommodate larger src stack
1721 */
1722 static int copy_func_state(struct bpf_func_state *dst,
1723 const struct bpf_func_state *src)
1724 {
1725 memcpy(dst, src, offsetof(struct bpf_func_state, stack));
1726 return copy_stack_state(dst, src);
1727 }
1728
1729 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1730 const struct bpf_verifier_state *src)
1731 {
1732 struct bpf_func_state *dst;
1733 int i, err;
1734
1735 /* if dst has more stack frames than src frame, free them; this is also
1736 * necessary in case of exceptional exits using bpf_throw.
1737 */
1738 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1739 free_func_state(dst_state->frame[i]);
1740 dst_state->frame[i] = NULL;
1741 }
1742 err = copy_reference_state(dst_state, src);
1743 if (err)
1744 return err;
1745 dst_state->speculative = src->speculative;
1746 dst_state->in_sleepable = src->in_sleepable;
1747 dst_state->curframe = src->curframe;
1748 dst_state->branches = src->branches;
1749 dst_state->parent = src->parent;
1750 dst_state->first_insn_idx = src->first_insn_idx;
1751 dst_state->last_insn_idx = src->last_insn_idx;
1752 dst_state->insn_hist_start = src->insn_hist_start;
1753 dst_state->insn_hist_end = src->insn_hist_end;
1754 dst_state->dfs_depth = src->dfs_depth;
1755 dst_state->callback_unroll_depth = src->callback_unroll_depth;
1756 dst_state->used_as_loop_entry = src->used_as_loop_entry;
1757 dst_state->may_goto_depth = src->may_goto_depth;
1758 dst_state->loop_entry = src->loop_entry;
1759 for (i = 0; i <= src->curframe; i++) {
1760 dst = dst_state->frame[i];
1761 if (!dst) {
1762 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1763 if (!dst)
1764 return -ENOMEM;
1765 dst_state->frame[i] = dst;
1766 }
1767 err = copy_func_state(dst, src->frame[i]);
1768 if (err)
1769 return err;
1770 }
1771 return 0;
1772 }
1773
1774 static u32 state_htab_size(struct bpf_verifier_env *env)
1775 {
1776 return env->prog->len;
1777 }
1778
1779 static struct list_head *explored_state(struct bpf_verifier_env *env, int idx)
1780 {
1781 struct bpf_verifier_state *cur = env->cur_state;
1782 struct bpf_func_state *state = cur->frame[cur->curframe];
1783
1784 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1785 }
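/* E.g., two states queued at the same instruction but reached through
 * different call sites typically hash into different buckets, which keeps
 * the per-bucket lists that is_state_visited() walks short.
 */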
1786
1787 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1788 {
1789 int fr;
1790
1791 if (a->curframe != b->curframe)
1792 return false;
1793
1794 for (fr = a->curframe; fr >= 0; fr--)
1795 if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1796 return false;
1797
1798 return true;
1799 }
1800
1801 /* Open coded iterators allow back-edges in the state graph in order to
1802 * check unbounded loops that iterators may create.
1803 *
1804 * In is_state_visited() it is necessary to know if explored states are
1805 * part of some loops in order to decide whether non-exact states
1806 * comparison could be used:
1807 * - non-exact states comparison establishes sub-state relation and uses
1808 * read and precision marks to do so, these marks are propagated from
1809 * children states and thus are not guaranteed to be final in a loop;
1810 * - exact states comparison just checks if current and explored states
1811 * are identical (and thus form a back-edge).
1812 *
1813 * Paper "A New Algorithm for Identifying Loops in Decompilation"
1814 * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
1815 * algorithm for loop structure detection and gives an overview of
1816 * relevant terminology. It also has helpful illustrations.
1817 *
1818 * [1] https://api.semanticscholar.org/CorpusID:15784067
1819 *
1820 * We use a similar algorithm but because loop nested structure is
1821 * irrelevant for verifier ours is significantly simpler and resembles
1822 * strongly connected components algorithm from Sedgewick's textbook.
1823 *
1824 * Define topmost loop entry as a first node of the loop traversed in a
1825 * depth first search starting from initial state. The goal of the loop
1826 * tracking algorithm is to associate topmost loop entries with states
1827 * derived from these entries.
1828 *
1829 * At each step of the DFS traversal of states the algorithm needs to identify
1830 * the following situations:
1831 *
1832 * initial initial initial
1833 * | | |
1834 * V V V
1835 * ... ... .---------> hdr
1836 * | | | |
1837 * V V | V
1838 * cur .-> succ | .------...
1839 * | | | | | |
1840 * V | V | V V
1841 * succ '-- cur | ... ...
1842 * | | |
1843 * | V V
1844 * | succ <- cur
1845 * | |
1846 * | V
1847 * | ...
1848 * | |
1849 * '----'
1850 *
1851 * (A) successor state of cur (B) successor state of cur or its entry
1852 * not yet traversed are in current DFS path, thus cur and succ
1853 * are members of the same outermost loop
1854 *
1855 * initial initial
1856 * | |
1857 * V V
1858 * ... ...
1859 * | |
1860 * V V
1861 * .------... .------...
1862 * | | | |
1863 * V V V V
1864 * .-> hdr ... ... ...
1865 * | | | | |
1866 * | V V V V
1867 * | succ <- cur succ <- cur
1868 * | | |
1869 * | V V
1870 * | ... ...
1871 * | | |
1872 * '----' exit
1873 *
1874 * (C) successor state of cur is a part of some loop but this loop
1875 * does not include cur or successor state is not in a loop at all.
1876 *
1877 * Algorithm could be described as the following python code:
1878 *
1879 * traversed = set() # Set of traversed nodes
1880 * entries = {} # Mapping from node to loop entry
1881 * depths = {} # Depth level assigned to graph node
1882 * path = set() # Current DFS path
1883 *
1884 * # Find outermost loop entry known for n
1885 * def get_loop_entry(n):
1886 * h = entries.get(n, None)
1887 * while h in entries:
1888 * h = entries[h]
1889 * return h
1890 *
1891 * # Update n's loop entry if h comes before n in current DFS path.
1892 * def update_loop_entry(n, h):
1893 * if h in path and depths[entries.get(n, n)] < depths[n]:
1894 * entries[n] = h
1895 *
1896 * def dfs(n, depth):
1897 * traversed.add(n)
1898 * path.add(n)
1899 * depths[n] = depth
1900 * for succ in G.successors(n):
1901 * if succ not in traversed:
1902 * # Case A: explore succ and update cur's loop entry
1903 * # only if succ's entry is in current DFS path.
1904 * dfs(succ, depth + 1)
1905 * h = entries.get(succ, None)
1906 * update_loop_entry(n, h)
1907 * else:
1908 * # Case B or C depending on the `h in path` check in update_loop_entry().
1909 * update_loop_entry(n, succ)
1910 * path.remove(n)
1911 *
1912 * To adapt this algorithm for use with verifier:
1913 * - use st->branch == 0 as a signal that DFS of succ had been finished
1914 * and cur's loop entry has to be updated (case A), handle this in
1915 * update_branch_counts();
1916 * - use st->branch > 0 as a signal that st is in the current DFS path;
1917 * - handle cases B and C in is_state_visited().
1918 */
1919 static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
1920 struct bpf_verifier_state *st)
1921 {
1922 struct bpf_verifier_state *topmost = st->loop_entry;
1923 u32 steps = 0;
1924
1925 while (topmost && topmost->loop_entry) {
1926 if (steps++ > st->dfs_depth) {
1927 WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n");
1928 verbose(env, "verifier bug: infinite loop in get_loop_entry()\n");
1929 return ERR_PTR(-EFAULT);
1930 }
1931 topmost = topmost->loop_entry;
1932 }
1933 return topmost;
1934 }
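/* E.g., if st->loop_entry points to state A, A->loop_entry points to state B
 * and B->loop_entry is NULL, get_loop_entry(st) follows the chain and
 * returns B, the topmost entry. The dfs_depth based step limit only guards
 * against a corrupted (cyclic) chain.
 */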
1935
1936 static void update_loop_entry(struct bpf_verifier_env *env,
1937 struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
1938 {
1939 /* The hdr->branches check decides between cases B and C in
1940 * comment for get_loop_entry(). If hdr->branches == 0 then
1941 * head's topmost loop entry is not in current DFS path,
1942 * hence 'cur' and 'hdr' are not in the same loop and there is
1943 * no need to update cur->loop_entry.
1944 */
1945 if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
1946 if (cur->loop_entry) {
1947 cur->loop_entry->used_as_loop_entry--;
1948 maybe_free_verifier_state(env, state_loop_entry_as_list(cur));
1949 }
1950 cur->loop_entry = hdr;
1951 hdr->used_as_loop_entry++;
1952 }
1953 }
1954
1955 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1956 {
1957 struct bpf_verifier_state_list *sl = NULL, *parent_sl;
1958 struct bpf_verifier_state *parent;
1959
1960 while (st) {
1961 u32 br = --st->branches;
1962
1963 /* br == 0 signals that DFS exploration for 'st' is finished,
1964 * thus it is necessary to update parent's loop entry if it
1965 * turned out that st is a part of some loop.
1966 * This is a part of 'case A' in get_loop_entry() comment.
1967 */
1968 if (br == 0 && st->parent && st->loop_entry)
1969 update_loop_entry(env, st->parent, st->loop_entry);
1970
1971 /* WARN_ON(br > 1) technically makes sense here,
1972 * but see comment in push_stack(), hence:
1973 */
1974 WARN_ONCE((int)br < 0,
1975 "BUG update_branch_counts:branches_to_explore=%d\n",
1976 br);
1977 if (br)
1978 break;
1979 parent = st->parent;
1980 parent_sl = state_parent_as_list(st);
1981 if (sl)
1982 maybe_free_verifier_state(env, sl);
1983 st = parent;
1984 sl = parent_sl;
1985 }
1986 }
1987
1988 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1989 int *insn_idx, bool pop_log)
1990 {
1991 struct bpf_verifier_state *cur = env->cur_state;
1992 struct bpf_verifier_stack_elem *elem, *head = env->head;
1993 int err;
1994
1995 if (env->head == NULL)
1996 return -ENOENT;
1997
1998 if (cur) {
1999 err = copy_verifier_state(cur, &head->st);
2000 if (err)
2001 return err;
2002 }
2003 if (pop_log)
2004 bpf_vlog_reset(&env->log, head->log_pos);
2005 if (insn_idx)
2006 *insn_idx = head->insn_idx;
2007 if (prev_insn_idx)
2008 *prev_insn_idx = head->prev_insn_idx;
2009 elem = head->next;
2010 free_verifier_state(&head->st, false);
2011 kfree(head);
2012 env->head = elem;
2013 env->stack_size--;
2014 return 0;
2015 }
2016
2017 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
2018 int insn_idx, int prev_insn_idx,
2019 bool speculative)
2020 {
2021 struct bpf_verifier_state *cur = env->cur_state;
2022 struct bpf_verifier_stack_elem *elem;
2023 int err;
2024
2025 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2026 if (!elem)
2027 goto err;
2028
2029 elem->insn_idx = insn_idx;
2030 elem->prev_insn_idx = prev_insn_idx;
2031 elem->next = env->head;
2032 elem->log_pos = env->log.end_pos;
2033 env->head = elem;
2034 env->stack_size++;
2035 err = copy_verifier_state(&elem->st, cur);
2036 if (err)
2037 goto err;
2038 elem->st.speculative |= speculative;
2039 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2040 verbose(env, "The sequence of %d jumps is too complex.\n",
2041 env->stack_size);
2042 goto err;
2043 }
2044 if (elem->st.parent) {
2045 ++elem->st.parent->branches;
2046 /* WARN_ON(branches > 2) technically makes sense here,
2047 * but
2048 * 1. speculative states will bump 'branches' for non-branch
2049 * instructions
2050 * 2. is_state_visited() heuristics may decide not to create
2051 * a new state for a sequence of branches and all such current
2052 * and cloned states will be pointing to a single parent state
2053 * which might have large 'branches' count.
2054 */
2055 }
2056 return &elem->st;
2057 err:
2058 free_verifier_state(env->cur_state, true);
2059 env->cur_state = NULL;
2060 /* pop all elements and return */
2061 while (!pop_stack(env, NULL, NULL, false));
2062 return NULL;
2063 }
2064
2065 #define CALLER_SAVED_REGS 6
2066 static const int caller_saved[CALLER_SAVED_REGS] = {
2067 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
2068 };
2069
2070 /* This helper doesn't clear reg->id */
2071 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
2072 {
2073 reg->var_off = tnum_const(imm);
2074 reg->smin_value = (s64)imm;
2075 reg->smax_value = (s64)imm;
2076 reg->umin_value = imm;
2077 reg->umax_value = imm;
2078
2079 reg->s32_min_value = (s32)imm;
2080 reg->s32_max_value = (s32)imm;
2081 reg->u32_min_value = (u32)imm;
2082 reg->u32_max_value = (u32)imm;
2083 }
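/* For example, with imm == (u64)-1 the helper above sets var_off to the
 * constant 0xffffffffffffffff (mask == 0), smin/smax to -1, umin/umax to
 * 0xffffffffffffffff, the s32 bounds to -1 and the u32 bounds to 0xffffffff.
 */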
2084
2085 /* Mark the unknown part of a register (variable offset or scalar value) as
2086 * known to have the value @imm.
2087 */
2088 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
2089 {
2090 /* Clear off and union(map_ptr, range) */
2091 memset(((u8 *)reg) + sizeof(reg->type), 0,
2092 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
2093 reg->id = 0;
2094 reg->ref_obj_id = 0;
2095 ___mark_reg_known(reg, imm);
2096 }
2097
2098 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
2099 {
2100 reg->var_off = tnum_const_subreg(reg->var_off, imm);
2101 reg->s32_min_value = (s32)imm;
2102 reg->s32_max_value = (s32)imm;
2103 reg->u32_min_value = (u32)imm;
2104 reg->u32_max_value = (u32)imm;
2105 }
2106
2107 /* Mark the 'variable offset' part of a register as zero. This should be
2108 * used only on registers holding a pointer type.
2109 */
2110 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
2111 {
2112 __mark_reg_known(reg, 0);
2113 }
2114
2115 static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
2116 {
2117 __mark_reg_known(reg, 0);
2118 reg->type = SCALAR_VALUE;
2119 /* all scalars are assumed imprecise initially (unless unprivileged,
2120 * in which case everything is forced to be precise)
2121 */
2122 reg->precise = !env->bpf_capable;
2123 }
2124
2125 static void mark_reg_known_zero(struct bpf_verifier_env *env,
2126 struct bpf_reg_state *regs, u32 regno)
2127 {
2128 if (WARN_ON(regno >= MAX_BPF_REG)) {
2129 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
2130 /* Something bad happened, let's kill all regs */
2131 for (regno = 0; regno < MAX_BPF_REG; regno++)
2132 __mark_reg_not_init(env, regs + regno);
2133 return;
2134 }
2135 __mark_reg_known_zero(regs + regno);
2136 }
2137
2138 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
2139 bool first_slot, int dynptr_id)
2140 {
2141 /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
2142 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
2143 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
2144 */
2145 __mark_reg_known_zero(reg);
2146 reg->type = CONST_PTR_TO_DYNPTR;
2147 /* Give each dynptr a unique id to uniquely associate slices to it. */
2148 reg->id = dynptr_id;
2149 reg->dynptr.type = type;
2150 reg->dynptr.first_slot = first_slot;
2151 }
2152
2153 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
2154 {
2155 if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
2156 const struct bpf_map *map = reg->map_ptr;
2157
2158 if (map->inner_map_meta) {
2159 reg->type = CONST_PTR_TO_MAP;
2160 reg->map_ptr = map->inner_map_meta;
2161 /* transfer reg's id which is unique for every map_lookup_elem
2162 * as UID of the inner map.
2163 */
2164 if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
2165 reg->map_uid = reg->id;
2166 if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE))
2167 reg->map_uid = reg->id;
2168 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
2169 reg->type = PTR_TO_XDP_SOCK;
2170 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
2171 map->map_type == BPF_MAP_TYPE_SOCKHASH) {
2172 reg->type = PTR_TO_SOCKET;
2173 } else {
2174 reg->type = PTR_TO_MAP_VALUE;
2175 }
2176 return;
2177 }
2178
2179 reg->type &= ~PTR_MAYBE_NULL;
2180 }
2181
2182 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
2183 struct btf_field_graph_root *ds_head)
2184 {
2185 __mark_reg_known_zero(&regs[regno]);
2186 regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
2187 regs[regno].btf = ds_head->btf;
2188 regs[regno].btf_id = ds_head->value_btf_id;
2189 regs[regno].off = ds_head->node_offset;
2190 }
2191
2192 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
2193 {
2194 return type_is_pkt_pointer(reg->type);
2195 }
2196
2197 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
2198 {
2199 return reg_is_pkt_pointer(reg) ||
2200 reg->type == PTR_TO_PACKET_END;
2201 }
2202
2203 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
2204 {
2205 return base_type(reg->type) == PTR_TO_MEM &&
2206 (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
2207 }
2208
2209 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
2210 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
2211 enum bpf_reg_type which)
2212 {
2213 /* The register can already have a range from prior markings.
2214 * This is fine as long as it hasn't been advanced from its
2215 * origin.
2216 */
2217 return reg->type == which &&
2218 reg->id == 0 &&
2219 reg->off == 0 &&
2220 tnum_equals_const(reg->var_off, 0);
2221 }
2222
2223 /* Reset the min/max bounds of a register */
2224 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
2225 {
2226 reg->smin_value = S64_MIN;
2227 reg->smax_value = S64_MAX;
2228 reg->umin_value = 0;
2229 reg->umax_value = U64_MAX;
2230
2231 reg->s32_min_value = S32_MIN;
2232 reg->s32_max_value = S32_MAX;
2233 reg->u32_min_value = 0;
2234 reg->u32_max_value = U32_MAX;
2235 }
2236
2237 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
2238 {
2239 reg->smin_value = S64_MIN;
2240 reg->smax_value = S64_MAX;
2241 reg->umin_value = 0;
2242 reg->umax_value = U64_MAX;
2243 }
2244
2245 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
2246 {
2247 reg->s32_min_value = S32_MIN;
2248 reg->s32_max_value = S32_MAX;
2249 reg->u32_min_value = 0;
2250 reg->u32_max_value = U32_MAX;
2251 }
2252
2253 static void __update_reg32_bounds(struct bpf_reg_state *reg)
2254 {
2255 struct tnum var32_off = tnum_subreg(reg->var_off);
2256
2257 /* min signed is max(sign bit) | min(other bits) */
2258 reg->s32_min_value = max_t(s32, reg->s32_min_value,
2259 var32_off.value | (var32_off.mask & S32_MIN));
2260 /* max signed is min(sign bit) | max(other bits) */
2261 reg->s32_max_value = min_t(s32, reg->s32_max_value,
2262 var32_off.value | (var32_off.mask & S32_MAX));
2263 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
2264 reg->u32_max_value = min(reg->u32_max_value,
2265 (u32)(var32_off.value | var32_off.mask));
2266 }
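/* Worked example: a subreg tnum of (value=0x20, mask=0xf) means bits 4..31
 * are known (only bit 5 is set) while bits 0..3 are unknown, i.e. the subreg
 * lies somewhere in 0x20..0x2f. Starting from unbounded 32-bit ranges the
 * code above therefore tightens both the u32 and s32 bounds to [0x20, 0x2f].
 */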
2267
2268 static void __update_reg64_bounds(struct bpf_reg_state *reg)
2269 {
2270 /* min signed is max(sign bit) | min(other bits) */
2271 reg->smin_value = max_t(s64, reg->smin_value,
2272 reg->var_off.value | (reg->var_off.mask & S64_MIN));
2273 /* max signed is min(sign bit) | max(other bits) */
2274 reg->smax_value = min_t(s64, reg->smax_value,
2275 reg->var_off.value | (reg->var_off.mask & S64_MAX));
2276 reg->umin_value = max(reg->umin_value, reg->var_off.value);
2277 reg->umax_value = min(reg->umax_value,
2278 reg->var_off.value | reg->var_off.mask);
2279 }
2280
2281 static void __update_reg_bounds(struct bpf_reg_state *reg)
2282 {
2283 __update_reg32_bounds(reg);
2284 __update_reg64_bounds(reg);
2285 }
2286
2287 /* Uses signed min/max values to inform unsigned, and vice-versa */
2288 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
2289 {
2290 /* If upper 32 bits of u64/s64 range don't change, we can use lower 32
2291 * bits to improve our u32/s32 boundaries.
2292 *
2293 * E.g., the case where we have upper 32 bits as zero ([10, 20] in
2294 * u64) is pretty trivial, it's obvious that in u32 we'll also have
2295 * [10, 20] range. But this property holds for any 64-bit range as
2296 * long as upper 32 bits in that entire range of values stay the same.
2297 *
2298 * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311]
2299 * in decimal) has the same upper 32 bits throughout all the values in
2300 * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15])
2301 * range.
2302 *
2303 * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32,
2304 * following the rules outlined below about u64/s64 correspondence
2305 * (which equally applies to u32 vs s32 correspondence). In general it
2306 * depends on actual hexadecimal values of 32-bit range. They can form
2307 * only valid u32, or only valid s32 ranges in some cases.
2308 *
2309 * So we use all these insights to derive bounds for subregisters here.
2310 */
2311 if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) {
2312 /* u64 to u32 casting preserves validity of low 32 bits as
2313 * a range, if upper 32 bits are the same
2314 */
2315 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value);
2316 reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value);
2317
2318 if ((s32)reg->umin_value <= (s32)reg->umax_value) {
2319 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
2320 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
2321 }
2322 }
2323 if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) {
2324 /* low 32 bits should form a proper u32 range */
2325 if ((u32)reg->smin_value <= (u32)reg->smax_value) {
2326 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value);
2327 reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value);
2328 }
2329 /* low 32 bits should form a proper s32 range */
2330 if ((s32)reg->smin_value <= (s32)reg->smax_value) {
2331 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
2332 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
2333 }
2334 }
2335 /* Special case where upper bits form a small sequence of two
2336 * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
2337 * 0x00000000 is also valid), while lower bits form a proper s32 range
2338 * going from negative numbers to positive numbers. E.g., let's say we
2339 * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]).
2340 * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff,
2341 * 0x0000000000000000, 0x0000000000000001}). Ignoring upper 32 bits,
2342 * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]).
2343 * Note that it doesn't have to be 0xffffffff going to 0x00000000 in
2344 * upper 32 bits. As a random example, s64 range
2345 * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range
2346 * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister.
2347 */
2348 if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) &&
2349 (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) {
2350 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
2351 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
2352 }
2353 if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) &&
2354 (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) {
2355 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
2356 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
2357 }
2358 /* if u32 range forms a valid s32 range (due to matching sign bit),
2359 * try to learn from that
2360 */
2361 if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) {
2362 reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value);
2363 reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value);
2364 }
2365 /* If we cannot cross the sign boundary, then signed and unsigned bounds
2366 * are the same, so combine. This works even in the negative case, e.g.
2367 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2368 */
2369 if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
2370 reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
2371 reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
2372 }
2373 }
2374
2375 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2376 {
2377 /* If u64 range forms a valid s64 range (due to matching sign bit),
2378 * try to learn from that. Let's do a bit of ASCII art to see when
2379 * this is happening. Let's take u64 range first:
2380 *
2381 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2382 * |-------------------------------|--------------------------------|
2383 *
2384 * Valid u64 range is formed when umin and umax are anywhere in the
2385 * range [0, U64_MAX], and umin <= umax. u64 case is simple and
2386 * straightforward. Let's see how s64 range maps onto the same range
2387 * of values, annotated below the line for comparison:
2388 *
2389 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2390 * |-------------------------------|--------------------------------|
2391 * 0 S64_MAX S64_MIN -1
2392 *
2393 * So s64 values basically start in the middle and they are logically
2394 * contiguous to the right of it, wrapping around from -1 to 0, and
2395 * then finishing as S64_MAX (0x7fffffffffffffff) right before
2396 * S64_MIN. We can try drawing the continuity of u64 vs s64 values
2397 * more visually as mapped to sign-agnostic range of hex values.
2398 *
2399 * u64 start u64 end
2400 * _______________________________________________________________
2401 * / \
2402 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2403 * |-------------------------------|--------------------------------|
2404 * 0 S64_MAX S64_MIN -1
2405 * / \
2406 * >------------------------------ ------------------------------->
2407 * s64 continues... s64 end s64 start s64 "midpoint"
2408 *
2409 * What this means is that, in general, we can't always derive
2410 * something new about u64 from any random s64 range, and vice versa.
2411 *
2412 * But we can do that in two particular cases. One is when entire
2413 * u64/s64 range is *entirely* contained within left half of the above
2414 * diagram or when it is *entirely* contained in the right half. I.e.:
2415 *
2416 * |-------------------------------|--------------------------------|
2417 * ^ ^ ^ ^
2418 * A B C D
2419 *
2420 * [A, B] and [C, D] are contained entirely in their respective halves
2421 * and form valid contiguous ranges as both u64 and s64 values. [A, B]
2422 * will be non-negative both as u64 and s64 (and in fact it will be
2423 * identical ranges no matter the signedness). [C, D] treated as s64
2424 * will be a range of negative values, while in u64 it will be
2425 * non-negative range of values larger than 0x8000000000000000.
2426 *
2427 * Now, any other range here can't be represented in both u64 and s64
2428 * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid
2429 * contiguous u64 ranges, but they are discontinuous in s64. [B, C]
2430 * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX],
2431 * for example. Similarly, valid s64 range [D, A] (going from negative
2432 * to positive values), would be two separate [D, U64_MAX] and [0, A]
2433 * ranges as u64. Currently reg_state can't represent two segments per
2434 * numeric domain, so in such situations we can only derive maximal
2435 * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64).
2436 *
2437 * So we use these facts to derive umin/umax from smin/smax and vice
2438 * versa only if they stay within the same "half". This is equivalent
2439 * to checking sign bit: lower half will have sign bit as zero, upper
2440 * half have sign bit 1. Below in code we simplify this by just
2441 * casting umin/umax as smin/smax and checking if they form valid
2442 * range, and vice versa. Those are equivalent checks.
2443 */
2444 if ((s64)reg->umin_value <= (s64)reg->umax_value) {
2445 reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value);
2446 reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value);
2447 }
2448 /* If we cannot cross the sign boundary, then signed and unsigned bounds
2449 * are the same, so combine. This works even in the negative case, e.g.
2450 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2451 */
2452 if ((u64)reg->smin_value <= (u64)reg->smax_value) {
2453 reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
2454 reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
2455 }
2456 }
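/* Two quick examples of the checks above: a u64 range [1, 5] stays in the
 * lower half, (s64)1 <= (s64)5, so smin/smax may be tightened to [1, 5].
 * An s64 range [-1, 1] crosses the sign boundary: (u64)-1 > (u64)1, so the
 * second check refuses to derive u64 bounds from it.
 */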
2457
2458 static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
2459 {
2460 /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit
2461 * values on both sides of 64-bit range in hope to have tighter range.
2462 * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from
2463 * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff].
2464 * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound
2465 * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of
2466 * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a
2467 * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff].
2468 * We just need to make sure that derived bounds we are intersecting
2469 * with are well-formed ranges in respective s64 or u64 domain, just
2470 * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments.
2471 */
2472 __u64 new_umin, new_umax;
2473 __s64 new_smin, new_smax;
2474
2475 /* u32 -> u64 tightening, it's always well-formed */
2476 new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value;
2477 new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value;
2478 reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2479 reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2480 /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */
2481 new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value;
2482 new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value;
2483 reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2484 reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2485
2486 /* if s32 can be treated as valid u32 range, we can use it as well */
2487 if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
2488 /* s32 -> u64 tightening */
2489 new_umin = (reg->umin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
2490 new_umax = (reg->umax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
2491 reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2492 reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2493 /* s32 -> s64 tightening */
2494 new_smin = (reg->smin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
2495 new_smax = (reg->smax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
2496 reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2497 reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2498 }
2499
2500 /* Here we would like to handle a special case after sign extending load,
2501 * when upper bits for a 64-bit range are all 1s or all 0s.
2502 *
2503 * Upper bits are all 1s when register is in a range:
2504 * [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
2505 * Upper bits are all 0s when register is in a range:
2506 * [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
2507 * Together these form a continuous range:
2508 * [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
2509 *
2510 * Now, suppose that register range is in fact tighter:
2511 * [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
2512 * Also suppose that its 32-bit range is positive,
2513 * meaning that lower 32-bits of the full 64-bit register
2514 * are in the range:
2515 * [0x0000_0000, 0x7fff_ffff] (W)
2516 *
2517 * If this happens, then any value in a range:
2518 * [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
2519 * is smaller than a lowest bound of the range (R):
2520 * 0xffff_ffff_8000_0000
2521 * which means that upper bits of the full 64-bit register
2522 * can't be all 1s, when lower bits are in range (W).
2523 *
2524 * Note that:
2525 * - 0xffff_ffff_8000_0000 == (s64)S32_MIN
2526 * - 0x0000_0000_7fff_ffff == (s64)S32_MAX
2527 * These relations are used in the conditions below.
2528 */
2529 if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
2530 reg->smin_value = reg->s32_min_value;
2531 reg->smax_value = reg->s32_max_value;
2532 reg->umin_value = reg->s32_min_value;
2533 reg->umax_value = reg->s32_max_value;
2534 reg->var_off = tnum_intersect(reg->var_off,
2535 tnum_range(reg->smin_value, reg->smax_value));
2536 }
2537 }
2538
2539 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2540 {
2541 __reg32_deduce_bounds(reg);
2542 __reg64_deduce_bounds(reg);
2543 __reg_deduce_mixed_bounds(reg);
2544 }
2545
2546 /* Attempts to improve var_off based on unsigned min/max information */
2547 static void __reg_bound_offset(struct bpf_reg_state *reg)
2548 {
2549 struct tnum var64_off = tnum_intersect(reg->var_off,
2550 tnum_range(reg->umin_value,
2551 reg->umax_value));
2552 struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2553 tnum_range(reg->u32_min_value,
2554 reg->u32_max_value));
2555
2556 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2557 }
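/* E.g., with umin_value == 16, umax_value == 31 and a previously unknown
 * var_off, tnum_range(16, 31) is (value=0x10, mask=0xf), so the intersection
 * above records that bit 4 is set and bits 5..63 are zero; only the low four
 * bits remain unknown.
 */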
2558
2559 static void reg_bounds_sync(struct bpf_reg_state *reg)
2560 {
2561 /* We might have learned new bounds from the var_off. */
2562 __update_reg_bounds(reg);
2563 /* We might have learned something about the sign bit. */
2564 __reg_deduce_bounds(reg);
2565 __reg_deduce_bounds(reg);
2566 /* We might have learned some bits from the bounds. */
2567 __reg_bound_offset(reg);
2568 /* Intersecting with the old var_off might have improved our bounds
2569 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2570 * then new var_off is (0; 0x7f...fc) which improves our umax.
2571 */
2572 __update_reg_bounds(reg);
2573 }
2574
2575 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2576 struct bpf_reg_state *reg, const char *ctx)
2577 {
2578 const char *msg;
2579
2580 if (reg->umin_value > reg->umax_value ||
2581 reg->smin_value > reg->smax_value ||
2582 reg->u32_min_value > reg->u32_max_value ||
2583 reg->s32_min_value > reg->s32_max_value) {
2584 msg = "range bounds violation";
2585 goto out;
2586 }
2587
2588 if (tnum_is_const(reg->var_off)) {
2589 u64 uval = reg->var_off.value;
2590 s64 sval = (s64)uval;
2591
2592 if (reg->umin_value != uval || reg->umax_value != uval ||
2593 reg->smin_value != sval || reg->smax_value != sval) {
2594 msg = "const tnum out of sync with range bounds";
2595 goto out;
2596 }
2597 }
2598
2599 if (tnum_subreg_is_const(reg->var_off)) {
2600 u32 uval32 = tnum_subreg(reg->var_off).value;
2601 s32 sval32 = (s32)uval32;
2602
2603 if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
2604 reg->s32_min_value != sval32 || reg->s32_max_value != sval32) {
2605 msg = "const subreg tnum out of sync with range bounds";
2606 goto out;
2607 }
2608 }
2609
2610 return 0;
2611 out:
2612 verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
2613 "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n",
2614 ctx, msg, reg->umin_value, reg->umax_value,
2615 reg->smin_value, reg->smax_value,
2616 reg->u32_min_value, reg->u32_max_value,
2617 reg->s32_min_value, reg->s32_max_value,
2618 reg->var_off.value, reg->var_off.mask);
2619 if (env->test_reg_invariants)
2620 return -EFAULT;
2621 __mark_reg_unbounded(reg);
2622 return 0;
2623 }
2624
2625 static bool __reg32_bound_s64(s32 a)
2626 {
2627 return a >= 0 && a <= S32_MAX;
2628 }
2629
2630 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2631 {
2632 reg->umin_value = reg->u32_min_value;
2633 reg->umax_value = reg->u32_max_value;
2634
2635 /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
2636 * be positive otherwise set to worse case bounds and refine later
2637 * from tnum.
2638 */
2639 if (__reg32_bound_s64(reg->s32_min_value) &&
2640 __reg32_bound_s64(reg->s32_max_value)) {
2641 reg->smin_value = reg->s32_min_value;
2642 reg->smax_value = reg->s32_max_value;
2643 } else {
2644 reg->smin_value = 0;
2645 reg->smax_value = U32_MAX;
2646 }
2647 }
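/* For example, s32 bounds of [-5, 10] cannot be carried over directly
 * because -5 fails __reg32_bound_s64(), so the 64-bit signed bounds fall
 * back to the worst case [0, U32_MAX]; the unsigned bounds still become
 * [u32_min_value, u32_max_value] and are refined later from the tnum.
 */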
2648
2649 /* Mark a register as having a completely unknown (scalar) value. */
2650 static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2651 {
2652 /*
2653 * Clear type, off, and union(map_ptr, range) and
2654 * padding between 'type' and union
2655 */
2656 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2657 reg->type = SCALAR_VALUE;
2658 reg->id = 0;
2659 reg->ref_obj_id = 0;
2660 reg->var_off = tnum_unknown;
2661 reg->frameno = 0;
2662 reg->precise = false;
2663 __mark_reg_unbounded(reg);
2664 }
2665
2666 /* Mark a register as having a completely unknown (scalar) value,
2667 * initialize .precise as true when not bpf capable.
2668 */
2669 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2670 struct bpf_reg_state *reg)
2671 {
2672 __mark_reg_unknown_imprecise(reg);
2673 reg->precise = !env->bpf_capable;
2674 }
2675
2676 static void mark_reg_unknown(struct bpf_verifier_env *env,
2677 struct bpf_reg_state *regs, u32 regno)
2678 {
2679 if (WARN_ON(regno >= MAX_BPF_REG)) {
2680 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
2681 /* Something bad happened, let's kill all regs except FP */
2682 for (regno = 0; regno < BPF_REG_FP; regno++)
2683 __mark_reg_not_init(env, regs + regno);
2684 return;
2685 }
2686 __mark_reg_unknown(env, regs + regno);
2687 }
2688
2689 static int __mark_reg_s32_range(struct bpf_verifier_env *env,
2690 struct bpf_reg_state *regs,
2691 u32 regno,
2692 s32 s32_min,
2693 s32 s32_max)
2694 {
2695 struct bpf_reg_state *reg = regs + regno;
2696
2697 reg->s32_min_value = max_t(s32, reg->s32_min_value, s32_min);
2698 reg->s32_max_value = min_t(s32, reg->s32_max_value, s32_max);
2699
2700 reg->smin_value = max_t(s64, reg->smin_value, s32_min);
2701 reg->smax_value = min_t(s64, reg->smax_value, s32_max);
2702
2703 reg_bounds_sync(reg);
2704
2705 return reg_bounds_sanity_check(env, reg, "s32_range");
2706 }
2707
2708 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2709 struct bpf_reg_state *reg)
2710 {
2711 __mark_reg_unknown(env, reg);
2712 reg->type = NOT_INIT;
2713 }
2714
2715 static void mark_reg_not_init(struct bpf_verifier_env *env,
2716 struct bpf_reg_state *regs, u32 regno)
2717 {
2718 if (WARN_ON(regno >= MAX_BPF_REG)) {
2719 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
2720 /* Something bad happened, let's kill all regs except FP */
2721 for (regno = 0; regno < BPF_REG_FP; regno++)
2722 __mark_reg_not_init(env, regs + regno);
2723 return;
2724 }
2725 __mark_reg_not_init(env, regs + regno);
2726 }
2727
2728 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2729 struct bpf_reg_state *regs, u32 regno,
2730 enum bpf_reg_type reg_type,
2731 struct btf *btf, u32 btf_id,
2732 enum bpf_type_flag flag)
2733 {
2734 if (reg_type == SCALAR_VALUE) {
2735 mark_reg_unknown(env, regs, regno);
2736 return;
2737 }
2738 mark_reg_known_zero(env, regs, regno);
2739 regs[regno].type = PTR_TO_BTF_ID | flag;
2740 regs[regno].btf = btf;
2741 regs[regno].btf_id = btf_id;
2742 if (type_may_be_null(flag))
2743 regs[regno].id = ++env->id_gen;
2744 }
2745
2746 #define DEF_NOT_SUBREG (0)
2747 static void init_reg_state(struct bpf_verifier_env *env,
2748 struct bpf_func_state *state)
2749 {
2750 struct bpf_reg_state *regs = state->regs;
2751 int i;
2752
2753 for (i = 0; i < MAX_BPF_REG; i++) {
2754 mark_reg_not_init(env, regs, i);
2755 regs[i].live = REG_LIVE_NONE;
2756 regs[i].parent = NULL;
2757 regs[i].subreg_def = DEF_NOT_SUBREG;
2758 }
2759
2760 /* frame pointer */
2761 regs[BPF_REG_FP].type = PTR_TO_STACK;
2762 mark_reg_known_zero(env, regs, BPF_REG_FP);
2763 regs[BPF_REG_FP].frameno = state->frameno;
2764 }
2765
2766 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2767 {
2768 return (struct bpf_retval_range){ minval, maxval };
2769 }
2770
2771 #define BPF_MAIN_FUNC (-1)
2772 static void init_func_state(struct bpf_verifier_env *env,
2773 struct bpf_func_state *state,
2774 int callsite, int frameno, int subprogno)
2775 {
2776 state->callsite = callsite;
2777 state->frameno = frameno;
2778 state->subprogno = subprogno;
2779 state->callback_ret_range = retval_range(0, 0);
2780 init_reg_state(env, state);
2781 mark_verifier_state_scratched(env);
2782 }
2783
2784 /* Similar to push_stack(), but for async callbacks */
2785 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2786 int insn_idx, int prev_insn_idx,
2787 int subprog, bool is_sleepable)
2788 {
2789 struct bpf_verifier_stack_elem *elem;
2790 struct bpf_func_state *frame;
2791
2792 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2793 if (!elem)
2794 goto err;
2795
2796 elem->insn_idx = insn_idx;
2797 elem->prev_insn_idx = prev_insn_idx;
2798 elem->next = env->head;
2799 elem->log_pos = env->log.end_pos;
2800 env->head = elem;
2801 env->stack_size++;
2802 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2803 verbose(env,
2804 "The sequence of %d jumps is too complex for async cb.\n",
2805 env->stack_size);
2806 goto err;
2807 }
2808 /* Unlike push_stack() do not copy_verifier_state().
2809 * The caller state doesn't matter.
2810 * This is async callback. It starts in a fresh stack.
2811 * Initialize it similar to do_check_common().
2812 * But we do need to make sure to not clobber insn_hist, so we keep
2813 * chaining insn_hist_start/insn_hist_end indices as for a normal
2814 * child state.
2815 */
2816 elem->st.branches = 1;
2817 elem->st.in_sleepable = is_sleepable;
2818 elem->st.insn_hist_start = env->cur_state->insn_hist_end;
2819 elem->st.insn_hist_end = elem->st.insn_hist_start;
2820 frame = kzalloc(sizeof(*frame), GFP_KERNEL);
2821 if (!frame)
2822 goto err;
2823 init_func_state(env, frame,
2824 BPF_MAIN_FUNC /* callsite */,
2825 0 /* frameno within this callchain */,
2826 subprog /* subprog number within this prog */);
2827 elem->st.frame[0] = frame;
2828 return &elem->st;
2829 err:
2830 free_verifier_state(env->cur_state, true);
2831 env->cur_state = NULL;
2832 /* pop all elements and return */
2833 while (!pop_stack(env, NULL, NULL, false));
2834 return NULL;
2835 }
2836
2837
2838 enum reg_arg_type {
2839 SRC_OP, /* register is used as source operand */
2840 DST_OP, /* register is used as destination operand */
2841 DST_OP_NO_MARK /* same as above, check only, don't mark */
2842 };
2843
2844 static int cmp_subprogs(const void *a, const void *b)
2845 {
2846 return ((struct bpf_subprog_info *)a)->start -
2847 ((struct bpf_subprog_info *)b)->start;
2848 }
2849
2850 /* Find subprogram that contains instruction at 'off' */
2851 static struct bpf_subprog_info *find_containing_subprog(struct bpf_verifier_env *env, int off)
2852 {
2853 struct bpf_subprog_info *vals = env->subprog_info;
2854 int l, r, m;
2855
2856 if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
2857 return NULL;
2858
2859 l = 0;
2860 r = env->subprog_cnt - 1;
2861 while (l < r) {
2862 m = l + (r - l + 1) / 2;
2863 if (vals[m].start <= off)
2864 l = m;
2865 else
2866 r = m - 1;
2867 }
2868 return &vals[l];
2869 }
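/* Binary search example: with subprog starts of, say, [0, 10, 50], an off of
 * 12 converges on the entry starting at 10 (the subprog covering insns
 * 10..49), while an off of 9 returns the entry starting at 0.
 */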
2870
2871 /* Find subprogram that starts exactly at 'off' */
2872 static int find_subprog(struct bpf_verifier_env *env, int off)
2873 {
2874 struct bpf_subprog_info *p;
2875
2876 p = find_containing_subprog(env, off);
2877 if (!p || p->start != off)
2878 return -ENOENT;
2879 return p - env->subprog_info;
2880 }
2881
2882 static int add_subprog(struct bpf_verifier_env *env, int off)
2883 {
2884 int insn_cnt = env->prog->len;
2885 int ret;
2886
2887 if (off >= insn_cnt || off < 0) {
2888 verbose(env, "call to invalid destination\n");
2889 return -EINVAL;
2890 }
2891 ret = find_subprog(env, off);
2892 if (ret >= 0)
2893 return ret;
2894 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2895 verbose(env, "too many subprograms\n");
2896 return -E2BIG;
2897 }
2898 /* determine subprog starts. The end is one before the next starts */
2899 env->subprog_info[env->subprog_cnt++].start = off;
2900 sort(env->subprog_info, env->subprog_cnt,
2901 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2902 return env->subprog_cnt - 1;
2903 }
2904
2905 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2906 {
2907 struct bpf_prog_aux *aux = env->prog->aux;
2908 struct btf *btf = aux->btf;
2909 const struct btf_type *t;
2910 u32 main_btf_id, id;
2911 const char *name;
2912 int ret, i;
2913
2914 /* Non-zero func_info_cnt implies valid btf */
2915 if (!aux->func_info_cnt)
2916 return 0;
2917 main_btf_id = aux->func_info[0].type_id;
2918
2919 t = btf_type_by_id(btf, main_btf_id);
2920 if (!t) {
2921 verbose(env, "invalid btf id for main subprog in func_info\n");
2922 return -EINVAL;
2923 }
2924
2925 name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
2926 if (IS_ERR(name)) {
2927 ret = PTR_ERR(name);
2928 /* If there is no tag present, there is no exception callback */
2929 if (ret == -ENOENT)
2930 ret = 0;
2931 else if (ret == -EEXIST)
2932 verbose(env, "multiple exception callback tags for main subprog\n");
2933 return ret;
2934 }
2935
2936 ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
2937 if (ret < 0) {
2938 verbose(env, "exception callback '%s' could not be found in BTF\n", name);
2939 return ret;
2940 }
2941 id = ret;
2942 t = btf_type_by_id(btf, id);
2943 if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2944 verbose(env, "exception callback '%s' must have global linkage\n", name);
2945 return -EINVAL;
2946 }
2947 ret = 0;
2948 for (i = 0; i < aux->func_info_cnt; i++) {
2949 if (aux->func_info[i].type_id != id)
2950 continue;
2951 ret = aux->func_info[i].insn_off;
2952 /* Further func_info and subprog checks will also happen
2953 * later, so assume this is the right insn_off for now.
2954 */
2955 if (!ret) {
2956 verbose(env, "invalid exception callback insn_off in func_info: 0\n");
2957 ret = -EINVAL;
2958 }
2959 }
2960 if (!ret) {
2961 verbose(env, "exception callback type id not found in func_info\n");
2962 ret = -EINVAL;
2963 }
2964 return ret;
2965 }
2966
2967 #define MAX_KFUNC_DESCS 256
2968 #define MAX_KFUNC_BTFS 256
2969
2970 struct bpf_kfunc_desc {
2971 struct btf_func_model func_model;
2972 u32 func_id;
2973 s32 imm;
2974 u16 offset;
2975 unsigned long addr;
2976 };
2977
2978 struct bpf_kfunc_btf {
2979 struct btf *btf;
2980 struct module *module;
2981 u16 offset;
2982 };
2983
2984 struct bpf_kfunc_desc_tab {
2985 /* Sorted by func_id (BTF ID) and offset (fd_array offset) during
2986 * verification. JITs do lookups by bpf_insn, where func_id may not be
2987 * available, therefore at the end of verification do_misc_fixups()
2988 * sorts this by imm and offset.
2989 */
2990 struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2991 u32 nr_descs;
2992 };
2993
2994 struct bpf_kfunc_btf_tab {
2995 struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2996 u32 nr_descs;
2997 };
2998
2999 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
3000 {
3001 const struct bpf_kfunc_desc *d0 = a;
3002 const struct bpf_kfunc_desc *d1 = b;
3003
3004 /* func_id is not greater than BTF_MAX_TYPE */
3005 return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
3006 }
3007
3008 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
3009 {
3010 const struct bpf_kfunc_btf *d0 = a;
3011 const struct bpf_kfunc_btf *d1 = b;
3012
3013 return d0->offset - d1->offset;
3014 }
3015
3016 static const struct bpf_kfunc_desc *
3017 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
3018 {
3019 struct bpf_kfunc_desc desc = {
3020 .func_id = func_id,
3021 .offset = offset,
3022 };
3023 struct bpf_kfunc_desc_tab *tab;
3024
3025 tab = prog->aux->kfunc_tab;
3026 return bsearch(&desc, tab->descs, tab->nr_descs,
3027 sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
3028 }
3029
3030 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
3031 u16 btf_fd_idx, u8 **func_addr)
3032 {
3033 const struct bpf_kfunc_desc *desc;
3034
3035 desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
3036 if (!desc)
3037 return -EFAULT;
3038
3039 *func_addr = (u8 *)desc->addr;
3040 return 0;
3041 }
3042
3043 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
3044 s16 offset)
3045 {
3046 struct bpf_kfunc_btf kf_btf = { .offset = offset };
3047 struct bpf_kfunc_btf_tab *tab;
3048 struct bpf_kfunc_btf *b;
3049 struct module *mod;
3050 struct btf *btf;
3051 int btf_fd;
3052
3053 tab = env->prog->aux->kfunc_btf_tab;
3054 b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
3055 sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
3056 if (!b) {
3057 if (tab->nr_descs == MAX_KFUNC_BTFS) {
3058 verbose(env, "too many different module BTFs\n");
3059 return ERR_PTR(-E2BIG);
3060 }
3061
3062 if (bpfptr_is_null(env->fd_array)) {
3063 verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
3064 return ERR_PTR(-EPROTO);
3065 }
3066
3067 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
3068 offset * sizeof(btf_fd),
3069 sizeof(btf_fd)))
3070 return ERR_PTR(-EFAULT);
3071
3072 btf = btf_get_by_fd(btf_fd);
3073 if (IS_ERR(btf)) {
3074 verbose(env, "invalid module BTF fd specified\n");
3075 return btf;
3076 }
3077
3078 if (!btf_is_module(btf)) {
3079 verbose(env, "BTF fd for kfunc is not a module BTF\n");
3080 btf_put(btf);
3081 return ERR_PTR(-EINVAL);
3082 }
3083
3084 mod = btf_try_get_module(btf);
3085 if (!mod) {
3086 btf_put(btf);
3087 return ERR_PTR(-ENXIO);
3088 }
3089
3090 b = &tab->descs[tab->nr_descs++];
3091 b->btf = btf;
3092 b->module = mod;
3093 b->offset = offset;
3094
3095 /* sort() reorders entries by value, so b may no longer point
3096 * to the right entry after this
3097 */
3098 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
3099 kfunc_btf_cmp_by_off, NULL);
3100 } else {
3101 btf = b->btf;
3102 }
3103
3104 return btf;
3105 }
3106
3107 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
3108 {
3109 if (!tab)
3110 return;
3111
3112 while (tab->nr_descs--) {
3113 module_put(tab->descs[tab->nr_descs].module);
3114 btf_put(tab->descs[tab->nr_descs].btf);
3115 }
3116 kfree(tab);
3117 }
3118
3119 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
3120 {
3121 if (offset) {
3122 if (offset < 0) {
3123 /* In the future, this can be allowed to increase limit
3124 * of fd index into fd_array, interpreted as u16.
3125 */
3126 verbose(env, "negative offset disallowed for kernel module function call\n");
3127 return ERR_PTR(-EINVAL);
3128 }
3129
3130 return __find_kfunc_desc_btf(env, offset);
3131 }
3132 return btf_vmlinux ?: ERR_PTR(-ENOENT);
3133 }
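
/* Example of the offset semantics above (the fd index is hypothetical): a
 * kfunc call insn with off == 0 is resolved against vmlinux BTF, while
 * off == 2 makes the verifier read fd_array[2] from the load-time attributes
 * and resolve the call against that module's BTF, keeping the module pinned
 * for as long as the program exists (see bpf_free_kfunc_btf_tab()).
 */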
3134
3135 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
3136 {
3137 const struct btf_type *func, *func_proto;
3138 struct bpf_kfunc_btf_tab *btf_tab;
3139 struct bpf_kfunc_desc_tab *tab;
3140 struct bpf_prog_aux *prog_aux;
3141 struct bpf_kfunc_desc *desc;
3142 const char *func_name;
3143 struct btf *desc_btf;
3144 unsigned long call_imm;
3145 unsigned long addr;
3146 int err;
3147
3148 prog_aux = env->prog->aux;
3149 tab = prog_aux->kfunc_tab;
3150 btf_tab = prog_aux->kfunc_btf_tab;
3151 if (!tab) {
3152 if (!btf_vmlinux) {
3153 verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
3154 return -ENOTSUPP;
3155 }
3156
3157 if (!env->prog->jit_requested) {
3158 verbose(env, "JIT is required for calling kernel function\n");
3159 return -ENOTSUPP;
3160 }
3161
3162 if (!bpf_jit_supports_kfunc_call()) {
3163 verbose(env, "JIT does not support calling kernel function\n");
3164 return -ENOTSUPP;
3165 }
3166
3167 if (!env->prog->gpl_compatible) {
3168 verbose(env, "cannot call kernel function from non-GPL compatible program\n");
3169 return -EINVAL;
3170 }
3171
3172 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
3173 if (!tab)
3174 return -ENOMEM;
3175 prog_aux->kfunc_tab = tab;
3176 }
3177
3178 /* func_id == 0 is always invalid, but instead of returning an error, be
3179 * conservative and wait until the code elimination pass before returning
3180 * error, so that invalid calls that get pruned out can be in BPF programs
3181 * loaded from userspace. It is also required that offset be untouched
3182 * for such calls.
3183 */
3184 if (!func_id && !offset)
3185 return 0;
3186
3187 if (!btf_tab && offset) {
3188 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
3189 if (!btf_tab)
3190 return -ENOMEM;
3191 prog_aux->kfunc_btf_tab = btf_tab;
3192 }
3193
3194 desc_btf = find_kfunc_desc_btf(env, offset);
3195 if (IS_ERR(desc_btf)) {
3196 verbose(env, "failed to find BTF for kernel function\n");
3197 return PTR_ERR(desc_btf);
3198 }
3199
3200 if (find_kfunc_desc(env->prog, func_id, offset))
3201 return 0;
3202
3203 if (tab->nr_descs == MAX_KFUNC_DESCS) {
3204 verbose(env, "too many different kernel function calls\n");
3205 return -E2BIG;
3206 }
3207
3208 func = btf_type_by_id(desc_btf, func_id);
3209 if (!func || !btf_type_is_func(func)) {
3210 verbose(env, "kernel btf_id %u is not a function\n",
3211 func_id);
3212 return -EINVAL;
3213 }
3214 func_proto = btf_type_by_id(desc_btf, func->type);
3215 if (!func_proto || !btf_type_is_func_proto(func_proto)) {
3216 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
3217 func_id);
3218 return -EINVAL;
3219 }
3220
3221 func_name = btf_name_by_offset(desc_btf, func->name_off);
3222 addr = kallsyms_lookup_name(func_name);
3223 if (!addr) {
3224 verbose(env, "cannot find address for kernel function %s\n",
3225 func_name);
3226 return -EINVAL;
3227 }
3228 specialize_kfunc(env, func_id, offset, &addr);
3229
3230 if (bpf_jit_supports_far_kfunc_call()) {
3231 call_imm = func_id;
3232 } else {
3233 call_imm = BPF_CALL_IMM(addr);
3234 /* Check whether the relative offset overflows desc->imm */
3235 if ((unsigned long)(s32)call_imm != call_imm) {
3236 verbose(env, "address of kernel function %s is out of range\n",
3237 func_name);
3238 return -EINVAL;
3239 }
3240 }
3241
3242 if (bpf_dev_bound_kfunc_id(func_id)) {
3243 err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
3244 if (err)
3245 return err;
3246 }
3247
3248 desc = &tab->descs[tab->nr_descs++];
3249 desc->func_id = func_id;
3250 desc->imm = call_imm;
3251 desc->offset = offset;
3252 desc->addr = addr;
3253 err = btf_distill_func_proto(&env->log, desc_btf,
3254 func_proto, func_name,
3255 &desc->func_model);
3256 if (!err)
3257 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
3258 kfunc_desc_cmp_by_id_off, NULL);
3259 return err;
3260 }
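
/* Sketch of the insn->imm encoding chosen above: when the JIT supports far
 * kfunc calls it resolves the target via desc->addr, so imm simply keeps the
 * BTF func_id; otherwise imm must hold BPF_CALL_IMM(addr), i.e. the address
 * of the kfunc relative to __bpf_call_base, and addresses whose delta does
 * not fit in s32 are rejected as out of range.
 */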
3261
3262 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
3263 {
3264 const struct bpf_kfunc_desc *d0 = a;
3265 const struct bpf_kfunc_desc *d1 = b;
3266
3267 if (d0->imm != d1->imm)
3268 return d0->imm < d1->imm ? -1 : 1;
3269 if (d0->offset != d1->offset)
3270 return d0->offset < d1->offset ? -1 : 1;
3271 return 0;
3272 }
3273
3274 static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
3275 {
3276 struct bpf_kfunc_desc_tab *tab;
3277
3278 tab = prog->aux->kfunc_tab;
3279 if (!tab)
3280 return;
3281
3282 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
3283 kfunc_desc_cmp_by_imm_off, NULL);
3284 }
3285
3286 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
3287 {
3288 return !!prog->aux->kfunc_tab;
3289 }
3290
3291 const struct btf_func_model *
3292 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
3293 const struct bpf_insn *insn)
3294 {
3295 const struct bpf_kfunc_desc desc = {
3296 .imm = insn->imm,
3297 .offset = insn->off,
3298 };
3299 const struct bpf_kfunc_desc *res;
3300 struct bpf_kfunc_desc_tab *tab;
3301
3302 tab = prog->aux->kfunc_tab;
3303 res = bsearch(&desc, tab->descs, tab->nr_descs,
3304 sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
3305
3306 return res ? &res->func_model : NULL;
3307 }
3308
3309 static int add_kfunc_in_insns(struct bpf_verifier_env *env,
3310 struct bpf_insn *insn, int cnt)
3311 {
3312 int i, ret;
3313
3314 for (i = 0; i < cnt; i++, insn++) {
3315 if (bpf_pseudo_kfunc_call(insn)) {
3316 ret = add_kfunc_call(env, insn->imm, insn->off);
3317 if (ret < 0)
3318 return ret;
3319 }
3320 }
3321 return 0;
3322 }
3323
3324 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
3325 {
3326 struct bpf_subprog_info *subprog = env->subprog_info;
3327 int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
3328 struct bpf_insn *insn = env->prog->insnsi;
3329
3330 /* Add entry function. */
3331 ret = add_subprog(env, 0);
3332 if (ret)
3333 return ret;
3334
3335 for (i = 0; i < insn_cnt; i++, insn++) {
3336 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
3337 !bpf_pseudo_kfunc_call(insn))
3338 continue;
3339
3340 if (!env->bpf_capable) {
3341 verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
3342 return -EPERM;
3343 }
3344
3345 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
3346 ret = add_subprog(env, i + insn->imm + 1);
3347 else
3348 ret = add_kfunc_call(env, insn->imm, insn->off);
3349
3350 if (ret < 0)
3351 return ret;
3352 }
3353
3354 ret = bpf_find_exception_callback_insn_off(env);
3355 if (ret < 0)
3356 return ret;
3357 ex_cb_insn = ret;
3358
3359 /* If ex_cb_insn > 0, this means that the main program has a subprog
3360 * marked using BTF decl tag to serve as the exception callback.
3361 */
3362 if (ex_cb_insn) {
3363 ret = add_subprog(env, ex_cb_insn);
3364 if (ret < 0)
3365 return ret;
3366 for (i = 1; i < env->subprog_cnt; i++) {
3367 if (env->subprog_info[i].start != ex_cb_insn)
3368 continue;
3369 env->exception_callback_subprog = i;
3370 mark_subprog_exc_cb(env, i);
3371 break;
3372 }
3373 }
3374
3375 /* Add a fake 'exit' subprog which could simplify subprog iteration
3376 * logic. 'subprog_cnt' should not be increased.
3377 */
3378 subprog[env->subprog_cnt].start = insn_cnt;
3379
3380 if (env->log.level & BPF_LOG_LEVEL2)
3381 for (i = 0; i < env->subprog_cnt; i++)
3382 verbose(env, "func#%d @%d\n", i, subprog[i].start);
3383
3384 return 0;
3385 }
3386
3387 static int jmp_offset(struct bpf_insn *insn)
3388 {
3389 u8 code = insn->code;
3390
3391 if (code == (BPF_JMP32 | BPF_JA))
3392 return insn->imm;
3393 return insn->off;
3394 }
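
/* For example, the 32-bit "gotol" jump (BPF_JMP32 | BPF_JA) keeps its target
 * in the 32-bit imm field, while every other jump uses the 16-bit off field;
 * jmp_offset() hides that difference for the range checks below.
 */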
3395
3396 static int check_subprogs(struct bpf_verifier_env *env)
3397 {
3398 int i, subprog_start, subprog_end, off, cur_subprog = 0;
3399 struct bpf_subprog_info *subprog = env->subprog_info;
3400 struct bpf_insn *insn = env->prog->insnsi;
3401 int insn_cnt = env->prog->len;
3402
3403 /* now check that all jumps are within the same subprog */
3404 subprog_start = subprog[cur_subprog].start;
3405 subprog_end = subprog[cur_subprog + 1].start;
3406 for (i = 0; i < insn_cnt; i++) {
3407 u8 code = insn[i].code;
3408
3409 if (code == (BPF_JMP | BPF_CALL) &&
3410 insn[i].src_reg == 0 &&
3411 insn[i].imm == BPF_FUNC_tail_call) {
3412 subprog[cur_subprog].has_tail_call = true;
3413 subprog[cur_subprog].tail_call_reachable = true;
3414 }
3415 if (BPF_CLASS(code) == BPF_LD &&
3416 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
3417 subprog[cur_subprog].has_ld_abs = true;
3418 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
3419 goto next;
3420 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
3421 goto next;
3422 off = i + jmp_offset(&insn[i]) + 1;
3423 if (off < subprog_start || off >= subprog_end) {
3424 verbose(env, "jump out of range from insn %d to %d\n", i, off);
3425 return -EINVAL;
3426 }
3427 next:
3428 if (i == subprog_end - 1) {
3429 /* to avoid fall-through from one subprog into another
3430 * the last insn of the subprog should be either exit
3431 * or unconditional jump back or bpf_throw call
3432 */
3433 if (code != (BPF_JMP | BPF_EXIT) &&
3434 code != (BPF_JMP32 | BPF_JA) &&
3435 code != (BPF_JMP | BPF_JA)) {
3436 verbose(env, "last insn is not an exit or jmp\n");
3437 return -EINVAL;
3438 }
3439 subprog_start = subprog_end;
3440 cur_subprog++;
3441 if (cur_subprog < env->subprog_cnt)
3442 subprog_end = subprog[cur_subprog + 1].start;
3443 }
3444 }
3445 return 0;
3446 }
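
/* Hypothetical program rejected by the fall-through check above:
 *
 *   0: call pc+1    <- subprog #1 starts at insn 2
 *   1: r1 = r0      <- last insn of subprog #0: neither exit nor jump
 *   2: r0 = 1
 *   3: exit
 *
 * check_subprogs() rejects it with "last insn is not an exit or jmp".
 */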
3447
3448 /* Parentage chain of this register (or stack slot) should take care of all
3449 * issues like callee-saved registers, stack slot allocation time, etc.
3450 */
3451 static int mark_reg_read(struct bpf_verifier_env *env,
3452 const struct bpf_reg_state *state,
3453 struct bpf_reg_state *parent, u8 flag)
3454 {
3455 bool writes = parent == state->parent; /* Observe write marks */
3456 int cnt = 0;
3457
3458 while (parent) {
3459 /* if read wasn't screened by an earlier write ... */
3460 if (writes && state->live & REG_LIVE_WRITTEN)
3461 break;
3462 if (parent->live & REG_LIVE_DONE) {
3463 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
3464 reg_type_str(env, parent->type),
3465 parent->var_off.value, parent->off);
3466 return -EFAULT;
3467 }
3468 /* The first condition is more likely to be true than the
3469 * second, so check it first.
3470 */
3471 if ((parent->live & REG_LIVE_READ) == flag ||
3472 parent->live & REG_LIVE_READ64)
3473 /* The parentage chain never changes and
3474 * this parent was already marked as LIVE_READ.
3475 * There is no need to keep walking the chain again and
3476 * keep re-marking all parents as LIVE_READ.
3477 * This case happens when the same register is read
3478 * multiple times without writes into it in-between.
3479 * Also, if parent has the stronger REG_LIVE_READ64 set,
3480 * then no need to set the weak REG_LIVE_READ32.
3481 */
3482 break;
3483 /* ... then we depend on parent's value */
3484 parent->live |= flag;
3485 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
3486 if (flag == REG_LIVE_READ64)
3487 parent->live &= ~REG_LIVE_READ32;
3488 state = parent;
3489 parent = state->parent;
3490 writes = true;
3491 cnt++;
3492 }
3493
3494 if (env->longest_mark_read_walk < cnt)
3495 env->longest_mark_read_walk = cnt;
3496 return 0;
3497 }
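
/* Minimal sketch of the walk above (the register choice is illustrative): if
 * the current state reads r6 that was last written in a grandparent state,
 * mark_reg_read() follows r6's parentage chain and ORs REG_LIVE_READ32/64
 * into each ancestor until it reaches a state that wrote r6 (REG_LIVE_WRITTEN
 * screens the read) or one that already carries an equal or stronger read
 * mark.
 */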
3498
3499 static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3500 int spi, int nr_slots)
3501 {
3502 struct bpf_func_state *state = func(env, reg);
3503 int err, i;
3504
3505 for (i = 0; i < nr_slots; i++) {
3506 struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
3507
3508 err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
3509 if (err)
3510 return err;
3511
3512 mark_stack_slot_scratched(env, spi - i);
3513 }
3514 return 0;
3515 }
3516
3517 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3518 {
3519 int spi;
3520
3521 /* For CONST_PTR_TO_DYNPTR, it must have already been done by
3522 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3523 * check_kfunc_call.
3524 */
3525 if (reg->type == CONST_PTR_TO_DYNPTR)
3526 return 0;
3527 spi = dynptr_get_spi(env, reg);
3528 if (spi < 0)
3529 return spi;
3530 /* Caller ensures dynptr is valid and initialized, which means spi is in
3531 * bounds and spi is the first dynptr slot. Simply mark stack slot as
3532 * read.
3533 */
3534 return mark_stack_slot_obj_read(env, reg, spi, BPF_DYNPTR_NR_SLOTS);
3535 }
3536
3537 static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3538 int spi, int nr_slots)
3539 {
3540 return mark_stack_slot_obj_read(env, reg, spi, nr_slots);
3541 }
3542
3543 static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3544 {
3545 int spi;
3546
3547 spi = irq_flag_get_spi(env, reg);
3548 if (spi < 0)
3549 return spi;
3550 return mark_stack_slot_obj_read(env, reg, spi, 1);
3551 }
3552
3553 /* This function is supposed to be used by the following 32-bit optimization
3554 * code only. It returns TRUE if the source or destination register operates
3555 * on 64-bit, otherwise it returns FALSE.
3556 */
3557 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
3558 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
3559 {
3560 u8 code, class, op;
3561
3562 code = insn->code;
3563 class = BPF_CLASS(code);
3564 op = BPF_OP(code);
3565 if (class == BPF_JMP) {
3566 /* BPF_EXIT for "main" will reach here. Return TRUE
3567 * conservatively.
3568 */
3569 if (op == BPF_EXIT)
3570 return true;
3571 if (op == BPF_CALL) {
3572 /* BPF to BPF call will reach here because of marking
3573 * caller saved clobber with DST_OP_NO_MARK for which we
3574 * don't care about the register def because they are anyway
3575 * marked as NOT_INIT already.
3576 */
3577 if (insn->src_reg == BPF_PSEUDO_CALL)
3578 return false;
3579 /* Helper call will reach here because of arg type
3580 * check, conservatively return TRUE.
3581 */
3582 if (t == SRC_OP)
3583 return true;
3584
3585 return false;
3586 }
3587 }
3588
3589 if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3590 return false;
3591
3592 if (class == BPF_ALU64 || class == BPF_JMP ||
3593 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3594 return true;
3595
3596 if (class == BPF_ALU || class == BPF_JMP32)
3597 return false;
3598
3599 if (class == BPF_LDX) {
3600 if (t != SRC_OP)
3601 return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3602 /* LDX source must be ptr. */
3603 return true;
3604 }
3605
3606 if (class == BPF_STX) {
3607 /* BPF_STX (including atomic variants) has one or more source
3608 * operands, one of which is a ptr. Check whether the caller is
3609 * asking about it.
3610 */
3611 if (t == SRC_OP && reg->type != SCALAR_VALUE)
3612 return true;
3613 return BPF_SIZE(code) == BPF_DW;
3614 }
3615
3616 if (class == BPF_LD) {
3617 u8 mode = BPF_MODE(code);
3618
3619 /* LD_IMM64 */
3620 if (mode == BPF_IMM)
3621 return true;
3622
3623 /* Both LD_IND and LD_ABS return 32-bit data. */
3624 if (t != SRC_OP)
3625 return false;
3626
3627 /* Implicit ctx ptr. */
3628 if (regno == BPF_REG_6)
3629 return true;
3630
3631 /* Explicit source could be any width. */
3632 return true;
3633 }
3634
3635 if (class == BPF_ST)
3636 /* The only source register for BPF_ST is a ptr. */
3637 return true;
3638
3639 /* Conservatively return true at default. */
3640 return true;
3641 }
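
/* Examples of the classification above: "w1 += 2" (BPF_ALU) defines a 32-bit
 * subregister, "r1 += 2" (BPF_ALU64) defines all 64 bits, a BPF_LDX of size
 * BPF_DW or a sign-extending BPF_MEMSX load defines 64 bits, and ld_abs /
 * ld_ind destinations are treated as 32-bit since those insns return 32-bit
 * data.
 */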
3642
3643 /* Return the regno defined by the insn, or -1. */
3644 static int insn_def_regno(const struct bpf_insn *insn)
3645 {
3646 switch (BPF_CLASS(insn->code)) {
3647 case BPF_JMP:
3648 case BPF_JMP32:
3649 case BPF_ST:
3650 return -1;
3651 case BPF_STX:
3652 if ((BPF_MODE(insn->code) == BPF_ATOMIC ||
3653 BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) &&
3654 (insn->imm & BPF_FETCH)) {
3655 if (insn->imm == BPF_CMPXCHG)
3656 return BPF_REG_0;
3657 else
3658 return insn->src_reg;
3659 } else {
3660 return -1;
3661 }
3662 default:
3663 return insn->dst_reg;
3664 }
3665 }
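
/* Examples: an atomic BPF_CMPXCHG defines BPF_REG_0, an atomic fetch variant
 * such as BPF_XCHG defines insn->src_reg, plain stores and jumps define
 * nothing (-1), and ALU/LDX insns define insn->dst_reg.
 */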
3666
3667 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
3668 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
3669 {
3670 int dst_reg = insn_def_regno(insn);
3671
3672 if (dst_reg == -1)
3673 return false;
3674
3675 return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
3676 }
3677
3678 static void mark_insn_zext(struct bpf_verifier_env *env,
3679 struct bpf_reg_state *reg)
3680 {
3681 s32 def_idx = reg->subreg_def;
3682
3683 if (def_idx == DEF_NOT_SUBREG)
3684 return;
3685
3686 env->insn_aux_data[def_idx - 1].zext_dst = true;
3687 /* The dst will be zero extended, so won't be sub-register anymore. */
3688 reg->subreg_def = DEF_NOT_SUBREG;
3689 }
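
/* Sketch with hypothetical insn indices: if insn 7 did "w3 = w1" then r3 has
 * subreg_def == 8, so a later 64-bit read of r3 sets
 * insn_aux_data[7].zext_dst and the 32-bit optimization pass can insert an
 * explicit zero-extension after insn 7 on JITs that do not zero-extend
 * 32-bit writes for free.
 */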
3690
3691 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3692 enum reg_arg_type t)
3693 {
3694 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3695 struct bpf_reg_state *reg;
3696 bool rw64;
3697
3698 if (regno >= MAX_BPF_REG) {
3699 verbose(env, "R%d is invalid\n", regno);
3700 return -EINVAL;
3701 }
3702
3703 mark_reg_scratched(env, regno);
3704
3705 reg = &regs[regno];
3706 rw64 = is_reg64(env, insn, regno, reg, t);
3707 if (t == SRC_OP) {
3708 /* check whether register used as source operand can be read */
3709 if (reg->type == NOT_INIT) {
3710 verbose(env, "R%d !read_ok\n", regno);
3711 return -EACCES;
3712 }
3713 /* We don't need to worry about FP liveness because it's read-only */
3714 if (regno == BPF_REG_FP)
3715 return 0;
3716
3717 if (rw64)
3718 mark_insn_zext(env, reg);
3719
3720 return mark_reg_read(env, reg, reg->parent,
3721 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
3722 } else {
3723 /* check whether register used as dest operand can be written to */
3724 if (regno == BPF_REG_FP) {
3725 verbose(env, "frame pointer is read only\n");
3726 return -EACCES;
3727 }
3728 reg->live |= REG_LIVE_WRITTEN;
3729 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3730 if (t == DST_OP)
3731 mark_reg_unknown(env, regs, regno);
3732 }
3733 return 0;
3734 }
3735
3736 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3737 enum reg_arg_type t)
3738 {
3739 struct bpf_verifier_state *vstate = env->cur_state;
3740 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3741
3742 return __check_reg_arg(env, state->regs, regno, t);
3743 }
3744
3745 static int insn_stack_access_flags(int frameno, int spi)
3746 {
3747 return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
3748 }
3749
3750 static int insn_stack_access_spi(int insn_flags)
3751 {
3752 return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
3753 }
3754
3755 static int insn_stack_access_frameno(int insn_flags)
3756 {
3757 return insn_flags & INSN_F_FRAMENO_MASK;
3758 }
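
/* Worked example for the three helpers above (values are hypothetical): a
 * spill to slot spi == 3 in frame 2 is recorded as
 * insn_stack_access_flags(2, 3) == INSN_F_STACK_ACCESS | (3 << INSN_F_SPI_SHIFT) | 2,
 * from which backtracking recovers spi == 3 and frameno == 2.
 */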
3759
3760 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
3761 {
3762 env->insn_aux_data[idx].jmp_point = true;
3763 }
3764
3765 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
3766 {
3767 return env->insn_aux_data[insn_idx].jmp_point;
3768 }
3769
3770 #define LR_FRAMENO_BITS 3
3771 #define LR_SPI_BITS 6
3772 #define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1)
3773 #define LR_SIZE_BITS 4
3774 #define LR_FRAMENO_MASK ((1ull << LR_FRAMENO_BITS) - 1)
3775 #define LR_SPI_MASK ((1ull << LR_SPI_BITS) - 1)
3776 #define LR_SIZE_MASK ((1ull << LR_SIZE_BITS) - 1)
3777 #define LR_SPI_OFF LR_FRAMENO_BITS
3778 #define LR_IS_REG_OFF (LR_SPI_BITS + LR_FRAMENO_BITS)
3779 #define LINKED_REGS_MAX 6
3780
3781 struct linked_reg {
3782 u8 frameno;
3783 union {
3784 u8 spi;
3785 u8 regno;
3786 };
3787 bool is_reg;
3788 };
3789
3790 struct linked_regs {
3791 int cnt;
3792 struct linked_reg entries[LINKED_REGS_MAX];
3793 };
3794
3795 static struct linked_reg *linked_regs_push(struct linked_regs *s)
3796 {
3797 if (s->cnt < LINKED_REGS_MAX)
3798 return &s->entries[s->cnt++];
3799
3800 return NULL;
3801 }
3802
3803 /* Use u64 as a vector of 6 10-bit values, use first 4-bits to track
3804 * number of elements currently in stack.
3805 * Pack one history entry for linked registers as 10 bits in the following format:
3806 * - 3-bits frameno
3807 * - 6-bits spi_or_reg
3808 * - 1-bit is_reg
3809 */
3810 static u64 linked_regs_pack(struct linked_regs *s)
3811 {
3812 u64 val = 0;
3813 int i;
3814
3815 for (i = 0; i < s->cnt; ++i) {
3816 struct linked_reg *e = &s->entries[i];
3817 u64 tmp = 0;
3818
3819 tmp |= e->frameno;
3820 tmp |= e->spi << LR_SPI_OFF;
3821 tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
3822
3823 val <<= LR_ENTRY_BITS;
3824 val |= tmp;
3825 }
3826 val <<= LR_SIZE_BITS;
3827 val |= s->cnt;
3828 return val;
3829 }
3830
3831 static void linked_regs_unpack(u64 val, struct linked_regs *s)
3832 {
3833 int i;
3834
3835 s->cnt = val & LR_SIZE_MASK;
3836 val >>= LR_SIZE_BITS;
3837
3838 for (i = 0; i < s->cnt; ++i) {
3839 struct linked_reg *e = &s->entries[i];
3840
3841 e->frameno = val & LR_FRAMENO_MASK;
3842 e->spi = (val >> LR_SPI_OFF) & LR_SPI_MASK;
3843 e->is_reg = (val >> LR_IS_REG_OFF) & 0x1;
3844 val >>= LR_ENTRY_BITS;
3845 }
3846 }
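
/* Worked example (entries are hypothetical): packing { frameno=1, regno=5,
 * is_reg=true } and { frameno=0, spi=7, is_reg=false } stores each entry in
 * 10 bits as frameno | spi_or_reg << LR_SPI_OFF | is_reg << LR_IS_REG_OFF,
 * shifting earlier entries left by LR_ENTRY_BITS and keeping cnt == 2 in the
 * lowest LR_SIZE_BITS; linked_regs_unpack() recovers both entries (their
 * order does not matter to bt_sync_linked_regs()).
 */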
3847
3848 /* for any branch, call, exit record the history of jmps in the given state */
3849 static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
3850 int insn_flags, u64 linked_regs)
3851 {
3852 struct bpf_insn_hist_entry *p;
3853 size_t alloc_size;
3854
3855 /* combine instruction flags if we already recorded this instruction */
3856 if (env->cur_hist_ent) {
3857 /* atomic instructions push insn_flags twice, for READ and
3858 * WRITE sides, but they should agree on stack slot
3859 */
3860 WARN_ONCE((env->cur_hist_ent->flags & insn_flags) &&
3861 (env->cur_hist_ent->flags & insn_flags) != insn_flags,
3862 "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
3863 env->insn_idx, env->cur_hist_ent->flags, insn_flags);
3864 env->cur_hist_ent->flags |= insn_flags;
3865 WARN_ONCE(env->cur_hist_ent->linked_regs != 0,
3866 "verifier insn history bug: insn_idx %d linked_regs != 0: %#llx\n",
3867 env->insn_idx, env->cur_hist_ent->linked_regs);
3868 env->cur_hist_ent->linked_regs = linked_regs;
3869 return 0;
3870 }
3871
3872 if (cur->insn_hist_end + 1 > env->insn_hist_cap) {
3873 alloc_size = size_mul(cur->insn_hist_end + 1, sizeof(*p));
3874 p = kvrealloc(env->insn_hist, alloc_size, GFP_USER);
3875 if (!p)
3876 return -ENOMEM;
3877 env->insn_hist = p;
3878 env->insn_hist_cap = alloc_size / sizeof(*p);
3879 }
3880
3881 p = &env->insn_hist[cur->insn_hist_end];
3882 p->idx = env->insn_idx;
3883 p->prev_idx = env->prev_insn_idx;
3884 p->flags = insn_flags;
3885 p->linked_regs = linked_regs;
3886
3887 cur->insn_hist_end++;
3888 env->cur_hist_ent = p;
3889
3890 return 0;
3891 }
3892
3893 static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env *env,
3894 u32 hist_start, u32 hist_end, int insn_idx)
3895 {
3896 if (hist_end > hist_start && env->insn_hist[hist_end - 1].idx == insn_idx)
3897 return &env->insn_hist[hist_end - 1];
3898 return NULL;
3899 }
3900
3901 /* Backtrack one insn at a time. If idx is not at the top of recorded
3902 * history then previous instruction came from straight line execution.
3903 * Return -ENOENT if we exhausted all instructions within given state.
3904 *
3905 * It's legal to have a bit of looping with the same starting and ending
3906 * insn index within the same state, e.g.: 3->4->5->3, so just because current
3907 * instruction index is the same as state's first_idx doesn't mean we are
3908 * done. If there is still some jump history left, we should keep going. We
3909 * need to take into account that we might have a jump history between given
3910 * state's parent and itself, due to checkpointing. In this case, we'll have
3911 * history entry recording a jump from last instruction of parent state and
3912 * first instruction of given state.
3913 */
3914 static int get_prev_insn_idx(const struct bpf_verifier_env *env,
3915 struct bpf_verifier_state *st,
3916 int insn_idx, u32 hist_start, u32 *hist_endp)
3917 {
3918 u32 hist_end = *hist_endp;
3919 u32 cnt = hist_end - hist_start;
3920
3921 if (insn_idx == st->first_insn_idx) {
3922 if (cnt == 0)
3923 return -ENOENT;
3924 if (cnt == 1 && env->insn_hist[hist_start].idx == insn_idx)
3925 return -ENOENT;
3926 }
3927
3928 if (cnt && env->insn_hist[hist_end - 1].idx == insn_idx) {
3929 (*hist_endp)--;
3930 return env->insn_hist[hist_end - 1].prev_idx;
3931 } else {
3932 return insn_idx - 1;
3933 }
3934 }
3935
3936 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3937 {
3938 const struct btf_type *func;
3939 struct btf *desc_btf;
3940
3941 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3942 return NULL;
3943
3944 desc_btf = find_kfunc_desc_btf(data, insn->off);
3945 if (IS_ERR(desc_btf))
3946 return "<error>";
3947
3948 func = btf_type_by_id(desc_btf, insn->imm);
3949 return btf_name_by_offset(desc_btf, func->name_off);
3950 }
3951
3952 static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
3953 {
3954 const struct bpf_insn_cbs cbs = {
3955 .cb_call = disasm_kfunc_name,
3956 .cb_print = verbose,
3957 .private_data = env,
3958 };
3959
3960 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3961 }
3962
3963 static inline void bt_init(struct backtrack_state *bt, u32 frame)
3964 {
3965 bt->frame = frame;
3966 }
3967
3968 static inline void bt_reset(struct backtrack_state *bt)
3969 {
3970 struct bpf_verifier_env *env = bt->env;
3971
3972 memset(bt, 0, sizeof(*bt));
3973 bt->env = env;
3974 }
3975
3976 static inline u32 bt_empty(struct backtrack_state *bt)
3977 {
3978 u64 mask = 0;
3979 int i;
3980
3981 for (i = 0; i <= bt->frame; i++)
3982 mask |= bt->reg_masks[i] | bt->stack_masks[i];
3983
3984 return mask == 0;
3985 }
3986
3987 static inline int bt_subprog_enter(struct backtrack_state *bt)
3988 {
3989 if (bt->frame == MAX_CALL_FRAMES - 1) {
3990 verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
3991 WARN_ONCE(1, "verifier backtracking bug");
3992 return -EFAULT;
3993 }
3994 bt->frame++;
3995 return 0;
3996 }
3997
3998 static inline int bt_subprog_exit(struct backtrack_state *bt)
3999 {
4000 if (bt->frame == 0) {
4001 verbose(bt->env, "BUG subprog exit from frame 0\n");
4002 WARN_ONCE(1, "verifier backtracking bug");
4003 return -EFAULT;
4004 }
4005 bt->frame--;
4006 return 0;
4007 }
4008
4009 static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
4010 {
4011 bt->reg_masks[frame] |= 1 << reg;
4012 }
4013
4014 static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
4015 {
4016 bt->reg_masks[frame] &= ~(1 << reg);
4017 }
4018
4019 static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
4020 {
4021 bt_set_frame_reg(bt, bt->frame, reg);
4022 }
4023
4024 static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
4025 {
4026 bt_clear_frame_reg(bt, bt->frame, reg);
4027 }
4028
4029 static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
4030 {
4031 bt->stack_masks[frame] |= 1ull << slot;
4032 }
4033
4034 static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
4035 {
4036 bt->stack_masks[frame] &= ~(1ull << slot);
4037 }
4038
4039 static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
4040 {
4041 return bt->reg_masks[frame];
4042 }
4043
4044 static inline u32 bt_reg_mask(struct backtrack_state *bt)
4045 {
4046 return bt->reg_masks[bt->frame];
4047 }
4048
4049 static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
4050 {
4051 return bt->stack_masks[frame];
4052 }
4053
4054 static inline u64 bt_stack_mask(struct backtrack_state *bt)
4055 {
4056 return bt->stack_masks[bt->frame];
4057 }
4058
4059 static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
4060 {
4061 return bt->reg_masks[bt->frame] & (1 << reg);
4062 }
4063
4064 static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg)
4065 {
4066 return bt->reg_masks[frame] & (1 << reg);
4067 }
4068
4069 static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
4070 {
4071 return bt->stack_masks[frame] & (1ull << slot);
4072 }
4073
4074 /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
4075 static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
4076 {
4077 DECLARE_BITMAP(mask, 64);
4078 bool first = true;
4079 int i, n;
4080
4081 buf[0] = '\0';
4082
4083 bitmap_from_u64(mask, reg_mask);
4084 for_each_set_bit(i, mask, 32) {
4085 n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
4086 first = false;
4087 buf += n;
4088 buf_sz -= n;
4089 if (buf_sz < 0)
4090 break;
4091 }
4092 }
4093 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
4094 static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
4095 {
4096 DECLARE_BITMAP(mask, 64);
4097 bool first = true;
4098 int i, n;
4099
4100 buf[0] = '\0';
4101
4102 bitmap_from_u64(mask, stack_mask);
4103 for_each_set_bit(i, mask, 64) {
4104 n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
4105 first = false;
4106 buf += n;
4107 buf_sz -= n;
4108 if (buf_sz < 0)
4109 break;
4110 }
4111 }
4112
4113 /* If any register R in hist->linked_regs is marked as precise in bt,
4114 * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
4115 */
4116 static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_insn_hist_entry *hist)
4117 {
4118 struct linked_regs linked_regs;
4119 bool some_precise = false;
4120 int i;
4121
4122 if (!hist || hist->linked_regs == 0)
4123 return;
4124
4125 linked_regs_unpack(hist->linked_regs, &linked_regs);
4126 for (i = 0; i < linked_regs.cnt; ++i) {
4127 struct linked_reg *e = &linked_regs.entries[i];
4128
4129 if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
4130 (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
4131 some_precise = true;
4132 break;
4133 }
4134 }
4135
4136 if (!some_precise)
4137 return;
4138
4139 for (i = 0; i < linked_regs.cnt; ++i) {
4140 struct linked_reg *e = &linked_regs.entries[i];
4141
4142 if (e->is_reg)
4143 bt_set_frame_reg(bt, e->frameno, e->regno);
4144 else
4145 bt_set_frame_slot(bt, e->frameno, e->spi);
4146 }
4147 }
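
/* Sketch (register names are illustrative): registers that share an id, e.g.
 * after "r2 = r1" with a scalar r1, gain range together at a conditional
 * jump, and that jump's history entry records them as linked. If backtracking
 * later marks either one precise at that insn, bt_sync_linked_regs() marks
 * the whole group, so the range knowledge is preserved no matter which copy
 * the program actually used.
 */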
4148
4149 static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
4150
4151 /* For given verifier state backtrack_insn() is called from the last insn to
4152 * the first insn. Its purpose is to compute a bitmask of registers and
4153 * stack slots that needs precision in the parent verifier state.
4154 *
4155 * @idx is an index of the instruction we are currently processing;
4156 * @subseq_idx is an index of the subsequent instruction that:
4157 * - *would be* executed next, if jump history is viewed in forward order;
4158 * - *was* processed previously during backtracking.
4159 */
4160 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
4161 struct bpf_insn_hist_entry *hist, struct backtrack_state *bt)
4162 {
4163 struct bpf_insn *insn = env->prog->insnsi + idx;
4164 u8 class = BPF_CLASS(insn->code);
4165 u8 opcode = BPF_OP(insn->code);
4166 u8 mode = BPF_MODE(insn->code);
4167 u32 dreg = insn->dst_reg;
4168 u32 sreg = insn->src_reg;
4169 u32 spi, i, fr;
4170
4171 if (insn->code == 0)
4172 return 0;
4173 if (env->log.level & BPF_LOG_LEVEL2) {
4174 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
4175 verbose(env, "mark_precise: frame%d: regs=%s ",
4176 bt->frame, env->tmp_str_buf);
4177 fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
4178 verbose(env, "stack=%s before ", env->tmp_str_buf);
4179 verbose(env, "%d: ", idx);
4180 verbose_insn(env, insn);
4181 }
4182
4183 /* If there is a history record that some registers gained range at this insn,
4184 * propagate precision marks to those registers, so that bt_is_reg_set()
4185 * accounts for these registers.
4186 */
4187 bt_sync_linked_regs(bt, hist);
4188
4189 if (class == BPF_ALU || class == BPF_ALU64) {
4190 if (!bt_is_reg_set(bt, dreg))
4191 return 0;
4192 if (opcode == BPF_END || opcode == BPF_NEG) {
4193 /* sreg is reserved and unused
4194 * dreg still needs precision before this insn
4195 */
4196 return 0;
4197 } else if (opcode == BPF_MOV) {
4198 if (BPF_SRC(insn->code) == BPF_X) {
4199 /* dreg = sreg or dreg = (s8, s16, s32)sreg
4200 * dreg needs precision after this insn
4201 * sreg needs precision before this insn
4202 */
4203 bt_clear_reg(bt, dreg);
4204 if (sreg != BPF_REG_FP)
4205 bt_set_reg(bt, sreg);
4206 } else {
4207 /* dreg = K
4208 * dreg needs precision after this insn.
4209 * Corresponding register is already marked
4210 * as precise=true in this verifier state.
4211 * No further markings in parent are necessary
4212 */
4213 bt_clear_reg(bt, dreg);
4214 }
4215 } else {
4216 if (BPF_SRC(insn->code) == BPF_X) {
4217 /* dreg += sreg
4218 * both dreg and sreg need precision
4219 * before this insn
4220 */
4221 if (sreg != BPF_REG_FP)
4222 bt_set_reg(bt, sreg);
4223 } /* else dreg += K
4224 * dreg still needs precision before this insn
4225 */
4226 }
4227 } else if (class == BPF_LDX || is_atomic_load_insn(insn)) {
4228 if (!bt_is_reg_set(bt, dreg))
4229 return 0;
4230 bt_clear_reg(bt, dreg);
4231
4232 /* scalars can only be spilled into stack w/o losing precision.
4233 * Load from any other memory can be zero extended.
4234 * The desire to keep that precision is already indicated
4235 * by 'precise' mark in corresponding register of this state.
4236 * No further tracking necessary.
4237 */
4238 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
4239 return 0;
4240 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
4241 * that [fp - off] slot contains scalar that needs to be
4242 * tracked with precision
4243 */
4244 spi = insn_stack_access_spi(hist->flags);
4245 fr = insn_stack_access_frameno(hist->flags);
4246 bt_set_frame_slot(bt, fr, spi);
4247 } else if (class == BPF_STX || class == BPF_ST) {
4248 if (bt_is_reg_set(bt, dreg))
4249 /* stx & st shouldn't be using _scalar_ dst_reg
4250 * to access memory. It means backtracking
4251 * encountered a case of pointer subtraction.
4252 */
4253 return -ENOTSUPP;
4254 /* scalars can only be spilled into stack */
4255 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
4256 return 0;
4257 spi = insn_stack_access_spi(hist->flags);
4258 fr = insn_stack_access_frameno(hist->flags);
4259 if (!bt_is_frame_slot_set(bt, fr, spi))
4260 return 0;
4261 bt_clear_frame_slot(bt, fr, spi);
4262 if (class == BPF_STX)
4263 bt_set_reg(bt, sreg);
4264 } else if (class == BPF_JMP || class == BPF_JMP32) {
4265 if (bpf_pseudo_call(insn)) {
4266 int subprog_insn_idx, subprog;
4267
4268 subprog_insn_idx = idx + insn->imm + 1;
4269 subprog = find_subprog(env, subprog_insn_idx);
4270 if (subprog < 0)
4271 return -EFAULT;
4272
4273 if (subprog_is_global(env, subprog)) {
4274 /* check that jump history doesn't have any
4275 * extra instructions from subprog; the next
4276 * instruction after call to global subprog
4277 * should be literally next instruction in
4278 * caller program
4279 */
4280 WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
4281 /* r1-r5 are invalidated after subprog call,
4282 * so for global func call it shouldn't be set
4283 * anymore
4284 */
4285 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4286 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4287 WARN_ONCE(1, "verifier backtracking bug");
4288 return -EFAULT;
4289 }
4290 /* global subprog always sets R0 */
4291 bt_clear_reg(bt, BPF_REG_0);
4292 return 0;
4293 } else {
4294 /* static subprog call instruction, which
4295 * means that we are exiting current subprog,
4296 * so only r1-r5 could be still requested as
4297 * precise, r0 and r6-r10 or any stack slot in
4298 * the current frame should be zero by now
4299 */
4300 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
4301 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4302 WARN_ONCE(1, "verifier backtracking bug");
4303 return -EFAULT;
4304 }
4305 /* we are now tracking register spills correctly,
4306 * so any instance of leftover slots is a bug
4307 */
4308 if (bt_stack_mask(bt) != 0) {
4309 verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
4310 WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)");
4311 return -EFAULT;
4312 }
4313 /* propagate r1-r5 to the caller */
4314 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
4315 if (bt_is_reg_set(bt, i)) {
4316 bt_clear_reg(bt, i);
4317 bt_set_frame_reg(bt, bt->frame - 1, i);
4318 }
4319 }
4320 if (bt_subprog_exit(bt))
4321 return -EFAULT;
4322 return 0;
4323 }
4324 } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
4325 /* exit from callback subprog to callback-calling helper or
4326 * kfunc call. Use idx/subseq_idx check to discern it from
4327 * straight line code backtracking.
4328 * Unlike the subprog call handling above, we shouldn't
4329 * propagate precision of r1-r5 (if any requested), as they are
4330 * not actually arguments passed directly to callback subprogs
4331 */
4332 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
4333 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4334 WARN_ONCE(1, "verifier backtracking bug");
4335 return -EFAULT;
4336 }
4337 if (bt_stack_mask(bt) != 0) {
4338 verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
4339 WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)");
4340 return -EFAULT;
4341 }
4342 /* clear r1-r5 in callback subprog's mask */
4343 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
4344 bt_clear_reg(bt, i);
4345 if (bt_subprog_exit(bt))
4346 return -EFAULT;
4347 return 0;
4348 } else if (opcode == BPF_CALL) {
4349 /* kfunc with imm==0 is invalid and fixup_kfunc_call will
4350 * catch this error later. Make backtracking conservative
4351 * with ENOTSUPP.
4352 */
4353 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
4354 return -ENOTSUPP;
4355 /* regular helper call sets R0 */
4356 bt_clear_reg(bt, BPF_REG_0);
4357 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4358 /* if backtracing was looking for registers R1-R5
4359 * they should have been found already.
4360 */
4361 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4362 WARN_ONCE(1, "verifier backtracking bug");
4363 return -EFAULT;
4364 }
4365 } else if (opcode == BPF_EXIT) {
4366 bool r0_precise;
4367
4368 /* Backtracking to a nested function call, 'idx' is a part of
4369 * the inner frame, 'subseq_idx' is a part of the outer frame.
4370 * In case of a regular function call, instructions giving
4371 * precision to registers R1-R5 should have been found already.
4372 * In case of a callback, it is ok to have R1-R5 marked for
4373 * backtracking, as these registers are set by the function
4374 * invoking callback.
4375 */
4376 if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
4377 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
4378 bt_clear_reg(bt, i);
4379 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4380 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
4381 WARN_ONCE(1, "verifier backtracking bug");
4382 return -EFAULT;
4383 }
4384
4385 /* BPF_EXIT in subprog or callback always returns
4386 * right after the call instruction, so by checking
4387 * whether the instruction at subseq_idx-1 is subprog
4388 * call or not we can distinguish actual exit from
4389 * *subprog* from exit from *callback*. In the former
4390 * case, we need to propagate r0 precision, if
4391 * necessary. In the latter we never do that.
4392 */
4393 r0_precise = subseq_idx - 1 >= 0 &&
4394 bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
4395 bt_is_reg_set(bt, BPF_REG_0);
4396
4397 bt_clear_reg(bt, BPF_REG_0);
4398 if (bt_subprog_enter(bt))
4399 return -EFAULT;
4400
4401 if (r0_precise)
4402 bt_set_reg(bt, BPF_REG_0);
4403 /* r6-r9 and stack slots will stay set in caller frame
4404 * bitmasks until we return back from callee(s)
4405 */
4406 return 0;
4407 } else if (BPF_SRC(insn->code) == BPF_X) {
4408 if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
4409 return 0;
4410 /* dreg <cond> sreg
4411 * Both dreg and sreg need precision before
4412 * this insn. If only sreg was marked precise
4413 * before it would be equally necessary to
4414 * propagate it to dreg.
4415 */
4416 bt_set_reg(bt, dreg);
4417 bt_set_reg(bt, sreg);
4418 } else if (BPF_SRC(insn->code) == BPF_K) {
4419 /* dreg <cond> K
4420 * Only dreg still needs precision before
4421 * this insn, so for the K-based conditional
4422 * there is nothing new to be marked.
4423 */
4424 }
4425 } else if (class == BPF_LD) {
4426 if (!bt_is_reg_set(bt, dreg))
4427 return 0;
4428 bt_clear_reg(bt, dreg);
4429 /* It's ld_imm64 or ld_abs or ld_ind.
4430 * For ld_imm64 no further tracking of precision
4431 * into parent is necessary
4432 */
4433 if (mode == BPF_IND || mode == BPF_ABS)
4434 /* to be analyzed */
4435 return -ENOTSUPP;
4436 }
4437 /* Propagate precision marks to linked registers, to account for
4438 * registers marked as precise in this function.
4439 */
4440 bt_sync_linked_regs(bt, hist);
4441 return 0;
4442 }
4443
4444 /* the scalar precision tracking algorithm:
4445 * . at the start all registers have precise=false.
4446 * . scalar ranges are tracked as normal through alu and jmp insns.
4447 * . once precise value of the scalar register is used in:
4448 * . ptr + scalar alu
4449 * . if (scalar cond K|scalar)
4450 * . helper_call(.., scalar, ...) where ARG_CONST is expected
4451 * backtrack through the verifier states and mark all registers and
4452 * stack slots with spilled constants that these scalar registers
4453 * should be precise.
4454 * . during state pruning two registers (or spilled stack slots)
4455 * are equivalent if both are not precise.
4456 *
4457 * Note the verifier cannot simply walk register parentage chain,
4458 * since many different registers and stack slots could have been
4459 * used to compute single precise scalar.
4460 *
4461 * The approach of starting with precise=true for all registers and then
4462 * backtrack to mark a register as not precise when the verifier detects
4463 * that program doesn't care about specific value (e.g., when helper
4464 * takes register as ARG_ANYTHING parameter) is not safe.
4465 *
4466 * It's ok to walk single parentage chain of the verifier states.
4467 * It's possible that this backtracking will go all the way till 1st insn.
4468 * All other branches will be explored for needing precision later.
4469 *
4470 * The backtracking needs to deal with cases like:
4471 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
4472 * r9 -= r8
4473 * r5 = r9
4474 * if r5 > 0x79f goto pc+7
4475 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
4476 * r5 += 1
4477 * ...
4478 * call bpf_perf_event_output#25
4479 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
4480 *
4481 * and this case:
4482 * r6 = 1
4483 * call foo // uses callee's r6 inside to compute r0
4484 * r0 += r6
4485 * if r0 == 0 goto
4486 *
4487 * to track above reg_mask/stack_mask needs to be independent for each frame.
4488 *
4489 * Also if parent's curframe > frame where backtracking started,
4490 * the verifier needs to mark registers in both frames, otherwise callees
4491 * may incorrectly prune callers. This is similar to
4492 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
4493 *
4494 * For now backtracking falls back into conservative marking.
4495 */
4496 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
4497 struct bpf_verifier_state *st)
4498 {
4499 struct bpf_func_state *func;
4500 struct bpf_reg_state *reg;
4501 int i, j;
4502
4503 if (env->log.level & BPF_LOG_LEVEL2) {
4504 verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
4505 st->curframe);
4506 }
4507
4508 /* big hammer: mark all scalars precise in this path.
4509 * pop_stack may still get !precise scalars.
4510 * We also skip current state and go straight to first parent state,
4511 * because precision markings in current non-checkpointed state are
4512 * not needed. See why in the comment in __mark_chain_precision below.
4513 */
4514 for (st = st->parent; st; st = st->parent) {
4515 for (i = 0; i <= st->curframe; i++) {
4516 func = st->frame[i];
4517 for (j = 0; j < BPF_REG_FP; j++) {
4518 reg = &func->regs[j];
4519 if (reg->type != SCALAR_VALUE || reg->precise)
4520 continue;
4521 reg->precise = true;
4522 if (env->log.level & BPF_LOG_LEVEL2) {
4523 verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
4524 i, j);
4525 }
4526 }
4527 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4528 if (!is_spilled_reg(&func->stack[j]))
4529 continue;
4530 reg = &func->stack[j].spilled_ptr;
4531 if (reg->type != SCALAR_VALUE || reg->precise)
4532 continue;
4533 reg->precise = true;
4534 if (env->log.level & BPF_LOG_LEVEL2) {
4535 verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
4536 i, -(j + 1) * 8);
4537 }
4538 }
4539 }
4540 }
4541 }
4542
4543 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
4544 {
4545 struct bpf_func_state *func;
4546 struct bpf_reg_state *reg;
4547 int i, j;
4548
4549 for (i = 0; i <= st->curframe; i++) {
4550 func = st->frame[i];
4551 for (j = 0; j < BPF_REG_FP; j++) {
4552 reg = &func->regs[j];
4553 if (reg->type != SCALAR_VALUE)
4554 continue;
4555 reg->precise = false;
4556 }
4557 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4558 if (!is_spilled_reg(&func->stack[j]))
4559 continue;
4560 reg = &func->stack[j].spilled_ptr;
4561 if (reg->type != SCALAR_VALUE)
4562 continue;
4563 reg->precise = false;
4564 }
4565 }
4566 }
4567
4568 /*
4569 * __mark_chain_precision() backtracks BPF program instruction sequence and
4570 * chain of verifier states making sure that register *regno* (if regno >= 0)
4571 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
4572 * SCALARS, as well as any other registers and slots that contribute to
4573 * a tracked state of given registers/stack slots, depending on specific BPF
4574 * assembly instructions (see backtrack_insn() for exact instruction handling
4575 * logic). This backtracking relies on recorded insn_hist and is able to
4576 * traverse entire chain of parent states. This process ends only when all the
4577 * necessary registers/slots and their transitive dependencies are marked as
4578 * precise.
4579 *
4580 * One important and subtle aspect is that precise marks *do not matter* in
4581 * the currently verified state (current state). It is important to understand
4582 * why this is the case.
4583 *
4584 * First, note that current state is the state that is not yet "checkpointed",
4585 * i.e., it is not yet put into env->explored_states, and it has no children
4586 * states as well. It's ephemeral, and can end up either a) being discarded if
4587 * compatible explored state is found at some point or BPF_EXIT instruction is
4588 * reached or b) checkpointed and put into env->explored_states, branching out
4589 * into one or more children states.
4590 *
4591 * In the former case, precise markings in current state are completely
4592 * ignored by state comparison code (see regsafe() for details). Only
4593 * checkpointed ("old") state precise markings are important, and if old
4594 * state's register/slot is precise, regsafe() assumes current state's
4595 * register/slot as precise and checks value ranges exactly and precisely. If
4596 * states turn out to be compatible, current state's necessary precise
4597 * markings and any required parent states' precise markings are enforced
4598 * after the fact with propagate_precision() logic. But it's
4599 * important to realize that in this case, even after marking current state
4600 * registers/slots as precise, we immediately discard current state. So what
4601 * actually matters is any of the precise markings propagated into current
4602 * state's parent states, which are always checkpointed (due to b) case above).
4603 * As such, for scenario a) it doesn't matter if current state has precise
4604 * markings set or not.
4605 *
4606 * Now, for the scenario b), checkpointing and forking into child(ren)
4607 * state(s). Note that before current state gets to checkpointing step, any
4608 * processed instruction always assumes precise SCALAR register/slot
4609 * knowledge: if precise value or range is useful to prune jump branch, BPF
4610 * verifier takes this opportunity enthusiastically. Similarly, when
4611 * register's value is used to calculate offset or memory address, exact
4612 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
4613 * what we mentioned above about state comparison ignoring precise markings
4614 * during state comparison, BPF verifier ignores and also assumes precise
4615 * markings *at will* during instruction verification process. But as verifier
4616 * assumes precision, it also propagates any precision dependencies across
4617 * parent states, which are not yet finalized, so can be further restricted
4618 * based on new knowledge gained from restrictions enforced by their children
4619 * states. This is so that once those parent states are finalized, i.e., when
4620 * they have no more active children state, state comparison logic in
4621 * is_state_visited() would enforce strict and precise SCALAR ranges, if
4622 * required for correctness.
4623 *
4624 * To build a bit more intuition, note also that once a state is checkpointed,
4625 * the path we took to get to that state is not important. This is crucial
4626 * property for state pruning. When state is checkpointed and finalized at
4627 * some instruction index, it can be correctly and safely used to "short
4628 * circuit" any *compatible* state that reaches exactly the same instruction
4629 * index. I.e., if we jumped to that instruction from a completely different
4630 * code path than original finalized state was derived from, it doesn't
4631 * matter, current state can be discarded because from that instruction
4632 * forward having a compatible state will ensure we will safely reach the
4633 * exit. States describe preconditions for further exploration, but completely
4634 * forget the history of how we got here.
4635 *
4636 * This also means that even if we needed precise SCALAR range to get to
4637 * finalized state, but from that point forward *that same* SCALAR register is
4638 * never used in a precise context (i.e., its precise value is not needed for
4639 * correctness), it's correct and safe to mark such register as "imprecise"
4640 * (i.e., precise marking set to false). This is what we rely on when we do
4641 * not set precise marking in current state. If no child state requires
4642 * precision for any given SCALAR register, it's safe to dictate that it can
4643 * be imprecise. If any child state does require this register to be precise,
4644 * we'll mark it precise later retroactively during precise markings
4645 * propagation from child state to parent states.
4646 *
4647 * Skipping precise marking setting in current state is a mild version of
4648 * relying on the above observation. But we can utilize this property even
4649 * more aggressively by proactively forgetting any precise marking in the
4650 * current state (which we inherited from the parent state), right before we
4651 * checkpoint it and branch off into new child state. This is done by
4652 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
4653 * finalized states which help in short circuiting more future states.
4654 */
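/* As a purely illustrative sketch (hypothetical instruction sequence, not
 * taken from any particular program), the mechanism above matters for code
 * like:
 *
 *   r6 = *(u32 *)(r1 + 0);     // r6 is a SCALAR_VALUE with unknown value
 *   if r6 > 7 goto out;        // branch gives r6 a bounded range [0, 7]
 *   r7 = r10;
 *   r7 += -8;
 *   r7 += r6;                  // r6 participates in stack address calculation,
 *   r0 = *(u8 *)(r7 + 0);      // so its exact range must be precise
 *
 * When verifying the final load, backtracking walks the instruction history
 * to find where r6 got its value and range, and marks r6 precise in the
 * relevant parent states so that state pruning doesn't merge states with
 * incompatible r6 ranges.
 */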
4655 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
4656 {
4657 struct backtrack_state *bt = &env->bt;
4658 struct bpf_verifier_state *st = env->cur_state;
4659 int first_idx = st->first_insn_idx;
4660 int last_idx = env->insn_idx;
4661 int subseq_idx = -1;
4662 struct bpf_func_state *func;
4663 struct bpf_reg_state *reg;
4664 bool skip_first = true;
4665 int i, fr, err;
4666
4667 if (!env->bpf_capable)
4668 return 0;
4669
4670 /* set frame number from which we are starting to backtrack */
4671 bt_init(bt, env->cur_state->curframe);
4672
4673 /* Do sanity checks against current state of register and/or stack
4674 * slot, but don't set precise flag in current state, as precision
4675 * tracking in the current state is unnecessary.
4676 */
4677 func = st->frame[bt->frame];
4678 if (regno >= 0) {
4679 reg = &func->regs[regno];
4680 if (reg->type != SCALAR_VALUE) {
4681 WARN_ONCE(1, "backtracing misuse");
4682 return -EFAULT;
4683 }
4684 bt_set_reg(bt, regno);
4685 }
4686
4687 if (bt_empty(bt))
4688 return 0;
4689
4690 for (;;) {
4691 DECLARE_BITMAP(mask, 64);
4692 u32 hist_start = st->insn_hist_start;
4693 u32 hist_end = st->insn_hist_end;
4694 struct bpf_insn_hist_entry *hist;
4695
4696 if (env->log.level & BPF_LOG_LEVEL2) {
4697 verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
4698 bt->frame, last_idx, first_idx, subseq_idx);
4699 }
4700
4701 if (last_idx < 0) {
4702 /* we are at the entry into subprog, which
4703 * is expected for global funcs, but only if
4704 * requested precise registers are R1-R5
4705 * (which are global func's input arguments)
4706 */
4707 if (st->curframe == 0 &&
4708 st->frame[0]->subprogno > 0 &&
4709 st->frame[0]->callsite == BPF_MAIN_FUNC &&
4710 bt_stack_mask(bt) == 0 &&
4711 (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
4712 bitmap_from_u64(mask, bt_reg_mask(bt));
4713 for_each_set_bit(i, mask, 32) {
4714 reg = &st->frame[0]->regs[i];
4715 bt_clear_reg(bt, i);
4716 if (reg->type == SCALAR_VALUE)
4717 reg->precise = true;
4718 }
4719 return 0;
4720 }
4721
4722 verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
4723 st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
4724 WARN_ONCE(1, "verifier backtracking bug");
4725 return -EFAULT;
4726 }
4727
4728 for (i = last_idx;;) {
4729 if (skip_first) {
4730 err = 0;
4731 skip_first = false;
4732 } else {
4733 hist = get_insn_hist_entry(env, hist_start, hist_end, i);
4734 err = backtrack_insn(env, i, subseq_idx, hist, bt);
4735 }
4736 if (err == -ENOTSUPP) {
4737 mark_all_scalars_precise(env, env->cur_state);
4738 bt_reset(bt);
4739 return 0;
4740 } else if (err) {
4741 return err;
4742 }
4743 if (bt_empty(bt))
4744 /* Found assignment(s) into tracked register in this state.
4745 * Since this state is already marked, just return.
4746 * Nothing to be tracked further in the parent state.
4747 */
4748 return 0;
4749 subseq_idx = i;
4750 i = get_prev_insn_idx(env, st, i, hist_start, &hist_end);
4751 if (i == -ENOENT)
4752 break;
4753 if (i >= env->prog->len) {
4754 /* This can happen if backtracking reached insn 0
4755 * and there are still reg_mask or stack_mask
4756 * to backtrack.
4757 * It means the backtracking missed the spot where
4758 * particular register was initialized with a constant.
4759 */
4760 verbose(env, "BUG backtracking idx %d\n", i);
4761 WARN_ONCE(1, "verifier backtracking bug");
4762 return -EFAULT;
4763 }
4764 }
4765 st = st->parent;
4766 if (!st)
4767 break;
4768
4769 for (fr = bt->frame; fr >= 0; fr--) {
4770 func = st->frame[fr];
4771 bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
4772 for_each_set_bit(i, mask, 32) {
4773 reg = &func->regs[i];
4774 if (reg->type != SCALAR_VALUE) {
4775 bt_clear_frame_reg(bt, fr, i);
4776 continue;
4777 }
4778 if (reg->precise)
4779 bt_clear_frame_reg(bt, fr, i);
4780 else
4781 reg->precise = true;
4782 }
4783
4784 bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
4785 for_each_set_bit(i, mask, 64) {
4786 if (i >= func->allocated_stack / BPF_REG_SIZE) {
4787 verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n",
4788 i, func->allocated_stack / BPF_REG_SIZE);
4789 WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)");
4790 return -EFAULT;
4791 }
4792
4793 if (!is_spilled_scalar_reg(&func->stack[i])) {
4794 bt_clear_frame_slot(bt, fr, i);
4795 continue;
4796 }
4797 reg = &func->stack[i].spilled_ptr;
4798 if (reg->precise)
4799 bt_clear_frame_slot(bt, fr, i);
4800 else
4801 reg->precise = true;
4802 }
4803 if (env->log.level & BPF_LOG_LEVEL2) {
4804 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4805 bt_frame_reg_mask(bt, fr));
4806 verbose(env, "mark_precise: frame%d: parent state regs=%s ",
4807 fr, env->tmp_str_buf);
4808 fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4809 bt_frame_stack_mask(bt, fr));
4810 verbose(env, "stack=%s: ", env->tmp_str_buf);
4811 print_verifier_state(env, st, fr, true);
4812 }
4813 }
4814
4815 if (bt_empty(bt))
4816 return 0;
4817
4818 subseq_idx = first_idx;
4819 last_idx = st->last_insn_idx;
4820 first_idx = st->first_insn_idx;
4821 }
4822
4823 /* if we still have requested precise regs or slots, we missed
4824 * something (e.g., stack access through non-r10 register), so
4825 * fallback to marking all precise
4826 */
4827 if (!bt_empty(bt)) {
4828 mark_all_scalars_precise(env, env->cur_state);
4829 bt_reset(bt);
4830 }
4831
4832 return 0;
4833 }
4834
4835 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
4836 {
4837 return __mark_chain_precision(env, regno);
4838 }
4839
4840 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
4841 * desired reg and stack masks across all relevant frames
4842 */
4843 static int mark_chain_precision_batch(struct bpf_verifier_env *env)
4844 {
4845 return __mark_chain_precision(env, -1);
4846 }
4847
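/* Pointer types that may be spilled to the stack with their register state
 * preserved (and later filled back by a full 8-byte read). Spilling any other
 * pointer type is treated as a regular data write and turns the slot into
 * STACK_MISC.
 */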
4848 static bool is_spillable_regtype(enum bpf_reg_type type)
4849 {
4850 switch (base_type(type)) {
4851 case PTR_TO_MAP_VALUE:
4852 case PTR_TO_STACK:
4853 case PTR_TO_CTX:
4854 case PTR_TO_PACKET:
4855 case PTR_TO_PACKET_META:
4856 case PTR_TO_PACKET_END:
4857 case PTR_TO_FLOW_KEYS:
4858 case CONST_PTR_TO_MAP:
4859 case PTR_TO_SOCKET:
4860 case PTR_TO_SOCK_COMMON:
4861 case PTR_TO_TCP_SOCK:
4862 case PTR_TO_XDP_SOCK:
4863 case PTR_TO_BTF_ID:
4864 case PTR_TO_BUF:
4865 case PTR_TO_MEM:
4866 case PTR_TO_FUNC:
4867 case PTR_TO_MAP_KEY:
4868 case PTR_TO_ARENA:
4869 return true;
4870 default:
4871 return false;
4872 }
4873 }
4874
4875 /* Does this register contain a constant zero? */
4876 static bool register_is_null(struct bpf_reg_state *reg)
4877 {
4878 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
4879 }
4880
4881 /* check if register is a constant scalar value */
4882 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
4883 {
4884 return reg->type == SCALAR_VALUE &&
4885 tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
4886 }
4887
4888 /* assuming is_reg_const() is true, return constant value of a register */
4889 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
4890 {
4891 return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
4892 }
4893
4894 static bool __is_pointer_value(bool allow_ptr_leaks,
4895 const struct bpf_reg_state *reg)
4896 {
4897 if (allow_ptr_leaks)
4898 return false;
4899
4900 return reg->type != SCALAR_VALUE;
4901 }
4902
4903 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
4904 struct bpf_reg_state *src_reg)
4905 {
4906 if (src_reg->type != SCALAR_VALUE)
4907 return;
4908
4909 if (src_reg->id & BPF_ADD_CONST) {
4910 /*
4911 * The verifier is processing rX = rY insn and
4912 * rY->id has special linked register already.
4913 * Clear it, since multiple rX += const are not supported.
4914 */
4915 src_reg->id = 0;
4916 src_reg->off = 0;
4917 }
4918
4919 if (!src_reg->id && !tnum_is_const(src_reg->var_off))
4920 /* Ensure that src_reg has a valid ID that will be copied to
4921 * dst_reg and then will be used by sync_linked_regs() to
4922 * propagate min/max range.
4923 */
4924 src_reg->id = ++env->id_gen;
4925 }
4926
4927 /* Copy src state preserving dst->parent and dst->live fields */
4928 static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
4929 {
4930 struct bpf_reg_state *parent = dst->parent;
4931 enum bpf_reg_liveness live = dst->live;
4932
4933 *dst = *src;
4934 dst->parent = parent;
4935 dst->live = live;
4936 }
4937
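/* Record a register spill at stack slot 'spi': copy the register state into
 * the slot's spilled_ptr (preserving the slot's own parent and liveness
 * marks), mark the written bytes as STACK_SPILL, and let
 * mark_stack_slot_misc() handle the remaining bytes of a spill narrower than
 * 8 bytes.
 */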
4938 static void save_register_state(struct bpf_verifier_env *env,
4939 struct bpf_func_state *state,
4940 int spi, struct bpf_reg_state *reg,
4941 int size)
4942 {
4943 int i;
4944
4945 copy_register_state(&state->stack[spi].spilled_ptr, reg);
4946 if (size == BPF_REG_SIZE)
4947 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4948
4949 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
4950 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
4951
4952 /* size < 8 bytes spill */
4953 for (; i; i--)
4954 mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
4955 }
4956
4957 static bool is_bpf_st_mem(struct bpf_insn *insn)
4958 {
4959 return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
4960 }
4961
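/* Effective width of a scalar: the number of bits needed to represent its
 * current umax_value (fls64() returns the index of the most significant set
 * bit, or 0 for the value 0).
 */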
4962 static int get_reg_width(struct bpf_reg_state *reg)
4963 {
4964 return fls64(reg->umax_value);
4965 }
4966
4967 /* See comment for mark_fastcall_pattern_for_call() */
4968 static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
4969 struct bpf_func_state *state, int insn_idx, int off)
4970 {
4971 struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4972 struct bpf_insn_aux_data *aux = env->insn_aux_data;
4973 int i;
4974
4975 if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
4976 return;
4977 /* An access to the region [max_stack_depth .. fastcall_stack_off)
4978 * came from something that is not part of the fastcall pattern;
4979 * disable fastcall rewrites for the current subprogram by setting
4980 * fastcall_stack_off to a value smaller than any possible offset.
4981 */
4982 subprog->fastcall_stack_off = S16_MIN;
4983 /* reset fastcall aux flags within subprogram,
4984 * happens at most once per subprogram
4985 */
4986 for (i = subprog->start; i < (subprog + 1)->start; ++i) {
4987 aux[i].fastcall_spills_num = 0;
4988 aux[i].fastcall_pattern = 0;
4989 }
4990 }
4991
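/* Illustrative (hypothetical) spill/fill sequence handled by the fixed-offset
 * helpers below:
 *
 *   *(u64 *)(r10 - 8) = r6     // spill: the slot keeps r6's register state
 *   ...
 *   r7 = *(u64 *)(r10 - 8)     // fill: r7 gets that state back
 *
 * For a SCALAR_VALUE spill that fits the written size, bounds and the
 * register id survive the round trip; a narrowing spill or fill breaks the
 * register id relation.
 */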
4992 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
4993 * stack boundary and alignment are checked in check_mem_access()
4994 */
4995 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
4996 /* stack frame we're writing to */
4997 struct bpf_func_state *state,
4998 int off, int size, int value_regno,
4999 int insn_idx)
5000 {
5001 struct bpf_func_state *cur; /* state of the current function */
5002 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
5003 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5004 struct bpf_reg_state *reg = NULL;
5005 int insn_flags = insn_stack_access_flags(state->frameno, spi);
5006
5007 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
5008 * so it's aligned access and [off, off + size) are within stack limits
5009 */
5010 if (!env->allow_ptr_leaks &&
5011 is_spilled_reg(&state->stack[spi]) &&
5012 !is_spilled_scalar_reg(&state->stack[spi]) &&
5013 size != BPF_REG_SIZE) {
5014 verbose(env, "attempt to corrupt spilled pointer on stack\n");
5015 return -EACCES;
5016 }
5017
5018 cur = env->cur_state->frame[env->cur_state->curframe];
5019 if (value_regno >= 0)
5020 reg = &cur->regs[value_regno];
5021 if (!env->bypass_spec_v4) {
5022 bool sanitize = reg && is_spillable_regtype(reg->type);
5023
5024 for (i = 0; i < size; i++) {
5025 u8 type = state->stack[spi].slot_type[i];
5026
5027 if (type != STACK_MISC && type != STACK_ZERO) {
5028 sanitize = true;
5029 break;
5030 }
5031 }
5032
5033 if (sanitize)
5034 env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
5035 }
5036
5037 err = destroy_if_dynptr_stack_slot(env, state, spi);
5038 if (err)
5039 return err;
5040
5041 check_fastcall_stack_contract(env, state, insn_idx, off);
5042 mark_stack_slot_scratched(env, spi);
5043 if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
5044 bool reg_value_fits;
5045
5046 reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
5047 /* Make sure that reg had an ID to build a relation on spill. */
5048 if (reg_value_fits)
5049 assign_scalar_id_before_mov(env, reg);
5050 save_register_state(env, state, spi, reg, size);
5051 /* Break the relation on a narrowing spill. */
5052 if (!reg_value_fits)
5053 state->stack[spi].spilled_ptr.id = 0;
5054 } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
5055 env->bpf_capable) {
5056 struct bpf_reg_state *tmp_reg = &env->fake_reg[0];
5057
5058 memset(tmp_reg, 0, sizeof(*tmp_reg));
5059 __mark_reg_known(tmp_reg, insn->imm);
5060 tmp_reg->type = SCALAR_VALUE;
5061 save_register_state(env, state, spi, tmp_reg, size);
5062 } else if (reg && is_spillable_regtype(reg->type)) {
5063 /* register containing pointer is being spilled into stack */
5064 if (size != BPF_REG_SIZE) {
5065 verbose_linfo(env, insn_idx, "; ");
5066 verbose(env, "invalid size of register spill\n");
5067 return -EACCES;
5068 }
5069 if (state != cur && reg->type == PTR_TO_STACK) {
5070 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
5071 return -EINVAL;
5072 }
5073 save_register_state(env, state, spi, reg, size);
5074 } else {
5075 u8 type = STACK_MISC;
5076
5077 /* regular write of data into stack destroys any spilled ptr */
5078 state->stack[spi].spilled_ptr.type = NOT_INIT;
5079 /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
5080 if (is_stack_slot_special(&state->stack[spi]))
5081 for (i = 0; i < BPF_REG_SIZE; i++)
5082 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
5083
5084 /* only mark the slot as written if all 8 bytes were written
5085 * otherwise read propagation may incorrectly stop too soon
5086 * when stack slots are partially written.
5087 * This heuristic means that read propagation will be
5088 * conservative, since it will add reg_live_read marks
5089 * to stack slots all the way to first state when programs
5090 * writes+reads less than 8 bytes
5091 */
5092 if (size == BPF_REG_SIZE)
5093 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
5094
5095 /* when we zero initialize stack slots mark them as such */
5096 if ((reg && register_is_null(reg)) ||
5097 (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
5098 /* STACK_ZERO case happened because register spill
5099 * wasn't properly aligned at the stack slot boundary,
5100 * so it's not a register spill anymore; force
5101 * originating register to be precise to make
5102 * STACK_ZERO correct for subsequent states
5103 */
5104 err = mark_chain_precision(env, value_regno);
5105 if (err)
5106 return err;
5107 type = STACK_ZERO;
5108 }
5109
5110 /* Mark slots affected by this stack write. */
5111 for (i = 0; i < size; i++)
5112 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
5113 insn_flags = 0; /* not a register spill */
5114 }
5115
5116 if (insn_flags)
5117 return push_insn_history(env, env->cur_state, insn_flags, 0);
5118 return 0;
5119 }
5120
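/* Illustrative (hypothetical) variable-offset stack write handled below:
 *
 *   r1 = r10;
 *   r1 += -16;
 *   r1 += r2;                  // r2 is a bounded SCALAR, e.g. in [0, 8]
 *   *(u8 *)(r1 + 0) = 0;
 *
 * Every slot in the [min_off, max_off) range is treated as potentially
 * written, so spilled pointers in that range are conservatively destroyed.
 */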
5121 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
5122 * known to contain a variable offset.
5123 * This function checks whether the write is permitted and conservatively
5124 * tracks the effects of the write, considering that each stack slot in the
5125 * dynamic range is potentially written to.
5126 *
5127 * 'off' includes 'regno->off'.
5128 * 'value_regno' can be -1, meaning that an unknown value is being written to
5129 * the stack.
5130 *
5131 * Spilled pointers in range are not marked as written because we don't know
5132 * what's going to be actually written. This means that read propagation for
5133 * future reads cannot be terminated by this write.
5134 *
5135 * For privileged programs, uninitialized stack slots are considered
5136 * initialized by this write (even though we don't know exactly what offsets
5137 * are going to be written to). The idea is that we don't want the verifier to
5138 * reject future reads that access slots written to through variable offsets.
5139 */
5140 static int check_stack_write_var_off(struct bpf_verifier_env *env,
5141 /* func where register points to */
5142 struct bpf_func_state *state,
5143 int ptr_regno, int off, int size,
5144 int value_regno, int insn_idx)
5145 {
5146 struct bpf_func_state *cur; /* state of the current function */
5147 int min_off, max_off;
5148 int i, err;
5149 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
5150 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5151 bool writing_zero = false;
5152 /* set if the fact that we're writing a zero is used to let any
5153 * stack slots remain STACK_ZERO
5154 */
5155 bool zero_used = false;
5156
5157 cur = env->cur_state->frame[env->cur_state->curframe];
5158 ptr_reg = &cur->regs[ptr_regno];
5159 min_off = ptr_reg->smin_value + off;
5160 max_off = ptr_reg->smax_value + off + size;
5161 if (value_regno >= 0)
5162 value_reg = &cur->regs[value_regno];
5163 if ((value_reg && register_is_null(value_reg)) ||
5164 (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
5165 writing_zero = true;
5166
5167 for (i = min_off; i < max_off; i++) {
5168 int spi;
5169
5170 spi = __get_spi(i);
5171 err = destroy_if_dynptr_stack_slot(env, state, spi);
5172 if (err)
5173 return err;
5174 }
5175
5176 check_fastcall_stack_contract(env, state, insn_idx, min_off);
5177 /* Variable offset writes destroy any spilled pointers in range. */
5178 for (i = min_off; i < max_off; i++) {
5179 u8 new_type, *stype;
5180 int slot, spi;
5181
5182 slot = -i - 1;
5183 spi = slot / BPF_REG_SIZE;
5184 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
5185 mark_stack_slot_scratched(env, spi);
5186
5187 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
5188 /* Reject the write if range we may write to has not
5189 * been initialized beforehand. If we didn't reject
5190 * here, the ptr status would be erased below (even
5191 * though not all slots are actually overwritten),
5192 * possibly opening the door to leaks.
5193 *
5194 * We do however catch STACK_INVALID case below, and
5195 * only allow reading possibly uninitialized memory
5196 * later for CAP_PERFMON, as the write may not happen to
5197 * that slot.
5198 */
5199 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
5200 insn_idx, i);
5201 return -EINVAL;
5202 }
5203
5204 /* If writing_zero and the spi slot contains a spill of value 0,
5205 * maintain the spill type.
5206 */
5207 if (writing_zero && *stype == STACK_SPILL &&
5208 is_spilled_scalar_reg(&state->stack[spi])) {
5209 struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
5210
5211 if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
5212 zero_used = true;
5213 continue;
5214 }
5215 }
5216
5217 /* Erase all other spilled pointers. */
5218 state->stack[spi].spilled_ptr.type = NOT_INIT;
5219
5220 /* Update the slot type. */
5221 new_type = STACK_MISC;
5222 if (writing_zero && *stype == STACK_ZERO) {
5223 new_type = STACK_ZERO;
5224 zero_used = true;
5225 }
5226 /* If the slot is STACK_INVALID, we check whether it's OK to
5227 * pretend that it will be initialized by this write. The slot
5228 * might not actually be written to, and so if we mark it as
5229 * initialized future reads might leak uninitialized memory.
5230 * For privileged programs, we will accept such reads to slots
5231 * that may or may not be written because, if we were to reject
5232 * them, the error would be too confusing.
5233 */
5234 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
5235 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
5236 insn_idx, i);
5237 return -EINVAL;
5238 }
5239 *stype = new_type;
5240 }
5241 if (zero_used) {
5242 /* backtracking doesn't work for STACK_ZERO yet. */
5243 err = mark_chain_precision(env, value_regno);
5244 if (err)
5245 return err;
5246 }
5247 return 0;
5248 }
5249
5250 /* When register 'dst_regno' is assigned some values from stack[min_off,
5251 * max_off), we set the register's type according to the types of the
5252 * respective stack slots. If all the stack values are known to be zeros, then
5253 * so is the destination reg. Otherwise, the register is considered to be
5254 * SCALAR. This function does not deal with register filling; the caller must
5255 * ensure that all spilled registers in the stack range have been marked as
5256 * read.
5257 */
5258 static void mark_reg_stack_read(struct bpf_verifier_env *env,
5259 /* func where src register points to */
5260 struct bpf_func_state *ptr_state,
5261 int min_off, int max_off, int dst_regno)
5262 {
5263 struct bpf_verifier_state *vstate = env->cur_state;
5264 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5265 int i, slot, spi;
5266 u8 *stype;
5267 int zeros = 0;
5268
5269 for (i = min_off; i < max_off; i++) {
5270 slot = -i - 1;
5271 spi = slot / BPF_REG_SIZE;
5272 mark_stack_slot_scratched(env, spi);
5273 stype = ptr_state->stack[spi].slot_type;
5274 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
5275 break;
5276 zeros++;
5277 }
5278 if (zeros == max_off - min_off) {
5279 /* Any access_size read into register is zero extended,
5280 * so the whole register == const_zero.
5281 */
5282 __mark_reg_const_zero(env, &state->regs[dst_regno]);
5283 } else {
5284 /* have read misc data from the stack */
5285 mark_reg_unknown(env, state->regs, dst_regno);
5286 }
5287 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
5288 }
5289
5290 /* Read the stack at 'off' and put the results into the register indicated by
5291 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
5292 * spilled reg.
5293 *
5294 * 'dst_regno' can be -1, meaning that the read value is not going to a
5295 * register.
5296 *
5297 * The access is assumed to be within the current stack bounds.
5298 */
5299 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
5300 /* func where src register points to */
5301 struct bpf_func_state *reg_state,
5302 int off, int size, int dst_regno)
5303 {
5304 struct bpf_verifier_state *vstate = env->cur_state;
5305 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5306 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
5307 struct bpf_reg_state *reg;
5308 u8 *stype, type;
5309 int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
5310
5311 stype = reg_state->stack[spi].slot_type;
5312 reg = &reg_state->stack[spi].spilled_ptr;
5313
5314 mark_stack_slot_scratched(env, spi);
5315 check_fastcall_stack_contract(env, state, env->insn_idx, off);
5316
5317 if (is_spilled_reg(&reg_state->stack[spi])) {
5318 u8 spill_size = 1;
5319
5320 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
5321 spill_size++;
5322
5323 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
5324 if (reg->type != SCALAR_VALUE) {
5325 verbose_linfo(env, env->insn_idx, "; ");
5326 verbose(env, "invalid size of register fill\n");
5327 return -EACCES;
5328 }
5329
5330 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
5331 if (dst_regno < 0)
5332 return 0;
5333
5334 if (size <= spill_size &&
5335 bpf_stack_narrow_access_ok(off, size, spill_size)) {
5336 /* The earlier check_reg_arg() has decided the
5337 * subreg_def for this insn. Save it first.
5338 */
5339 s32 subreg_def = state->regs[dst_regno].subreg_def;
5340
5341 copy_register_state(&state->regs[dst_regno], reg);
5342 state->regs[dst_regno].subreg_def = subreg_def;
5343
5344 /* Break the relation on a narrowing fill.
5345 * coerce_reg_to_size will adjust the boundaries.
5346 */
5347 if (get_reg_width(reg) > size * BITS_PER_BYTE)
5348 state->regs[dst_regno].id = 0;
5349 } else {
5350 int spill_cnt = 0, zero_cnt = 0;
5351
5352 for (i = 0; i < size; i++) {
5353 type = stype[(slot - i) % BPF_REG_SIZE];
5354 if (type == STACK_SPILL) {
5355 spill_cnt++;
5356 continue;
5357 }
5358 if (type == STACK_MISC)
5359 continue;
5360 if (type == STACK_ZERO) {
5361 zero_cnt++;
5362 continue;
5363 }
5364 if (type == STACK_INVALID && env->allow_uninit_stack)
5365 continue;
5366 verbose(env, "invalid read from stack off %d+%d size %d\n",
5367 off, i, size);
5368 return -EACCES;
5369 }
5370
5371 if (spill_cnt == size &&
5372 tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
5373 __mark_reg_const_zero(env, &state->regs[dst_regno]);
5374 /* this IS register fill, so keep insn_flags */
5375 } else if (zero_cnt == size) {
5376 /* similarly to mark_reg_stack_read(), preserve zeroes */
5377 __mark_reg_const_zero(env, &state->regs[dst_regno]);
5378 insn_flags = 0; /* not restoring original register state */
5379 } else {
5380 mark_reg_unknown(env, state->regs, dst_regno);
5381 insn_flags = 0; /* not restoring original register state */
5382 }
5383 }
5384 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
5385 } else if (dst_regno >= 0) {
5386 /* restore register state from stack */
5387 copy_register_state(&state->regs[dst_regno], reg);
5388 /* mark reg as written since spilled pointer state likely
5389 * has its liveness marks cleared by is_state_visited()
5390 * which resets stack/reg liveness for state transitions
5391 */
5392 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
5393 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
5394 /* If dst_regno==-1, the caller is asking us whether
5395 * it is acceptable to use this value as a SCALAR_VALUE
5396 * (e.g. for XADD).
5397 * We must not allow unprivileged callers to do that
5398 * with spilled pointers.
5399 */
5400 verbose(env, "leaking pointer from stack off %d\n",
5401 off);
5402 return -EACCES;
5403 }
5404 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
5405 } else {
5406 for (i = 0; i < size; i++) {
5407 type = stype[(slot - i) % BPF_REG_SIZE];
5408 if (type == STACK_MISC)
5409 continue;
5410 if (type == STACK_ZERO)
5411 continue;
5412 if (type == STACK_INVALID && env->allow_uninit_stack)
5413 continue;
5414 verbose(env, "invalid read from stack off %d+%d size %d\n",
5415 off, i, size);
5416 return -EACCES;
5417 }
5418 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
5419 if (dst_regno >= 0)
5420 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
5421 insn_flags = 0; /* we are not restoring spilled register */
5422 }
5423 if (insn_flags)
5424 return push_insn_history(env, env->cur_state, insn_flags, 0);
5425 return 0;
5426 }
5427
5428 enum bpf_access_src {
5429 ACCESS_DIRECT = 1, /* the access is performed by an instruction */
5430 ACCESS_HELPER = 2, /* the access is performed by a helper */
5431 };
5432
5433 static int check_stack_range_initialized(struct bpf_verifier_env *env,
5434 int regno, int off, int access_size,
5435 bool zero_size_allowed,
5436 enum bpf_access_type type,
5437 struct bpf_call_arg_meta *meta);
5438
5439 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
5440 {
5441 return cur_regs(env) + regno;
5442 }
5443
5444 /* Read the stack at 'ptr_regno + off' and put the result into the register
5445 * 'dst_regno'.
5446 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
5447 * but not its variable offset.
5448 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
5449 *
5450 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
5451 * filling registers (i.e. reads of spilled register cannot be detected when
5452 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
5453 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
5454 * offset; for a fixed offset check_stack_read_fixed_off should be used
5455 * instead.
5456 */
5457 static int check_stack_read_var_off(struct bpf_verifier_env *env,
5458 int ptr_regno, int off, int size, int dst_regno)
5459 {
5460 /* The state of the source register. */
5461 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5462 struct bpf_func_state *ptr_state = func(env, reg);
5463 int err;
5464 int min_off, max_off;
5465
5466 /* Note that we pass a NULL meta, so raw access will not be permitted.
5467 */
5468 err = check_stack_range_initialized(env, ptr_regno, off, size,
5469 false, BPF_READ, NULL);
5470 if (err)
5471 return err;
5472
5473 min_off = reg->smin_value + off;
5474 max_off = reg->smax_value + off;
5475 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
5476 check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
5477 return 0;
5478 }
5479
5480 /* check_stack_read dispatches to check_stack_read_fixed_off or
5481 * check_stack_read_var_off.
5482 *
5483 * The caller must ensure that the offset falls within the allocated stack
5484 * bounds.
5485 *
5486 * 'dst_regno' is a register which will receive the value from the stack. It
5487 * can be -1, meaning that the read value is not going to a register.
5488 */
5489 static int check_stack_read(struct bpf_verifier_env *env,
5490 int ptr_regno, int off, int size,
5491 int dst_regno)
5492 {
5493 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5494 struct bpf_func_state *state = func(env, reg);
5495 int err;
5496 /* Some accesses are only permitted with a static offset. */
5497 bool var_off = !tnum_is_const(reg->var_off);
5498
5499 /* The offset is required to be static when reads don't go to a
5500 * register, in order to not leak pointers (see
5501 * check_stack_read_fixed_off).
5502 */
5503 if (dst_regno < 0 && var_off) {
5504 char tn_buf[48];
5505
5506 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5507 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
5508 tn_buf, off, size);
5509 return -EACCES;
5510 }
5511 /* Variable offset is prohibited for unprivileged mode for simplicity
5512 * since it requires corresponding support in Spectre masking for stack
5513 * ALU. See also retrieve_ptr_limit(). The check in
5514 * check_stack_access_for_ptr_arithmetic() called by
5515 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
5516 * with variable offsets, therefore no check is required here. Further,
5517 * just checking it here would be insufficient as speculative stack
5518 * writes could still lead to unsafe speculative behaviour.
5519 */
5520 if (!var_off) {
5521 off += reg->var_off.value;
5522 err = check_stack_read_fixed_off(env, state, off, size,
5523 dst_regno);
5524 } else {
5525 /* Variable offset stack reads need more conservative handling
5526 * than fixed offset ones. Note that dst_regno >= 0 on this
5527 * branch.
5528 */
5529 err = check_stack_read_var_off(env, ptr_regno, off, size,
5530 dst_regno);
5531 }
5532 return err;
5533 }
5534
5535
5536 /* check_stack_write dispatches to check_stack_write_fixed_off or
5537 * check_stack_write_var_off.
5538 *
5539 * 'ptr_regno' is the register used as a pointer into the stack.
5540 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
5541 * 'value_regno' is the register whose value we're writing to the stack. It can
5542 * be -1, meaning that we're not writing from a register.
5543 *
5544 * The caller must ensure that the offset falls within the maximum stack size.
5545 */
5546 static int check_stack_write(struct bpf_verifier_env *env,
5547 int ptr_regno, int off, int size,
5548 int value_regno, int insn_idx)
5549 {
5550 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5551 struct bpf_func_state *state = func(env, reg);
5552 int err;
5553
5554 if (tnum_is_const(reg->var_off)) {
5555 off += reg->var_off.value;
5556 err = check_stack_write_fixed_off(env, state, off, size,
5557 value_regno, insn_idx);
5558 } else {
5559 /* Variable offset stack writes need more conservative handling
5560 * than fixed offset ones.
5561 */
5562 err = check_stack_write_var_off(env, state,
5563 ptr_regno, off, size,
5564 value_regno, insn_idx);
5565 }
5566 return err;
5567 }
5568
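/* Map access rights for programs are derived from the map's creation flags
 * via bpf_map_flags_to_cap() (e.g. a map created with BPF_F_RDONLY_PROG
 * yields BPF_MAP_CAN_READ but not BPF_MAP_CAN_WRITE), so reads or writes
 * that the map does not permit are rejected here.
 */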
5569 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
5570 int off, int size, enum bpf_access_type type)
5571 {
5572 struct bpf_reg_state *regs = cur_regs(env);
5573 struct bpf_map *map = regs[regno].map_ptr;
5574 u32 cap = bpf_map_flags_to_cap(map);
5575
5576 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
5577 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
5578 map->value_size, off, size);
5579 return -EACCES;
5580 }
5581
5582 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
5583 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
5584 map->value_size, off, size);
5585 return -EACCES;
5586 }
5587
5588 return 0;
5589 }
5590
5591 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
5592 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
5593 int off, int size, u32 mem_size,
5594 bool zero_size_allowed)
5595 {
5596 bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
5597 struct bpf_reg_state *reg;
5598
5599 if (off >= 0 && size_ok && (u64)off + size <= mem_size)
5600 return 0;
5601
5602 reg = &cur_regs(env)[regno];
5603 switch (reg->type) {
5604 case PTR_TO_MAP_KEY:
5605 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
5606 mem_size, off, size);
5607 break;
5608 case PTR_TO_MAP_VALUE:
5609 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
5610 mem_size, off, size);
5611 break;
5612 case PTR_TO_PACKET:
5613 case PTR_TO_PACKET_META:
5614 case PTR_TO_PACKET_END:
5615 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
5616 off, size, regno, reg->id, off, mem_size);
5617 break;
5618 case PTR_TO_MEM:
5619 default:
5620 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
5621 mem_size, off, size);
5622 }
5623
5624 return -EACCES;
5625 }
5626
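/* Worked example for the variable-offset bounds check below: with
 * mem_size = 20, off = 4, size = 4 and a register whose range is
 * [smin = 0, umax = 16], the maximum access spans bytes [20, 24), which is
 * outside the region, so the access is rejected even though the minimum
 * offset (bytes [4, 8)) would be fine.
 */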
5627 /* check read/write into a memory region with possible variable offset */
5628 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
5629 int off, int size, u32 mem_size,
5630 bool zero_size_allowed)
5631 {
5632 struct bpf_verifier_state *vstate = env->cur_state;
5633 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5634 struct bpf_reg_state *reg = &state->regs[regno];
5635 int err;
5636
5637 /* We may have adjusted the register pointing to memory region, so we
5638 * need to try adding each of min_value and max_value to off
5639 * to make sure our theoretical access will be safe.
5640 *
5641 * The minimum value is only important with signed
5642 * comparisons where we can't assume the floor of a
5643 * value is 0. If we are using signed variables for our
5644 * index'es we need to make sure that whatever we use
5645 * will have a set floor within our range.
5646 */
5647 if (reg->smin_value < 0 &&
5648 (reg->smin_value == S64_MIN ||
5649 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
5650 reg->smin_value + off < 0)) {
5651 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5652 regno);
5653 return -EACCES;
5654 }
5655 err = __check_mem_access(env, regno, reg->smin_value + off, size,
5656 mem_size, zero_size_allowed);
5657 if (err) {
5658 verbose(env, "R%d min value is outside of the allowed memory range\n",
5659 regno);
5660 return err;
5661 }
5662
5663 /* If we haven't set a max value then we need to bail since we can't be
5664 * sure we won't do bad things.
5665 * If reg->umax_value + off could overflow, treat that as unbounded too.
5666 */
5667 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
5668 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
5669 regno);
5670 return -EACCES;
5671 }
5672 err = __check_mem_access(env, regno, reg->umax_value + off, size,
5673 mem_size, zero_size_allowed);
5674 if (err) {
5675 verbose(env, "R%d max value is outside of the allowed memory range\n",
5676 regno);
5677 return err;
5678 }
5679
5680 return 0;
5681 }
5682
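/* Illustrative (hypothetical) rejection handled below: after
 *
 *   r6 = r1;                   // r1 is PTR_TO_CTX
 *   r6 += 8;                   // fixed offset added to the pointer
 *   r2 = *(u32 *)(r6 + 0);
 *
 * the dereference of the modified pointer is rejected when a fixed offset is
 * not allowed for that pointer type.
 */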
5683 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
5684 const struct bpf_reg_state *reg, int regno,
5685 bool fixed_off_ok)
5686 {
5687 /* Access to this pointer-typed register or passing it to a helper
5688 * is only allowed in its original, unmodified form.
5689 */
5690
5691 if (reg->off < 0) {
5692 verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
5693 reg_type_str(env, reg->type), regno, reg->off);
5694 return -EACCES;
5695 }
5696
5697 if (!fixed_off_ok && reg->off) {
5698 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
5699 reg_type_str(env, reg->type), regno, reg->off);
5700 return -EACCES;
5701 }
5702
5703 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5704 char tn_buf[48];
5705
5706 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5707 verbose(env, "variable %s access var_off=%s disallowed\n",
5708 reg_type_str(env, reg->type), tn_buf);
5709 return -EACCES;
5710 }
5711
5712 return 0;
5713 }
5714
5715 static int check_ptr_off_reg(struct bpf_verifier_env *env,
5716 const struct bpf_reg_state *reg, int regno)
5717 {
5718 return __check_ptr_off_reg(env, reg, regno, false);
5719 }
5720
5721 static int map_kptr_match_type(struct bpf_verifier_env *env,
5722 struct btf_field *kptr_field,
5723 struct bpf_reg_state *reg, u32 regno)
5724 {
5725 const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
5726 int perm_flags;
5727 const char *reg_name = "";
5728
5729 if (btf_is_kernel(reg->btf)) {
5730 perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
5731
5732 /* Only unreferenced case accepts untrusted pointers */
5733 if (kptr_field->type == BPF_KPTR_UNREF)
5734 perm_flags |= PTR_UNTRUSTED;
5735 } else {
5736 perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
5737 if (kptr_field->type == BPF_KPTR_PERCPU)
5738 perm_flags |= MEM_PERCPU;
5739 }
5740
5741 if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
5742 goto bad_type;
5743
5744 /* We need to verify reg->type and reg->btf, before accessing reg->btf */
5745 reg_name = btf_type_name(reg->btf, reg->btf_id);
5746
5747 /* For ref_ptr case, release function check should ensure we get one
5748 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
5749 * normal store of unreferenced kptr, we must ensure var_off is zero.
5750 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
5751 * reg->off and reg->ref_obj_id are not needed here.
5752 */
5753 if (__check_ptr_off_reg(env, reg, regno, true))
5754 return -EACCES;
5755
5756 /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
5757 * we also need to take into account the reg->off.
5758 *
5759 * We want to support cases like:
5760 *
5761 * struct foo {
5762 * struct bar br;
5763 * struct baz bz;
5764 * };
5765 *
5766 * struct foo *v;
5767 * v = func(); // PTR_TO_BTF_ID
5768 * val->foo = v; // reg->off is zero, btf and btf_id match type
5769 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
5770 * // first member type of struct after comparison fails
5771 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
5772 * // to match type
5773 *
5774 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
5775 * is zero. We must also ensure that btf_struct_ids_match does not walk
5776 * the struct to match type against first member of struct, i.e. reject
5777 * second case from above. Hence, when type is BPF_KPTR_REF, we set
5778 * strict mode to true for type match.
5779 */
5780 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5781 kptr_field->kptr.btf, kptr_field->kptr.btf_id,
5782 kptr_field->type != BPF_KPTR_UNREF))
5783 goto bad_type;
5784 return 0;
5785 bad_type:
5786 verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
5787 reg_type_str(env, reg->type), reg_name);
5788 verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
5789 if (kptr_field->type == BPF_KPTR_UNREF)
5790 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
5791 targ_name);
5792 else
5793 verbose(env, "\n");
5794 return -EINVAL;
5795 }
5796
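/* True if the program may sleep at this point: either the program itself is
 * sleepable, or the currently verified state is in a sleepable context.
 */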
5797 static bool in_sleepable(struct bpf_verifier_env *env)
5798 {
5799 return env->prog->sleepable ||
5800 (env->cur_state && env->cur_state->in_sleepable);
5801 }
5802
5803 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
5804 * can dereference RCU protected pointers and result is PTR_TRUSTED.
5805 */
5806 static bool in_rcu_cs(struct bpf_verifier_env *env)
5807 {
5808 return env->cur_state->active_rcu_lock ||
5809 env->cur_state->active_locks ||
5810 !in_sleepable(env);
5811 }
5812
5813 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
5814 BTF_SET_START(rcu_protected_types)
5815 #ifdef CONFIG_NET
5816 BTF_ID(struct, prog_test_ref_kfunc)
5817 #endif
5818 #ifdef CONFIG_CGROUPS
5819 BTF_ID(struct, cgroup)
5820 #endif
5821 #ifdef CONFIG_BPF_JIT
5822 BTF_ID(struct, bpf_cpumask)
5823 #endif
5824 BTF_ID(struct, task_struct)
5825 #ifdef CONFIG_CRYPTO
5826 BTF_ID(struct, bpf_crypto_ctx)
5827 #endif
5828 BTF_SET_END(rcu_protected_types)
5829
5830 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
5831 {
5832 if (!btf_is_kernel(btf))
5833 return true;
5834 return btf_id_set_contains(&rcu_protected_types, btf_id);
5835 }
5836
5837 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
5838 {
5839 struct btf_struct_meta *meta;
5840
5841 if (btf_is_kernel(kptr_field->kptr.btf))
5842 return NULL;
5843
5844 meta = btf_find_struct_meta(kptr_field->kptr.btf,
5845 kptr_field->kptr.btf_id);
5846
5847 return meta ? meta->record : NULL;
5848 }
5849
5850 static bool rcu_safe_kptr(const struct btf_field *field)
5851 {
5852 const struct btf_field_kptr *kptr = &field->kptr;
5853
5854 return field->type == BPF_KPTR_PERCPU ||
5855 (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
5856 }
5857
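/* Compute the type flags for the register produced by loading a kptr field:
 * the result is always PTR_MAYBE_NULL; if the kptr is RCU safe and the load
 * happens inside an RCU critical section it additionally gets MEM_RCU (plus
 * MEM_PERCPU or MEM_ALLOC depending on the kptr kind, and NON_OWN_REF if the
 * pointee contains graph node fields), otherwise it is PTR_UNTRUSTED.
 */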
5858 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
5859 {
5860 struct btf_record *rec;
5861 u32 ret;
5862
5863 ret = PTR_MAYBE_NULL;
5864 if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
5865 ret |= MEM_RCU;
5866 if (kptr_field->type == BPF_KPTR_PERCPU)
5867 ret |= MEM_PERCPU;
5868 else if (!btf_is_kernel(kptr_field->kptr.btf))
5869 ret |= MEM_ALLOC;
5870
5871 rec = kptr_pointee_btf_record(kptr_field);
5872 if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
5873 ret |= NON_OWN_REF;
5874 } else {
5875 ret |= PTR_UNTRUSTED;
5876 }
5877
5878 return ret;
5879 }
5880
5881 static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
5882 struct btf_field *field)
5883 {
5884 struct bpf_reg_state *reg;
5885 const struct btf_type *t;
5886
5887 t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
5888 mark_reg_known_zero(env, cur_regs(env), regno);
5889 reg = reg_state(env, regno);
5890 reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
5891 reg->mem_size = t->size;
5892 reg->id = ++env->id_gen;
5893
5894 return 0;
5895 }
5896
5897 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
5898 int value_regno, int insn_idx,
5899 struct btf_field *kptr_field)
5900 {
5901 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5902 int class = BPF_CLASS(insn->code);
5903 struct bpf_reg_state *val_reg;
5904
5905 /* Things we already checked for in check_map_access and caller:
5906 * - Reject cases where variable offset may touch kptr
5907 * - size of access (must be BPF_DW)
5908 * - tnum_is_const(reg->var_off)
5909 * - kptr_field->offset == off + reg->var_off.value
5910 */
5911 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
5912 if (BPF_MODE(insn->code) != BPF_MEM) {
5913 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
5914 return -EACCES;
5915 }
5916
5917 /* We only allow loading referenced kptr, since it will be marked as
5918 * untrusted, similar to unreferenced kptr.
5919 */
5920 if (class != BPF_LDX &&
5921 (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
5922 verbose(env, "store to referenced kptr disallowed\n");
5923 return -EACCES;
5924 }
5925 if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
5926 verbose(env, "store to uptr disallowed\n");
5927 return -EACCES;
5928 }
5929
5930 if (class == BPF_LDX) {
5931 if (kptr_field->type == BPF_UPTR)
5932 return mark_uptr_ld_reg(env, value_regno, kptr_field);
5933
5934 /* We can simply mark the value_regno receiving the pointer
5935 * value from map as PTR_TO_BTF_ID, with the correct type.
5936 */
5937 mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
5938 kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
5939 } else if (class == BPF_STX) {
5940 val_reg = reg_state(env, value_regno);
5941 if (!register_is_null(val_reg) &&
5942 map_kptr_match_type(env, kptr_field, val_reg, value_regno))
5943 return -EACCES;
5944 } else if (class == BPF_ST) {
5945 if (insn->imm) {
5946 verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
5947 kptr_field->offset);
5948 return -EACCES;
5949 }
5950 } else {
5951 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
5952 return -EACCES;
5953 }
5954 return 0;
5955 }
5956
5957 /* check read/write into a map element with possible variable offset */
5958 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
5959 int off, int size, bool zero_size_allowed,
5960 enum bpf_access_src src)
5961 {
5962 struct bpf_verifier_state *vstate = env->cur_state;
5963 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5964 struct bpf_reg_state *reg = &state->regs[regno];
5965 struct bpf_map *map = reg->map_ptr;
5966 struct btf_record *rec;
5967 int err, i;
5968
5969 err = check_mem_region_access(env, regno, off, size, map->value_size,
5970 zero_size_allowed);
5971 if (err)
5972 return err;
5973
5974 if (IS_ERR_OR_NULL(map->record))
5975 return 0;
5976 rec = map->record;
5977 for (i = 0; i < rec->cnt; i++) {
5978 struct btf_field *field = &rec->fields[i];
5979 u32 p = field->offset;
5980
5981 /* If any part of a field can be touched by load/store, reject
5982 * this program. To check that [x1, x2) overlaps with [y1, y2),
5983 * it is sufficient to check x1 < y2 && y1 < x2.
5984 */
5985 if (reg->smin_value + off < p + field->size &&
5986 p < reg->umax_value + off + size) {
5987 switch (field->type) {
5988 case BPF_KPTR_UNREF:
5989 case BPF_KPTR_REF:
5990 case BPF_KPTR_PERCPU:
5991 case BPF_UPTR:
5992 if (src != ACCESS_DIRECT) {
5993 verbose(env, "%s cannot be accessed indirectly by helper\n",
5994 btf_field_type_name(field->type));
5995 return -EACCES;
5996 }
5997 if (!tnum_is_const(reg->var_off)) {
5998 verbose(env, "%s access cannot have variable offset\n",
5999 btf_field_type_name(field->type));
6000 return -EACCES;
6001 }
6002 if (p != off + reg->var_off.value) {
6003 verbose(env, "%s access misaligned expected=%u off=%llu\n",
6004 btf_field_type_name(field->type),
6005 p, off + reg->var_off.value);
6006 return -EACCES;
6007 }
6008 if (size != bpf_size_to_bytes(BPF_DW)) {
6009 verbose(env, "%s access size must be BPF_DW\n",
6010 btf_field_type_name(field->type));
6011 return -EACCES;
6012 }
6013 break;
6014 default:
6015 verbose(env, "%s cannot be accessed directly by load/store\n",
6016 btf_field_type_name(field->type));
6017 return -EACCES;
6018 }
6019 }
6020 }
6021 return 0;
6022 }
6023
6024 #define MAX_PACKET_OFF 0xffff
6025
6026 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
6027 const struct bpf_call_arg_meta *meta,
6028 enum bpf_access_type t)
6029 {
6030 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
6031
6032 switch (prog_type) {
6033 /* Program types only with direct read access go here! */
6034 case BPF_PROG_TYPE_LWT_IN:
6035 case BPF_PROG_TYPE_LWT_OUT:
6036 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
6037 case BPF_PROG_TYPE_SK_REUSEPORT:
6038 case BPF_PROG_TYPE_FLOW_DISSECTOR:
6039 case BPF_PROG_TYPE_CGROUP_SKB:
6040 if (t == BPF_WRITE)
6041 return false;
6042 fallthrough;
6043
6044 /* Program types with direct read + write access go here! */
6045 case BPF_PROG_TYPE_SCHED_CLS:
6046 case BPF_PROG_TYPE_SCHED_ACT:
6047 case BPF_PROG_TYPE_XDP:
6048 case BPF_PROG_TYPE_LWT_XMIT:
6049 case BPF_PROG_TYPE_SK_SKB:
6050 case BPF_PROG_TYPE_SK_MSG:
6051 if (meta)
6052 return meta->pkt_access;
6053
6054 env->seen_direct_write = true;
6055 return true;
6056
6057 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
6058 if (t == BPF_WRITE)
6059 env->seen_direct_write = true;
6060
6061 return true;
6062
6063 default:
6064 return false;
6065 }
6066 }
6067
6068 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
6069 int size, bool zero_size_allowed)
6070 {
6071 struct bpf_reg_state *regs = cur_regs(env);
6072 struct bpf_reg_state *reg = &regs[regno];
6073 int err;
6074
6075 /* We may have added a variable offset to the packet pointer; but any
6076 * reg->range we have comes after that. We are only checking the fixed
6077 * offset.
6078 */
6079
6080 /* We don't allow negative numbers, because we aren't tracking enough
6081 * detail to prove they're safe.
6082 */
6083 if (reg->smin_value < 0) {
6084 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
6085 regno);
6086 return -EACCES;
6087 }
6088
6089 err = reg->range < 0 ? -EINVAL :
6090 __check_mem_access(env, regno, off, size, reg->range,
6091 zero_size_allowed);
6092 if (err) {
6093 verbose(env, "R%d offset is outside of the packet\n", regno);
6094 return err;
6095 }
6096
6097 /* __check_mem_access has made sure "off + size - 1" is within u16.
6098 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
6099 * otherwise find_good_pkt_pointers would have refused to set range info,
6100 * in which case __check_mem_access would have rejected this pkt access.
6101 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
6102 */
6103 env->prog->aux->max_pkt_offset =
6104 max_t(u32, env->prog->aux->max_pkt_offset,
6105 off + reg->umax_value + size - 1);
6106
6107 return err;
6108 }
6109
6110 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
6111 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
6112 enum bpf_access_type t, struct bpf_insn_access_aux *info)
6113 {
6114 if (env->ops->is_valid_access &&
6115 env->ops->is_valid_access(off, size, t, env->prog, info)) {
6116 /* A non zero info.ctx_field_size indicates that this field is a
6117 * candidate for later verifier transformation to load the whole
6118 * field and then apply a mask when accessed with a narrower
6119 * access than actual ctx access size. A zero info.ctx_field_size
6120 * will only allow for whole field access and rejects any other
6121 * type of narrower access.
6122 */
6123 if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
6124 if (info->ref_obj_id &&
6125 !find_reference_state(env->cur_state, info->ref_obj_id)) {
6126 verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
6127 off);
6128 return -EACCES;
6129 }
6130 } else {
6131 env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
6132 }
6133 /* remember the offset of last byte accessed in ctx */
6134 if (env->prog->aux->max_ctx_offset < off + size)
6135 env->prog->aux->max_ctx_offset = off + size;
6136 return 0;
6137 }
6138
6139 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
6140 return -EACCES;
6141 }
6142
6143 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
6144 int size)
6145 {
6146 if (size < 0 || off < 0 ||
6147 (u64)off + size > sizeof(struct bpf_flow_keys)) {
6148 verbose(env, "invalid access to flow keys off=%d size=%d\n",
6149 off, size);
6150 return -EACCES;
6151 }
6152 return 0;
6153 }
6154
6155 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
6156 u32 regno, int off, int size,
6157 enum bpf_access_type t)
6158 {
6159 struct bpf_reg_state *regs = cur_regs(env);
6160 struct bpf_reg_state *reg = &regs[regno];
6161 struct bpf_insn_access_aux info = {};
6162 bool valid;
6163
6164 if (reg->smin_value < 0) {
6165 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
6166 regno);
6167 return -EACCES;
6168 }
6169
6170 switch (reg->type) {
6171 case PTR_TO_SOCK_COMMON:
6172 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
6173 break;
6174 case PTR_TO_SOCKET:
6175 valid = bpf_sock_is_valid_access(off, size, t, &info);
6176 break;
6177 case PTR_TO_TCP_SOCK:
6178 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
6179 break;
6180 case PTR_TO_XDP_SOCK:
6181 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
6182 break;
6183 default:
6184 valid = false;
6185 }
6186
6187
6188 if (valid) {
6189 env->insn_aux_data[insn_idx].ctx_field_size =
6190 info.ctx_field_size;
6191 return 0;
6192 }
6193
6194 verbose(env, "R%d invalid %s access off=%d size=%d\n",
6195 regno, reg_type_str(env, reg->type), off, size);
6196
6197 return -EACCES;
6198 }
6199
6200 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
6201 {
6202 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
6203 }
6204
6205 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
6206 {
6207 const struct bpf_reg_state *reg = reg_state(env, regno);
6208
6209 return reg->type == PTR_TO_CTX;
6210 }
6211
6212 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
6213 {
6214 const struct bpf_reg_state *reg = reg_state(env, regno);
6215
6216 return type_is_sk_pointer(reg->type);
6217 }
6218
6219 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
6220 {
6221 const struct bpf_reg_state *reg = reg_state(env, regno);
6222
6223 return type_is_pkt_pointer(reg->type);
6224 }
6225
6226 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
6227 {
6228 const struct bpf_reg_state *reg = reg_state(env, regno);
6229
6230 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
6231 return reg->type == PTR_TO_FLOW_KEYS;
6232 }
6233
6234 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
6235 {
6236 const struct bpf_reg_state *reg = reg_state(env, regno);
6237
6238 return reg->type == PTR_TO_ARENA;
6239 }
6240
6241 /* Return false if @regno contains a pointer whose type isn't supported for
6242 * atomic instruction @insn.
6243 */
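/* A rough sketch of what this allows (illustrative): atomic ops on
 * PTR_TO_STACK or PTR_TO_MAP_VALUE pass, ctx/packet/flow-keys/socket
 * pointers are rejected below, and PTR_TO_ARENA is allowed only when
 * the JIT supports the instruction.
 */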
6244 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
6245 struct bpf_insn *insn)
6246 {
6247 if (is_ctx_reg(env, regno))
6248 return false;
6249 if (is_pkt_reg(env, regno))
6250 return false;
6251 if (is_flow_key_reg(env, regno))
6252 return false;
6253 if (is_sk_reg(env, regno))
6254 return false;
6255 if (is_arena_reg(env, regno))
6256 return bpf_jit_supports_insn(insn, true);
6257
6258 return true;
6259 }
6260
6261 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
6262 #ifdef CONFIG_NET
6263 [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
6264 [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
6265 [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
6266 #endif
6267 [CONST_PTR_TO_MAP] = btf_bpf_map_id,
6268 };
6269
6270 static bool is_trusted_reg(const struct bpf_reg_state *reg)
6271 {
6272 /* A referenced register is always trusted. */
6273 if (reg->ref_obj_id)
6274 return true;
6275
6276 /* Types listed in the reg2btf_ids are always trusted */
6277 if (reg2btf_ids[base_type(reg->type)] &&
6278 !bpf_type_has_unsafe_modifiers(reg->type))
6279 return true;
6280
6281 /* If a register is not referenced, it is trusted if it has the
6282 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
6283 * other type modifiers may be safe, but we elect to take an opt-in
6284 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
6285 * not.
6286 *
6287 * Eventually, we should make PTR_TRUSTED the single source of truth
6288 * for whether a register is trusted.
6289 */
6290 return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
6291 !bpf_type_has_unsafe_modifiers(reg->type);
6292 }
6293
6294 static bool is_rcu_reg(const struct bpf_reg_state *reg)
6295 {
6296 return reg->type & MEM_RCU;
6297 }
6298
6299 static void clear_trusted_flags(enum bpf_type_flag *flag)
6300 {
6301 *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
6302 }
6303
6304 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
6305 const struct bpf_reg_state *reg,
6306 int off, int size, bool strict)
6307 {
6308 struct tnum reg_off;
6309 int ip_align;
6310
6311 /* Byte size accesses are always allowed. */
6312 if (!strict || size == 1)
6313 return 0;
6314
6315 /* For platforms that do not have a Kconfig enabling
6316 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
6317 * NET_IP_ALIGN is universally set to '2'. And on platforms
6318 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
6319 * to this code only in strict mode where we want to emulate
6320 * the NET_IP_ALIGN==2 checking. Therefore use an
6321 * unconditional IP align value of '2'.
6322 */
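	/* Worked example (illustrative, assuming reg->off and var_off are
	 * zero): a 4-byte load at off 14 (just past an Ethernet header)
	 * checks 2 + 14 = 16, which is aligned; the same load at off 13
	 * would fail the tnum_is_aligned() check below.
	 */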
6323 ip_align = 2;
6324
6325 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
6326 if (!tnum_is_aligned(reg_off, size)) {
6327 char tn_buf[48];
6328
6329 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6330 verbose(env,
6331 "misaligned packet access off %d+%s+%d+%d size %d\n",
6332 ip_align, tn_buf, reg->off, off, size);
6333 return -EACCES;
6334 }
6335
6336 return 0;
6337 }
6338
6339 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
6340 const struct bpf_reg_state *reg,
6341 const char *pointer_desc,
6342 int off, int size, bool strict)
6343 {
6344 struct tnum reg_off;
6345
6346 /* Byte size accesses are always allowed. */
6347 if (!strict || size == 1)
6348 return 0;
6349
6350 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
6351 if (!tnum_is_aligned(reg_off, size)) {
6352 char tn_buf[48];
6353
6354 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6355 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
6356 pointer_desc, tn_buf, reg->off, off, size);
6357 return -EACCES;
6358 }
6359
6360 return 0;
6361 }
6362
6363 static int check_ptr_alignment(struct bpf_verifier_env *env,
6364 const struct bpf_reg_state *reg, int off,
6365 int size, bool strict_alignment_once)
6366 {
6367 bool strict = env->strict_alignment || strict_alignment_once;
6368 const char *pointer_desc = "";
6369
6370 switch (reg->type) {
6371 case PTR_TO_PACKET:
6372 case PTR_TO_PACKET_META:
6373 /* Special case, because of NET_IP_ALIGN. Given metadata sits
6374 * right in front, treat it the very same way.
6375 */
6376 return check_pkt_ptr_alignment(env, reg, off, size, strict);
6377 case PTR_TO_FLOW_KEYS:
6378 pointer_desc = "flow keys ";
6379 break;
6380 case PTR_TO_MAP_KEY:
6381 pointer_desc = "key ";
6382 break;
6383 case PTR_TO_MAP_VALUE:
6384 pointer_desc = "value ";
6385 break;
6386 case PTR_TO_CTX:
6387 pointer_desc = "context ";
6388 break;
6389 case PTR_TO_STACK:
6390 pointer_desc = "stack ";
6391 /* The stack spill tracking logic in check_stack_write_fixed_off()
6392 * and check_stack_read_fixed_off() relies on stack accesses being
6393 * aligned.
6394 */
6395 strict = true;
6396 break;
6397 case PTR_TO_SOCKET:
6398 pointer_desc = "sock ";
6399 break;
6400 case PTR_TO_SOCK_COMMON:
6401 pointer_desc = "sock_common ";
6402 break;
6403 case PTR_TO_TCP_SOCK:
6404 pointer_desc = "tcp_sock ";
6405 break;
6406 case PTR_TO_XDP_SOCK:
6407 pointer_desc = "xdp_sock ";
6408 break;
6409 case PTR_TO_ARENA:
6410 return 0;
6411 default:
6412 break;
6413 }
6414 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
6415 strict);
6416 }
6417
6418 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
6419 {
6420 if (!bpf_jit_supports_private_stack())
6421 return NO_PRIV_STACK;
6422
6423 /* bpf_prog_check_recur() checks all prog types that use bpf trampoline
6424 * while kprobe/tp/perf_event/raw_tp don't use a trampoline and hence are
6425 * checked explicitly here.
6426 */
6427 switch (prog->type) {
6428 case BPF_PROG_TYPE_KPROBE:
6429 case BPF_PROG_TYPE_TRACEPOINT:
6430 case BPF_PROG_TYPE_PERF_EVENT:
6431 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6432 return PRIV_STACK_ADAPTIVE;
6433 case BPF_PROG_TYPE_TRACING:
6434 case BPF_PROG_TYPE_LSM:
6435 case BPF_PROG_TYPE_STRUCT_OPS:
6436 if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
6437 return PRIV_STACK_ADAPTIVE;
6438 fallthrough;
6439 default:
6440 break;
6441 }
6442
6443 return NO_PRIV_STACK;
6444 }
6445
6446 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
6447 {
6448 if (env->prog->jit_requested)
6449 return round_up(stack_depth, 16);
6450
6451 /* round up to 32-bytes, since this is granularity
6452 * of interpreter stack size
6453 */
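	/* e.g. (illustrative) a stack_depth of 40 is rounded up to 64 here,
	 * whereas the JITed path above would round it to 48; a depth of 0
	 * still reserves 32 bytes.
	 */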
6454 return round_up(max_t(u32, stack_depth, 1), 32);
6455 }
6456
6457 /* starting from main bpf function walk all instructions of the function
6458 * and recursively walk all callees that given function can call.
6459 * Ignore jump and exit insns.
6460 * Since recursion is prevented by check_cfg() this algorithm
6461 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
6462 */
6463 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
6464 bool priv_stack_supported)
6465 {
6466 struct bpf_subprog_info *subprog = env->subprog_info;
6467 struct bpf_insn *insn = env->prog->insnsi;
6468 int depth = 0, frame = 0, i, subprog_end, subprog_depth;
6469 bool tail_call_reachable = false;
6470 int ret_insn[MAX_CALL_FRAMES];
6471 int ret_prog[MAX_CALL_FRAMES];
6472 int j;
6473
6474 i = subprog[idx].start;
6475 if (!priv_stack_supported)
6476 subprog[idx].priv_stack_mode = NO_PRIV_STACK;
6477 process_func:
6478 /* protect against potential stack overflow that might happen when
6479 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
6480 * depth for such cases to 256 so that the worst-case scenario
6481 * results in an 8k stack size (32, which is the tailcall limit,
6482 * times 256 = 8k).
6483 *
6484 * To get the idea what might happen, see an example:
6485 * func1 -> sub rsp, 128
6486 * subfunc1 -> sub rsp, 256
6487 * tailcall1 -> add rsp, 256
6488 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
6489 * subfunc2 -> sub rsp, 64
6490 * subfunc22 -> sub rsp, 128
6491 * tailcall2 -> add rsp, 128
6492 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
6493 *
6494 * tailcall will unwind the current stack frame but it will not get rid
6495 * of the caller's stack, as shown in the example above.
6496 */
6497 if (idx && subprog[idx].has_tail_call && depth >= 256) {
6498 verbose(env,
6499 "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
6500 depth);
6501 return -EACCES;
6502 }
6503
6504 subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
6505 if (priv_stack_supported) {
6506 /* Request private stack support only if the subprog stack
6507 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
6508 * avoid jit penalty if the stack usage is small.
6509 */
6510 if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
6511 subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
6512 subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
6513 }
6514
6515 if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
6516 if (subprog_depth > MAX_BPF_STACK) {
6517 verbose(env, "stack size of subprog %d is %d. Too large\n",
6518 idx, subprog_depth);
6519 return -EACCES;
6520 }
6521 } else {
6522 depth += subprog_depth;
6523 if (depth > MAX_BPF_STACK) {
6524 verbose(env, "combined stack size of %d calls is %d. Too large\n",
6525 frame + 1, depth);
6526 return -EACCES;
6527 }
6528 }
6529 continue_func:
6530 subprog_end = subprog[idx + 1].start;
6531 for (; i < subprog_end; i++) {
6532 int next_insn, sidx;
6533
6534 if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
6535 bool err = false;
6536
6537 if (!is_bpf_throw_kfunc(insn + i))
6538 continue;
6539 if (subprog[idx].is_cb)
6540 err = true;
6541 for (int c = 0; c < frame && !err; c++) {
6542 if (subprog[ret_prog[c]].is_cb) {
6543 err = true;
6544 break;
6545 }
6546 }
6547 if (!err)
6548 continue;
6549 verbose(env,
6550 "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
6551 i, idx);
6552 return -EINVAL;
6553 }
6554
6555 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
6556 continue;
6557 /* remember insn and function to return to */
6558 ret_insn[frame] = i + 1;
6559 ret_prog[frame] = idx;
6560
6561 /* find the callee */
6562 next_insn = i + insn[i].imm + 1;
6563 sidx = find_subprog(env, next_insn);
6564 if (sidx < 0) {
6565 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6566 next_insn);
6567 return -EFAULT;
6568 }
6569 if (subprog[sidx].is_async_cb) {
6570 if (subprog[sidx].has_tail_call) {
6571 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
6572 return -EFAULT;
6573 }
6574 /* async callbacks don't increase bpf prog stack size unless called directly */
6575 if (!bpf_pseudo_call(insn + i))
6576 continue;
6577 if (subprog[sidx].is_exception_cb) {
6578 verbose(env, "insn %d cannot call exception cb directly\n", i);
6579 return -EINVAL;
6580 }
6581 }
6582 i = next_insn;
6583 idx = sidx;
6584 if (!priv_stack_supported)
6585 subprog[idx].priv_stack_mode = NO_PRIV_STACK;
6586
6587 if (subprog[idx].has_tail_call)
6588 tail_call_reachable = true;
6589
6590 frame++;
6591 if (frame >= MAX_CALL_FRAMES) {
6592 verbose(env, "the call stack of %d frames is too deep !\n",
6593 frame);
6594 return -E2BIG;
6595 }
6596 goto process_func;
6597 }
6598 /* if a tail call was detected across bpf2bpf calls then mark each of the
6599 * currently present subprog frames as tail call reachable subprogs;
6600 * the JIT will use this info to preserve the tail call counter
6601 * throughout bpf2bpf calls combined with tailcalls
6602 */
6603 if (tail_call_reachable)
6604 for (j = 0; j < frame; j++) {
6605 if (subprog[ret_prog[j]].is_exception_cb) {
6606 verbose(env, "cannot tail call within exception cb\n");
6607 return -EINVAL;
6608 }
6609 subprog[ret_prog[j]].tail_call_reachable = true;
6610 }
6611 if (subprog[0].tail_call_reachable)
6612 env->prog->aux->tail_call_reachable = true;
6613
6614 /* end of for() loop means the last insn of the 'subprog'
6615 * was reached. Doesn't matter whether it was JA or EXIT
6616 */
6617 if (frame == 0)
6618 return 0;
6619 if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
6620 depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
6621 frame--;
6622 i = ret_insn[frame];
6623 idx = ret_prog[frame];
6624 goto continue_func;
6625 }
6626
6627 static int check_max_stack_depth(struct bpf_verifier_env *env)
6628 {
6629 enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
6630 struct bpf_subprog_info *si = env->subprog_info;
6631 bool priv_stack_supported;
6632 int ret;
6633
6634 for (int i = 0; i < env->subprog_cnt; i++) {
6635 if (si[i].has_tail_call) {
6636 priv_stack_mode = NO_PRIV_STACK;
6637 break;
6638 }
6639 }
6640
6641 if (priv_stack_mode == PRIV_STACK_UNKNOWN)
6642 priv_stack_mode = bpf_enable_priv_stack(env->prog);
6643
6644 /* All async_cb subprogs use normal kernel stack. If a particular
6645 * subprog appears in both main prog and async_cb subtree, that
6646 * subprog will use normal kernel stack to avoid potential nesting.
6647 * The reverse subprog traversal ensures when main prog subtree is
6648 * checked, the subprogs appearing in async_cb subtrees are already
6649 * marked as using normal kernel stack, so stack size checking can
6650 * be done properly.
6651 */
6652 for (int i = env->subprog_cnt - 1; i >= 0; i--) {
6653 if (!i || si[i].is_async_cb) {
6654 priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
6655 ret = check_max_stack_depth_subprog(env, i, priv_stack_supported);
6656 if (ret < 0)
6657 return ret;
6658 }
6659 }
6660
6661 for (int i = 0; i < env->subprog_cnt; i++) {
6662 if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
6663 env->prog->aux->jits_use_priv_stack = true;
6664 break;
6665 }
6666 }
6667
6668 return 0;
6669 }
6670
6671 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
6672 static int get_callee_stack_depth(struct bpf_verifier_env *env,
6673 const struct bpf_insn *insn, int idx)
6674 {
6675 int start = idx + insn->imm + 1, subprog;
6676
6677 subprog = find_subprog(env, start);
6678 if (subprog < 0) {
6679 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6680 start);
6681 return -EFAULT;
6682 }
6683 return env->subprog_info[subprog].stack_depth;
6684 }
6685 #endif
6686
6687 static int __check_buffer_access(struct bpf_verifier_env *env,
6688 const char *buf_info,
6689 const struct bpf_reg_state *reg,
6690 int regno, int off, int size)
6691 {
6692 if (off < 0) {
6693 verbose(env,
6694 "R%d invalid %s buffer access: off=%d, size=%d\n",
6695 regno, buf_info, off, size);
6696 return -EACCES;
6697 }
6698 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
6699 char tn_buf[48];
6700
6701 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6702 verbose(env,
6703 "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
6704 regno, off, tn_buf);
6705 return -EACCES;
6706 }
6707
6708 return 0;
6709 }
6710
6711 static int check_tp_buffer_access(struct bpf_verifier_env *env,
6712 const struct bpf_reg_state *reg,
6713 int regno, int off, int size)
6714 {
6715 int err;
6716
6717 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
6718 if (err)
6719 return err;
6720
6721 if (off + size > env->prog->aux->max_tp_access)
6722 env->prog->aux->max_tp_access = off + size;
6723
6724 return 0;
6725 }
6726
6727 static int check_buffer_access(struct bpf_verifier_env *env,
6728 const struct bpf_reg_state *reg,
6729 int regno, int off, int size,
6730 bool zero_size_allowed,
6731 u32 *max_access)
6732 {
6733 const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
6734 int err;
6735
6736 err = __check_buffer_access(env, buf_info, reg, regno, off, size);
6737 if (err)
6738 return err;
6739
6740 if (off + size > *max_access)
6741 *max_access = off + size;
6742
6743 return 0;
6744 }
6745
6746 /* BPF architecture zero extends alu32 ops into 64-bit registers */
6747 static void zext_32_to_64(struct bpf_reg_state *reg)
6748 {
6749 reg->var_off = tnum_subreg(reg->var_off);
6750 __reg_assign_32_into_64(reg);
6751 }
6752
6753 /* truncate register to smaller size (in bytes)
6754 * must be called with size < BPF_REG_SIZE
6755 */
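/* Illustrative: a register holding the constant 0x1122334455667788
 * coerced to size 2 keeps only 0x7788; if the old umin/umax disagreed
 * in the bits above the new width, the unsigned bounds widen to
 * [0, 0xffff].
 */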
6756 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
6757 {
6758 u64 mask;
6759
6760 /* clear high bits in bit representation */
6761 reg->var_off = tnum_cast(reg->var_off, size);
6762
6763 /* fix arithmetic bounds */
6764 mask = ((u64)1 << (size * 8)) - 1;
6765 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
6766 reg->umin_value &= mask;
6767 reg->umax_value &= mask;
6768 } else {
6769 reg->umin_value = 0;
6770 reg->umax_value = mask;
6771 }
6772 reg->smin_value = reg->umin_value;
6773 reg->smax_value = reg->umax_value;
6774
6775 /* If the size is smaller than the 32-bit subregister, the 32-bit
6776 * values are also truncated, so push the 64-bit bounds into the
6777 * 32-bit bounds. The truncation itself already happened above.
6778 */
6779 if (size < 4)
6780 __mark_reg32_unbounded(reg);
6781
6782 reg_bounds_sync(reg);
6783 }
6784
6785 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
6786 {
6787 if (size == 1) {
6788 reg->smin_value = reg->s32_min_value = S8_MIN;
6789 reg->smax_value = reg->s32_max_value = S8_MAX;
6790 } else if (size == 2) {
6791 reg->smin_value = reg->s32_min_value = S16_MIN;
6792 reg->smax_value = reg->s32_max_value = S16_MAX;
6793 } else {
6794 /* size == 4 */
6795 reg->smin_value = reg->s32_min_value = S32_MIN;
6796 reg->smax_value = reg->s32_max_value = S32_MAX;
6797 }
6798 reg->umin_value = reg->u32_min_value = 0;
6799 reg->umax_value = U64_MAX;
6800 reg->u32_max_value = U32_MAX;
6801 reg->var_off = tnum_unknown;
6802 }
6803
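/* Sign-extending counterpart of coerce_reg_to_size(), used for
 * sign-extending loads (BPF_MEMSX). Illustrative: for size == 1 a known
 * value 0xf0 becomes the constant -16; if the bits above the narrow
 * width of smin/smax disagree, we fall back to the conservative
 * defaults from set_sext64_default_val().
 */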
6804 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
6805 {
6806 s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
6807 u64 top_smax_value, top_smin_value;
6808 u64 num_bits = size * 8;
6809
6810 if (tnum_is_const(reg->var_off)) {
6811 u64_cval = reg->var_off.value;
6812 if (size == 1)
6813 reg->var_off = tnum_const((s8)u64_cval);
6814 else if (size == 2)
6815 reg->var_off = tnum_const((s16)u64_cval);
6816 else
6817 /* size == 4 */
6818 reg->var_off = tnum_const((s32)u64_cval);
6819
6820 u64_cval = reg->var_off.value;
6821 reg->smax_value = reg->smin_value = u64_cval;
6822 reg->umax_value = reg->umin_value = u64_cval;
6823 reg->s32_max_value = reg->s32_min_value = u64_cval;
6824 reg->u32_max_value = reg->u32_min_value = u64_cval;
6825 return;
6826 }
6827
6828 top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
6829 top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;
6830
6831 if (top_smax_value != top_smin_value)
6832 goto out;
6833
6834 /* find the s64_max and s64_min after sign extension */
6835 if (size == 1) {
6836 init_s64_max = (s8)reg->smax_value;
6837 init_s64_min = (s8)reg->smin_value;
6838 } else if (size == 2) {
6839 init_s64_max = (s16)reg->smax_value;
6840 init_s64_min = (s16)reg->smin_value;
6841 } else {
6842 init_s64_max = (s32)reg->smax_value;
6843 init_s64_min = (s32)reg->smin_value;
6844 }
6845
6846 s64_max = max(init_s64_max, init_s64_min);
6847 s64_min = min(init_s64_max, init_s64_min);
6848
6849 /* s64_max and s64_min are either both non-negative or both negative */
6850 if ((s64_max >= 0) == (s64_min >= 0)) {
6851 reg->s32_min_value = reg->smin_value = s64_min;
6852 reg->s32_max_value = reg->smax_value = s64_max;
6853 reg->u32_min_value = reg->umin_value = s64_min;
6854 reg->u32_max_value = reg->umax_value = s64_max;
6855 reg->var_off = tnum_range(s64_min, s64_max);
6856 return;
6857 }
6858
6859 out:
6860 set_sext64_default_val(reg, size);
6861 }
6862
6863 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
6864 {
6865 if (size == 1) {
6866 reg->s32_min_value = S8_MIN;
6867 reg->s32_max_value = S8_MAX;
6868 } else {
6869 /* size == 2 */
6870 reg->s32_min_value = S16_MIN;
6871 reg->s32_max_value = S16_MAX;
6872 }
6873 reg->u32_min_value = 0;
6874 reg->u32_max_value = U32_MAX;
6875 reg->var_off = tnum_subreg(tnum_unknown);
6876 }
6877
6878 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
6879 {
6880 s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
6881 u32 top_smax_value, top_smin_value;
6882 u32 num_bits = size * 8;
6883
6884 if (tnum_is_const(reg->var_off)) {
6885 u32_val = reg->var_off.value;
6886 if (size == 1)
6887 reg->var_off = tnum_const((s8)u32_val);
6888 else
6889 reg->var_off = tnum_const((s16)u32_val);
6890
6891 u32_val = reg->var_off.value;
6892 reg->s32_min_value = reg->s32_max_value = u32_val;
6893 reg->u32_min_value = reg->u32_max_value = u32_val;
6894 return;
6895 }
6896
6897 top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
6898 top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
6899
6900 if (top_smax_value != top_smin_value)
6901 goto out;
6902
6903 /* find the s32_max and s32_min after sign extension */
6904 if (size == 1) {
6905 init_s32_max = (s8)reg->s32_max_value;
6906 init_s32_min = (s8)reg->s32_min_value;
6907 } else {
6908 /* size == 2 */
6909 init_s32_max = (s16)reg->s32_max_value;
6910 init_s32_min = (s16)reg->s32_min_value;
6911 }
6912 s32_max = max(init_s32_max, init_s32_min);
6913 s32_min = min(init_s32_max, init_s32_min);
6914
6915 if ((s32_min >= 0) == (s32_max >= 0)) {
6916 reg->s32_min_value = s32_min;
6917 reg->s32_max_value = s32_max;
6918 reg->u32_min_value = (u32)s32_min;
6919 reg->u32_max_value = (u32)s32_max;
6920 reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
6921 return;
6922 }
6923
6924 out:
6925 set_sext32_default_val(reg, size);
6926 }
6927
6928 static bool bpf_map_is_rdonly(const struct bpf_map *map)
6929 {
6930 /* A map is considered read-only if the following conditions are true:
6931 *
6932 * 1) BPF program side cannot change any of the map content. The
6933 * BPF_F_RDONLY_PROG flag was set at map creation time and stays
6934 * set throughout the lifetime of the map.
6935 * 2) The map value(s) have been initialized from user space by a
6936 * loader and then "frozen", such that no new map update/delete
6937 * operations from syscall side are possible for the rest of
6938 * the map's lifetime from that point onwards.
6939 * 3) Any parallel/pending map update/delete operations from syscall
6940 * side have been completed. Only after that point, it's safe to
6941 * assume that map value(s) are immutable.
6942 */
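	/* Typical flow (illustrative): a loader creates the map with
	 * BPF_F_RDONLY_PROG, fills in the values and then issues
	 * BPF_MAP_FREEZE; once any in-flight syscall-side writes drain,
	 * reads through this map can be treated as reads of constants.
	 */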
6943 return (map->map_flags & BPF_F_RDONLY_PROG) &&
6944 READ_ONCE(map->frozen) &&
6945 !bpf_map_write_active(map);
6946 }
6947
6948 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
6949 bool is_ldsx)
6950 {
6951 void *ptr;
6952 u64 addr;
6953 int err;
6954
6955 err = map->ops->map_direct_value_addr(map, &addr, off);
6956 if (err)
6957 return err;
6958 ptr = (void *)(long)addr + off;
6959
6960 switch (size) {
6961 case sizeof(u8):
6962 *val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
6963 break;
6964 case sizeof(u16):
6965 *val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
6966 break;
6967 case sizeof(u32):
6968 *val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
6969 break;
6970 case sizeof(u64):
6971 *val = *(u64 *)ptr;
6972 break;
6973 default:
6974 return -EINVAL;
6975 }
6976 return 0;
6977 }
6978
6979 #define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
6980 #define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null)
6981 #define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
6982 #define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type) __PASTE(__type, __safe_trusted_or_null)
6983
6984 /*
6985 * Allow-list a few fields as RCU trusted or fully trusted.
6986 * This logic doesn't allow mixed tagging and will be removed once GCC supports
6987 * btf_type_tag.
6988 */
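/* For example (illustrative): task->real_parent listed below is returned
 * as a MEM_RCU-tagged pointer when dereferenced inside an RCU read-side
 * section, instead of being downgraded to an untrusted PTR_TO_BTF_ID.
 */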
6989
6990 /* RCU trusted: these fields are trusted in RCU CS and never NULL */
6991 BTF_TYPE_SAFE_RCU(struct task_struct) {
6992 const cpumask_t *cpus_ptr;
6993 struct css_set __rcu *cgroups;
6994 struct task_struct __rcu *real_parent;
6995 struct task_struct *group_leader;
6996 };
6997
6998 BTF_TYPE_SAFE_RCU(struct cgroup) {
6999 /* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
7000 struct kernfs_node *kn;
7001 };
7002
7003 BTF_TYPE_SAFE_RCU(struct css_set) {
7004 struct cgroup *dfl_cgrp;
7005 };
7006
7007 /* RCU trusted: these fields are trusted in RCU CS and can be NULL */
7008 BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
7009 struct file __rcu *exe_file;
7010 };
7011
7012 /* skb->sk, req->sk are not RCU protected, but we mark them as such
7013 * because bpf prog accessible sockets are SOCK_RCU_FREE.
7014 */
7015 BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
7016 struct sock *sk;
7017 };
7018
7019 BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
7020 struct sock *sk;
7021 };
7022
7023 /* full trusted: these fields are trusted even outside of RCU CS and never NULL */
7024 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
7025 struct seq_file *seq;
7026 };
7027
7028 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
7029 struct bpf_iter_meta *meta;
7030 struct task_struct *task;
7031 };
7032
7033 BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
7034 struct file *file;
7035 };
7036
7037 BTF_TYPE_SAFE_TRUSTED(struct file) {
7038 struct inode *f_inode;
7039 };
7040
7041 BTF_TYPE_SAFE_TRUSTED(struct dentry) {
7042 /* no negative dentry-s in places where bpf can see it */
7043 struct inode *d_inode;
7044 };
7045
7046 BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
7047 struct sock *sk;
7048 };
7049
7050 static bool type_is_rcu(struct bpf_verifier_env *env,
7051 struct bpf_reg_state *reg,
7052 const char *field_name, u32 btf_id)
7053 {
7054 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
7055 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
7056 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
7057
7058 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
7059 }
7060
7061 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
7062 struct bpf_reg_state *reg,
7063 const char *field_name, u32 btf_id)
7064 {
7065 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
7066 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
7067 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
7068
7069 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
7070 }
7071
7072 static bool type_is_trusted(struct bpf_verifier_env *env,
7073 struct bpf_reg_state *reg,
7074 const char *field_name, u32 btf_id)
7075 {
7076 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
7077 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
7078 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
7079 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
7080 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
7081
7082 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
7083 }
7084
7085 static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
7086 struct bpf_reg_state *reg,
7087 const char *field_name, u32 btf_id)
7088 {
7089 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
7090
7091 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
7092 "__safe_trusted_or_null");
7093 }
7094
7095 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
7096 struct bpf_reg_state *regs,
7097 int regno, int off, int size,
7098 enum bpf_access_type atype,
7099 int value_regno)
7100 {
7101 struct bpf_reg_state *reg = regs + regno;
7102 const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
7103 const char *tname = btf_name_by_offset(reg->btf, t->name_off);
7104 const char *field_name = NULL;
7105 enum bpf_type_flag flag = 0;
7106 u32 btf_id = 0;
7107 int ret;
7108
7109 if (!env->allow_ptr_leaks) {
7110 verbose(env,
7111 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
7112 tname);
7113 return -EPERM;
7114 }
7115 if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
7116 verbose(env,
7117 "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
7118 tname);
7119 return -EINVAL;
7120 }
7121 if (off < 0) {
7122 verbose(env,
7123 "R%d is ptr_%s invalid negative access: off=%d\n",
7124 regno, tname, off);
7125 return -EACCES;
7126 }
7127 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
7128 char tn_buf[48];
7129
7130 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7131 verbose(env,
7132 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
7133 regno, tname, off, tn_buf);
7134 return -EACCES;
7135 }
7136
7137 if (reg->type & MEM_USER) {
7138 verbose(env,
7139 "R%d is ptr_%s access user memory: off=%d\n",
7140 regno, tname, off);
7141 return -EACCES;
7142 }
7143
7144 if (reg->type & MEM_PERCPU) {
7145 verbose(env,
7146 "R%d is ptr_%s access percpu memory: off=%d\n",
7147 regno, tname, off);
7148 return -EACCES;
7149 }
7150
7151 if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
7152 if (!btf_is_kernel(reg->btf)) {
7153 verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
7154 return -EFAULT;
7155 }
7156 ret = env->ops->btf_struct_access(&env->log, reg, off, size);
7157 } else {
7158 /* Writes are permitted with default btf_struct_access for
7159 * program allocated objects (which always have ref_obj_id > 0),
7160 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
7161 */
7162 if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
7163 verbose(env, "only read is supported\n");
7164 return -EACCES;
7165 }
7166
7167 if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
7168 !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
7169 verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
7170 return -EFAULT;
7171 }
7172
7173 ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
7174 }
7175
7176 if (ret < 0)
7177 return ret;
7178
7179 if (ret != PTR_TO_BTF_ID) {
7180 /* just mark; */
7181
7182 } else if (type_flag(reg->type) & PTR_UNTRUSTED) {
7183 /* If this is an untrusted pointer, all pointers formed by walking it
7184 * also inherit the untrusted flag.
7185 */
7186 flag = PTR_UNTRUSTED;
7187
7188 } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
7189 /* By default any pointer obtained from walking a trusted pointer is no
7190 * longer trusted, unless the field being accessed has explicitly been
7191 * marked as inheriting its parent's state of trust (either full or RCU).
7192 * For example:
7193 * 'cgroups' pointer is untrusted if task->cgroups dereference
7194 * happened in a sleepable program outside of bpf_rcu_read_lock()
7195 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
7196 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
7197 *
7198 * A regular RCU-protected pointer with __rcu tag can also be deemed
7199 * trusted if we are in an RCU CS. Such pointer can be NULL.
7200 */
7201 if (type_is_trusted(env, reg, field_name, btf_id)) {
7202 flag |= PTR_TRUSTED;
7203 } else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
7204 flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
7205 } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
7206 if (type_is_rcu(env, reg, field_name, btf_id)) {
7207 /* ignore __rcu tag and mark it MEM_RCU */
7208 flag |= MEM_RCU;
7209 } else if (flag & MEM_RCU ||
7210 type_is_rcu_or_null(env, reg, field_name, btf_id)) {
7211 /* __rcu tagged pointers can be NULL */
7212 flag |= MEM_RCU | PTR_MAYBE_NULL;
7213
7214 /* We always trust them */
7215 if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
7216 flag & PTR_UNTRUSTED)
7217 flag &= ~PTR_UNTRUSTED;
7218 } else if (flag & (MEM_PERCPU | MEM_USER)) {
7219 /* keep as-is */
7220 } else {
7221 /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
7222 clear_trusted_flags(&flag);
7223 }
7224 } else {
7225 /*
7226 * If we are not in an RCU CS, or the MEM_RCU pointer can be
7227 * NULL, aggressively mark it as untrusted; otherwise such
7228 * pointers would be plain PTR_TO_BTF_ID without flags
7229 * and would be allowed to be passed into helpers for
7230 * compat reasons.
7231 */
7232 flag = PTR_UNTRUSTED;
7233 }
7234 } else {
7235 /* Old compat. Deprecated */
7236 clear_trusted_flags(&flag);
7237 }
7238
7239 if (atype == BPF_READ && value_regno >= 0)
7240 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
7241
7242 return 0;
7243 }
7244
7245 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
7246 struct bpf_reg_state *regs,
7247 int regno, int off, int size,
7248 enum bpf_access_type atype,
7249 int value_regno)
7250 {
7251 struct bpf_reg_state *reg = regs + regno;
7252 struct bpf_map *map = reg->map_ptr;
7253 struct bpf_reg_state map_reg;
7254 enum bpf_type_flag flag = 0;
7255 const struct btf_type *t;
7256 const char *tname;
7257 u32 btf_id;
7258 int ret;
7259
7260 if (!btf_vmlinux) {
7261 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
7262 return -ENOTSUPP;
7263 }
7264
7265 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
7266 verbose(env, "map_ptr access not supported for map type %d\n",
7267 map->map_type);
7268 return -ENOTSUPP;
7269 }
7270
7271 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
7272 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
7273
7274 if (!env->allow_ptr_leaks) {
7275 verbose(env,
7276 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
7277 tname);
7278 return -EPERM;
7279 }
7280
7281 if (off < 0) {
7282 verbose(env, "R%d is %s invalid negative access: off=%d\n",
7283 regno, tname, off);
7284 return -EACCES;
7285 }
7286
7287 if (atype != BPF_READ) {
7288 verbose(env, "only read from %s is supported\n", tname);
7289 return -EACCES;
7290 }
7291
7292 /* Simulate access to a PTR_TO_BTF_ID */
7293 memset(&map_reg, 0, sizeof(map_reg));
7294 mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
7295 ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
7296 if (ret < 0)
7297 return ret;
7298
7299 if (value_regno >= 0)
7300 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
7301
7302 return 0;
7303 }
7304
7305 /* Check that the stack access at the given offset is within bounds. The
7306 * maximum valid offset is -1.
7307 *
7308 * The minimum valid offset is -MAX_BPF_STACK for writes, and
7309 * -state->allocated_stack for reads.
7310 */
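/* Illustrative: a write at off -8 is accepted as long as it stays above
 * -MAX_BPF_STACK, while a read at -16 from a frame that has only
 * allocated 8 bytes is rejected unless env->allow_uninit_stack is set.
 */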
7311 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
7312 s64 off,
7313 struct bpf_func_state *state,
7314 enum bpf_access_type t)
7315 {
7316 int min_valid_off;
7317
7318 if (t == BPF_WRITE || env->allow_uninit_stack)
7319 min_valid_off = -MAX_BPF_STACK;
7320 else
7321 min_valid_off = -state->allocated_stack;
7322
7323 if (off < min_valid_off || off > -1)
7324 return -EACCES;
7325 return 0;
7326 }
7327
7328 /* Check that the stack access at 'regno + off' falls within the maximum stack
7329 * bounds.
7330 *
7331 * 'off' includes the register's fixed offset (reg->off), but not its dynamic part (if any).
7332 */
7333 static int check_stack_access_within_bounds(
7334 struct bpf_verifier_env *env,
7335 int regno, int off, int access_size,
7336 enum bpf_access_type type)
7337 {
7338 struct bpf_reg_state *regs = cur_regs(env);
7339 struct bpf_reg_state *reg = regs + regno;
7340 struct bpf_func_state *state = func(env, reg);
7341 s64 min_off, max_off;
7342 int err;
7343 char *err_extra;
7344
7345 if (type == BPF_READ)
7346 err_extra = " read from";
7347 else
7348 err_extra = " write to";
7349
7350 if (tnum_is_const(reg->var_off)) {
7351 min_off = (s64)reg->var_off.value + off;
7352 max_off = min_off + access_size;
7353 } else {
7354 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
7355 reg->smin_value <= -BPF_MAX_VAR_OFF) {
7356 verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
7357 err_extra, regno);
7358 return -EACCES;
7359 }
7360 min_off = reg->smin_value + off;
7361 max_off = reg->smax_value + off + access_size;
7362 }
7363
7364 err = check_stack_slot_within_bounds(env, min_off, state, type);
7365 if (!err && max_off > 0)
7366 err = -EINVAL; /* out of stack access into non-negative offsets */
7367 if (!err && access_size < 0)
7368 /* access_size should not be negative (or overflow an int); other checks
7369 * along the way should have prevented such an access.
7370 */
7371 err = -EFAULT; /* invalid negative access size; integer overflow? */
7372
7373 if (err) {
7374 if (tnum_is_const(reg->var_off)) {
7375 verbose(env, "invalid%s stack R%d off=%d size=%d\n",
7376 err_extra, regno, off, access_size);
7377 } else {
7378 char tn_buf[48];
7379
7380 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7381 verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
7382 err_extra, regno, tn_buf, off, access_size);
7383 }
7384 return err;
7385 }
7386
7387 /* Note that there is no stack access with offset zero, so the needed stack
7388 * size is -min_off, not -min_off+1.
7389 */
7390 return grow_stack_state(env, state, -min_off /* size */);
7391 }
7392
7393 static bool get_func_retval_range(struct bpf_prog *prog,
7394 struct bpf_retval_range *range)
7395 {
7396 if (prog->type == BPF_PROG_TYPE_LSM &&
7397 prog->expected_attach_type == BPF_LSM_MAC &&
7398 !bpf_lsm_get_retval_range(prog, range)) {
7399 return true;
7400 }
7401 return false;
7402 }
7403
7404 /* check whether memory at (regno + off) is accessible for t = (read | write)
7405 * if t==write, value_regno is a register whose value is stored into memory
7406 * if t==read, value_regno is a register which will receive the value from memory
7407 * if t==write && value_regno==-1, some unknown value is stored into memory
7408 * if t==read && value_regno==-1, don't care what we read from memory
7409 */
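/* Example call (illustrative): for a load like
 * BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 8), check_load_mem() below
 * invokes this with regno == 1, off == 8, bpf_size == BPF_W,
 * t == BPF_READ and value_regno == 0.
 */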
7410 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
7411 int off, int bpf_size, enum bpf_access_type t,
7412 int value_regno, bool strict_alignment_once, bool is_ldsx)
7413 {
7414 struct bpf_reg_state *regs = cur_regs(env);
7415 struct bpf_reg_state *reg = regs + regno;
7416 int size, err = 0;
7417
7418 size = bpf_size_to_bytes(bpf_size);
7419 if (size < 0)
7420 return size;
7421
7422 /* alignment checks will add in reg->off themselves */
7423 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
7424 if (err)
7425 return err;
7426
7427 /* for access checks, reg->off is just part of off */
7428 off += reg->off;
7429
7430 if (reg->type == PTR_TO_MAP_KEY) {
7431 if (t == BPF_WRITE) {
7432 verbose(env, "write to change key R%d not allowed\n", regno);
7433 return -EACCES;
7434 }
7435
7436 err = check_mem_region_access(env, regno, off, size,
7437 reg->map_ptr->key_size, false);
7438 if (err)
7439 return err;
7440 if (value_regno >= 0)
7441 mark_reg_unknown(env, regs, value_regno);
7442 } else if (reg->type == PTR_TO_MAP_VALUE) {
7443 struct btf_field *kptr_field = NULL;
7444
7445 if (t == BPF_WRITE && value_regno >= 0 &&
7446 is_pointer_value(env, value_regno)) {
7447 verbose(env, "R%d leaks addr into map\n", value_regno);
7448 return -EACCES;
7449 }
7450 err = check_map_access_type(env, regno, off, size, t);
7451 if (err)
7452 return err;
7453 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
7454 if (err)
7455 return err;
7456 if (tnum_is_const(reg->var_off))
7457 kptr_field = btf_record_find(reg->map_ptr->record,
7458 off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
7459 if (kptr_field) {
7460 err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
7461 } else if (t == BPF_READ && value_regno >= 0) {
7462 struct bpf_map *map = reg->map_ptr;
7463
7464 /* if map is read-only, track its contents as scalars */
7465 if (tnum_is_const(reg->var_off) &&
7466 bpf_map_is_rdonly(map) &&
7467 map->ops->map_direct_value_addr) {
7468 int map_off = off + reg->var_off.value;
7469 u64 val = 0;
7470
7471 err = bpf_map_direct_read(map, map_off, size,
7472 &val, is_ldsx);
7473 if (err)
7474 return err;
7475
7476 regs[value_regno].type = SCALAR_VALUE;
7477 __mark_reg_known(&regs[value_regno], val);
7478 } else {
7479 mark_reg_unknown(env, regs, value_regno);
7480 }
7481 }
7482 } else if (base_type(reg->type) == PTR_TO_MEM) {
7483 bool rdonly_mem = type_is_rdonly_mem(reg->type);
7484
7485 if (type_may_be_null(reg->type)) {
7486 verbose(env, "R%d invalid mem access '%s'\n", regno,
7487 reg_type_str(env, reg->type));
7488 return -EACCES;
7489 }
7490
7491 if (t == BPF_WRITE && rdonly_mem) {
7492 verbose(env, "R%d cannot write into %s\n",
7493 regno, reg_type_str(env, reg->type));
7494 return -EACCES;
7495 }
7496
7497 if (t == BPF_WRITE && value_regno >= 0 &&
7498 is_pointer_value(env, value_regno)) {
7499 verbose(env, "R%d leaks addr into mem\n", value_regno);
7500 return -EACCES;
7501 }
7502
7503 err = check_mem_region_access(env, regno, off, size,
7504 reg->mem_size, false);
7505 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
7506 mark_reg_unknown(env, regs, value_regno);
7507 } else if (reg->type == PTR_TO_CTX) {
7508 struct bpf_retval_range range;
7509 struct bpf_insn_access_aux info = {
7510 .reg_type = SCALAR_VALUE,
7511 .is_ldsx = is_ldsx,
7512 .log = &env->log,
7513 };
7514
7515 if (t == BPF_WRITE && value_regno >= 0 &&
7516 is_pointer_value(env, value_regno)) {
7517 verbose(env, "R%d leaks addr into ctx\n", value_regno);
7518 return -EACCES;
7519 }
7520
7521 err = check_ptr_off_reg(env, reg, regno);
7522 if (err < 0)
7523 return err;
7524
7525 err = check_ctx_access(env, insn_idx, off, size, t, &info);
7526 if (err)
7527 verbose_linfo(env, insn_idx, "; ");
7528 if (!err && t == BPF_READ && value_regno >= 0) {
7529 /* ctx access returns either a scalar, or a
7530 * PTR_TO_PACKET[_META,_END]. In the latter
7531 * case, we know the offset is zero.
7532 */
7533 if (info.reg_type == SCALAR_VALUE) {
7534 if (info.is_retval && get_func_retval_range(env->prog, &range)) {
7535 err = __mark_reg_s32_range(env, regs, value_regno,
7536 range.minval, range.maxval);
7537 if (err)
7538 return err;
7539 } else {
7540 mark_reg_unknown(env, regs, value_regno);
7541 }
7542 } else {
7543 mark_reg_known_zero(env, regs,
7544 value_regno);
7545 if (type_may_be_null(info.reg_type))
7546 regs[value_regno].id = ++env->id_gen;
7547 /* A load of a ctx field could have a different
7548 * actual load size from the one encoded in the
7549 * insn. When the dst is PTR, it is for sure not
7550 * a sub-register.
7551 */
7552 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
7553 if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
7554 regs[value_regno].btf = info.btf;
7555 regs[value_regno].btf_id = info.btf_id;
7556 regs[value_regno].ref_obj_id = info.ref_obj_id;
7557 }
7558 }
7559 regs[value_regno].type = info.reg_type;
7560 }
7561
7562 } else if (reg->type == PTR_TO_STACK) {
7563 /* Basic bounds checks. */
7564 err = check_stack_access_within_bounds(env, regno, off, size, t);
7565 if (err)
7566 return err;
7567
7568 if (t == BPF_READ)
7569 err = check_stack_read(env, regno, off, size,
7570 value_regno);
7571 else
7572 err = check_stack_write(env, regno, off, size,
7573 value_regno, insn_idx);
7574 } else if (reg_is_pkt_pointer(reg)) {
7575 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
7576 verbose(env, "cannot write into packet\n");
7577 return -EACCES;
7578 }
7579 if (t == BPF_WRITE && value_regno >= 0 &&
7580 is_pointer_value(env, value_regno)) {
7581 verbose(env, "R%d leaks addr into packet\n",
7582 value_regno);
7583 return -EACCES;
7584 }
7585 err = check_packet_access(env, regno, off, size, false);
7586 if (!err && t == BPF_READ && value_regno >= 0)
7587 mark_reg_unknown(env, regs, value_regno);
7588 } else if (reg->type == PTR_TO_FLOW_KEYS) {
7589 if (t == BPF_WRITE && value_regno >= 0 &&
7590 is_pointer_value(env, value_regno)) {
7591 verbose(env, "R%d leaks addr into flow keys\n",
7592 value_regno);
7593 return -EACCES;
7594 }
7595
7596 err = check_flow_keys_access(env, off, size);
7597 if (!err && t == BPF_READ && value_regno >= 0)
7598 mark_reg_unknown(env, regs, value_regno);
7599 } else if (type_is_sk_pointer(reg->type)) {
7600 if (t == BPF_WRITE) {
7601 verbose(env, "R%d cannot write into %s\n",
7602 regno, reg_type_str(env, reg->type));
7603 return -EACCES;
7604 }
7605 err = check_sock_access(env, insn_idx, regno, off, size, t);
7606 if (!err && value_regno >= 0)
7607 mark_reg_unknown(env, regs, value_regno);
7608 } else if (reg->type == PTR_TO_TP_BUFFER) {
7609 err = check_tp_buffer_access(env, reg, regno, off, size);
7610 if (!err && t == BPF_READ && value_regno >= 0)
7611 mark_reg_unknown(env, regs, value_regno);
7612 } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
7613 !type_may_be_null(reg->type)) {
7614 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
7615 value_regno);
7616 } else if (reg->type == CONST_PTR_TO_MAP) {
7617 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
7618 value_regno);
7619 } else if (base_type(reg->type) == PTR_TO_BUF) {
7620 bool rdonly_mem = type_is_rdonly_mem(reg->type);
7621 u32 *max_access;
7622
7623 if (rdonly_mem) {
7624 if (t == BPF_WRITE) {
7625 verbose(env, "R%d cannot write into %s\n",
7626 regno, reg_type_str(env, reg->type));
7627 return -EACCES;
7628 }
7629 max_access = &env->prog->aux->max_rdonly_access;
7630 } else {
7631 max_access = &env->prog->aux->max_rdwr_access;
7632 }
7633
7634 err = check_buffer_access(env, reg, regno, off, size, false,
7635 max_access);
7636
7637 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
7638 mark_reg_unknown(env, regs, value_regno);
7639 } else if (reg->type == PTR_TO_ARENA) {
7640 if (t == BPF_READ && value_regno >= 0)
7641 mark_reg_unknown(env, regs, value_regno);
7642 } else {
7643 verbose(env, "R%d invalid mem access '%s'\n", regno,
7644 reg_type_str(env, reg->type));
7645 return -EACCES;
7646 }
7647
7648 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
7649 regs[value_regno].type == SCALAR_VALUE) {
7650 if (!is_ldsx)
7651 /* b/h/w load zero-extends, mark upper bits as known 0 */
7652 coerce_reg_to_size(&regs[value_regno], size);
7653 else
7654 coerce_reg_to_size_sx(&regs[value_regno], size);
7655 }
7656 return err;
7657 }
7658
7659 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
7660 bool allow_trust_mismatch);
7661
7662 static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
7663 bool strict_alignment_once, bool is_ldsx,
7664 bool allow_trust_mismatch, const char *ctx)
7665 {
7666 struct bpf_reg_state *regs = cur_regs(env);
7667 enum bpf_reg_type src_reg_type;
7668 int err;
7669
7670 /* check src operand */
7671 err = check_reg_arg(env, insn->src_reg, SRC_OP);
7672 if (err)
7673 return err;
7674
7675 /* check dst operand */
7676 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7677 if (err)
7678 return err;
7679
7680 src_reg_type = regs[insn->src_reg].type;
7681
7682 /* Check if (src_reg + off) is readable. The state of dst_reg will be
7683 * updated by this call.
7684 */
7685 err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off,
7686 BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
7687 strict_alignment_once, is_ldsx);
7688 err = err ?: save_aux_ptr_type(env, src_reg_type,
7689 allow_trust_mismatch);
7690 err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);
7691
7692 return err;
7693 }
7694
7695 static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
7696 bool strict_alignment_once)
7697 {
7698 struct bpf_reg_state *regs = cur_regs(env);
7699 enum bpf_reg_type dst_reg_type;
7700 int err;
7701
7702 /* check src1 operand */
7703 err = check_reg_arg(env, insn->src_reg, SRC_OP);
7704 if (err)
7705 return err;
7706
7707 /* check src2 operand */
7708 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7709 if (err)
7710 return err;
7711
7712 dst_reg_type = regs[insn->dst_reg].type;
7713
7714 /* Check if (dst_reg + off) is writeable. */
7715 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
7716 BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
7717 strict_alignment_once, false);
7718 err = err ?: save_aux_ptr_type(env, dst_reg_type, false);
7719
7720 return err;
7721 }
7722
7723 static int check_atomic_rmw(struct bpf_verifier_env *env,
7724 struct bpf_insn *insn)
7725 {
7726 int load_reg;
7727 int err;
7728
7729 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
7730 verbose(env, "invalid atomic operand size\n");
7731 return -EINVAL;
7732 }
7733
7734 /* check src1 operand */
7735 err = check_reg_arg(env, insn->src_reg, SRC_OP);
7736 if (err)
7737 return err;
7738
7739 /* check src2 operand */
7740 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7741 if (err)
7742 return err;
7743
7744 if (insn->imm == BPF_CMPXCHG) {
7745 /* Check comparison of R0 with memory location */
7746 const u32 aux_reg = BPF_REG_0;
7747
7748 err = check_reg_arg(env, aux_reg, SRC_OP);
7749 if (err)
7750 return err;
7751
7752 if (is_pointer_value(env, aux_reg)) {
7753 verbose(env, "R%d leaks addr into mem\n", aux_reg);
7754 return -EACCES;
7755 }
7756 }
7757
7758 if (is_pointer_value(env, insn->src_reg)) {
7759 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
7760 return -EACCES;
7761 }
7762
7763 if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
7764 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
7765 insn->dst_reg,
7766 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
7767 return -EACCES;
7768 }
7769
7770 if (insn->imm & BPF_FETCH) {
7771 if (insn->imm == BPF_CMPXCHG)
7772 load_reg = BPF_REG_0;
7773 else
7774 load_reg = insn->src_reg;
7775
7776 /* check and record load of old value */
7777 err = check_reg_arg(env, load_reg, DST_OP);
7778 if (err)
7779 return err;
7780 } else {
7781 /* This instruction accesses a memory location but doesn't
7782 * actually load it into a register.
7783 */
7784 load_reg = -1;
7785 }
7786
7787 /* Check whether we can read the memory, with second call for fetch
7788 * case to simulate the register fill.
7789 */
7790 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
7791 BPF_SIZE(insn->code), BPF_READ, -1, true, false);
7792 if (!err && load_reg >= 0)
7793 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
7794 insn->off, BPF_SIZE(insn->code),
7795 BPF_READ, load_reg, true, false);
7796 if (err)
7797 return err;
7798
7799 if (is_arena_reg(env, insn->dst_reg)) {
7800 err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
7801 if (err)
7802 return err;
7803 }
7804 /* Check whether we can write into the same memory. */
7805 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
7806 BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
7807 if (err)
7808 return err;
7809 return 0;
7810 }
7811
7812 static int check_atomic_load(struct bpf_verifier_env *env,
7813 struct bpf_insn *insn)
7814 {
7815 int err;
7816
7817 err = check_load_mem(env, insn, true, false, false, "atomic_load");
7818 if (err)
7819 return err;
7820
7821 if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
7822 verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
7823 insn->src_reg,
7824 reg_type_str(env, reg_state(env, insn->src_reg)->type));
7825 return -EACCES;
7826 }
7827
7828 return 0;
7829 }
7830
7831 static int check_atomic_store(struct bpf_verifier_env *env,
7832 struct bpf_insn *insn)
7833 {
7834 int err;
7835
7836 err = check_store_reg(env, insn, true);
7837 if (err)
7838 return err;
7839
7840 if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
7841 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
7842 insn->dst_reg,
7843 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
7844 return -EACCES;
7845 }
7846
7847 return 0;
7848 }
7849
7850 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
7851 {
7852 switch (insn->imm) {
7853 case BPF_ADD:
7854 case BPF_ADD | BPF_FETCH:
7855 case BPF_AND:
7856 case BPF_AND | BPF_FETCH:
7857 case BPF_OR:
7858 case BPF_OR | BPF_FETCH:
7859 case BPF_XOR:
7860 case BPF_XOR | BPF_FETCH:
7861 case BPF_XCHG:
7862 case BPF_CMPXCHG:
7863 return check_atomic_rmw(env, insn);
7864 case BPF_LOAD_ACQ:
7865 if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
7866 verbose(env,
7867 "64-bit load-acquires are only supported on 64-bit arches\n");
7868 return -EOPNOTSUPP;
7869 }
7870 return check_atomic_load(env, insn);
7871 case BPF_STORE_REL:
7872 if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
7873 verbose(env,
7874 "64-bit store-releases are only supported on 64-bit arches\n");
7875 return -EOPNOTSUPP;
7876 }
7877 return check_atomic_store(env, insn);
7878 default:
7879 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
7880 insn->imm);
7881 return -EINVAL;
7882 }
7883 }
7884
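/* A minimal illustrative sketch (not from the original source) of the three
 * BPF_ATOMIC flavours dispatched by check_atomic() above, assuming a
 * clang/libbpf toolchain new enough to emit load-acquire and store-release;
 * 'val' and 'old' are hypothetical program variables:
 *
 *	__sync_fetch_and_add(&val, 1);			// BPF_ADD [| BPF_FETCH] -> check_atomic_rmw()
 *	old = __atomic_load_n(&val, __ATOMIC_ACQUIRE);	// BPF_LOAD_ACQ  -> check_atomic_load()
 *	__atomic_store_n(&val, old, __ATOMIC_RELEASE);	// BPF_STORE_REL -> check_atomic_store()
 */
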
7885 /* When register 'regno' is used to read the stack (either directly or through
7886 * a helper function) make sure that it's within stack boundary and, depending
7887 * on the access type and privileges, that all elements of the stack are
7888 * initialized.
7889 *
7890 * 'off' includes 'regno->off', but not its dynamic part (if any).
7891 *
7892 * All registers that have been spilled on the stack in the slots within the
7893 * read offsets are marked as read.
7894 */
7895 static int check_stack_range_initialized(
7896 struct bpf_verifier_env *env, int regno, int off,
7897 int access_size, bool zero_size_allowed,
7898 enum bpf_access_type type, struct bpf_call_arg_meta *meta)
7899 {
7900 struct bpf_reg_state *reg = reg_state(env, regno);
7901 struct bpf_func_state *state = func(env, reg);
7902 int err, min_off, max_off, i, j, slot, spi;
7903 /* Some accesses can write anything into the stack, others are
7904 * read-only.
7905 */
7906 bool clobber = false;
7907
7908 if (access_size == 0 && !zero_size_allowed) {
7909 verbose(env, "invalid zero-sized read\n");
7910 return -EACCES;
7911 }
7912
7913 if (type == BPF_WRITE)
7914 clobber = true;
7915
7916 err = check_stack_access_within_bounds(env, regno, off, access_size, type);
7917 if (err)
7918 return err;
7919
7920
7921 if (tnum_is_const(reg->var_off)) {
7922 min_off = max_off = reg->var_off.value + off;
7923 } else {
7924 /* Variable offset is prohibited for unprivileged mode for
7925 * simplicity since it requires corresponding support in
7926 * Spectre masking for stack ALU.
7927 * See also retrieve_ptr_limit().
7928 */
7929 if (!env->bypass_spec_v1) {
7930 char tn_buf[48];
7931
7932 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7933 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
7934 regno, tn_buf);
7935 return -EACCES;
7936 }
7937 /* Only an initialized buffer on the stack is allowed to be accessed
7938 * with a variable offset. With an uninitialized buffer it's hard to
7939 * guarantee that the whole memory is marked as initialized on
7940 * helper return, since the specific bounds are unknown, which may
7941 * cause uninitialized stack leaking.
7942 */
7943 if (meta && meta->raw_mode)
7944 meta = NULL;
7945
7946 min_off = reg->smin_value + off;
7947 max_off = reg->smax_value + off;
7948 }
7949
7950 if (meta && meta->raw_mode) {
7951 /* Ensure we won't be overwriting dynptrs when simulating byte
7952 * by byte access in check_helper_call using meta.access_size.
7953 * This would be a problem if we have a helper in the future
7954 * which takes:
7955 *
7956 * helper(uninit_mem, len, dynptr)
7957 *
7958 * Now, uninit_mem may overlap with the dynptr pointer. Hence, it
7959 * may end up writing to the dynptr itself when touching memory from
7960 * arg 1. This can be relaxed on a case by case basis for known
7961 * safe cases, but reject by default due to the possibility of
7962 * aliasing.
7963 */
7964 for (i = min_off; i < max_off + access_size; i++) {
7965 int stack_off = -i - 1;
7966
7967 spi = __get_spi(i);
7968 /* raw_mode may write past allocated_stack */
7969 if (state->allocated_stack <= stack_off)
7970 continue;
7971 if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
7972 verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
7973 return -EACCES;
7974 }
7975 }
7976 meta->access_size = access_size;
7977 meta->regno = regno;
7978 return 0;
7979 }
7980
7981 for (i = min_off; i < max_off + access_size; i++) {
7982 u8 *stype;
7983
7984 slot = -i - 1;
7985 spi = slot / BPF_REG_SIZE;
7986 if (state->allocated_stack <= slot) {
7987 verbose(env, "verifier bug: allocated_stack too small\n");
7988 return -EFAULT;
7989 }
7990
7991 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
7992 if (*stype == STACK_MISC)
7993 goto mark;
7994 if ((*stype == STACK_ZERO) ||
7995 (*stype == STACK_INVALID && env->allow_uninit_stack)) {
7996 if (clobber) {
7997 /* helper can write anything into the stack */
7998 *stype = STACK_MISC;
7999 }
8000 goto mark;
8001 }
8002
8003 if (is_spilled_reg(&state->stack[spi]) &&
8004 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
8005 env->allow_ptr_leaks)) {
8006 if (clobber) {
8007 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
8008 for (j = 0; j < BPF_REG_SIZE; j++)
8009 scrub_spilled_slot(&state->stack[spi].slot_type[j]);
8010 }
8011 goto mark;
8012 }
8013
8014 if (tnum_is_const(reg->var_off)) {
8015 verbose(env, "invalid read from stack R%d off %d+%d size %d\n",
8016 regno, min_off, i - min_off, access_size);
8017 } else {
8018 char tn_buf[48];
8019
8020 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
8021 verbose(env, "invalid read from stack R%d var_off %s+%d size %d\n",
8022 regno, tn_buf, i - min_off, access_size);
8023 }
8024 return -EACCES;
8025 mark:
8026 /* reading any byte out of 8-byte 'spill_slot' will cause
8027 * the whole slot to be marked as 'read'
8028 */
8029 mark_reg_read(env, &state->stack[spi].spilled_ptr,
8030 state->stack[spi].spilled_ptr.parent,
8031 REG_LIVE_READ64);
8032 /* We do not set REG_LIVE_WRITTEN for the stack slot, as we cannot
8033 * be sure whether the stack slot is written to or not. Hence,
8034 * we must still conservatively propagate reads upwards even if
8035 * the helper may write to the entire memory range.
8036 */
8037 }
8038 return 0;
8039 }
8040
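/* Illustrative sketch of the accesses policed above, assuming the usual
 * helper prototypes; 'buf', 'src' and 'rb' are hypothetical. With a raw_mode
 * (ARG_PTR_TO_UNINIT_MEM) argument the stack may be uninitialized because the
 * helper is assumed to fill it; otherwise every byte must already be
 * STACK_MISC/STACK_ZERO or part of a spilled register:
 *
 *	char buf[16];					// uninitialized stack
 *
 *	bpf_probe_read_kernel(buf, sizeof(buf), src);	// ok: dst arg is raw_mode
 *	bpf_ringbuf_output(&rb, buf, sizeof(buf), 0);	// ok only once buf is initialized
 */
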
8041 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
8042 int access_size, enum bpf_access_type access_type,
8043 bool zero_size_allowed,
8044 struct bpf_call_arg_meta *meta)
8045 {
8046 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8047 u32 *max_access;
8048
8049 switch (base_type(reg->type)) {
8050 case PTR_TO_PACKET:
8051 case PTR_TO_PACKET_META:
8052 return check_packet_access(env, regno, reg->off, access_size,
8053 zero_size_allowed);
8054 case PTR_TO_MAP_KEY:
8055 if (access_type == BPF_WRITE) {
8056 verbose(env, "R%d cannot write into %s\n", regno,
8057 reg_type_str(env, reg->type));
8058 return -EACCES;
8059 }
8060 return check_mem_region_access(env, regno, reg->off, access_size,
8061 reg->map_ptr->key_size, false);
8062 case PTR_TO_MAP_VALUE:
8063 if (check_map_access_type(env, regno, reg->off, access_size, access_type))
8064 return -EACCES;
8065 return check_map_access(env, regno, reg->off, access_size,
8066 zero_size_allowed, ACCESS_HELPER);
8067 case PTR_TO_MEM:
8068 if (type_is_rdonly_mem(reg->type)) {
8069 if (access_type == BPF_WRITE) {
8070 verbose(env, "R%d cannot write into %s\n", regno,
8071 reg_type_str(env, reg->type));
8072 return -EACCES;
8073 }
8074 }
8075 return check_mem_region_access(env, regno, reg->off,
8076 access_size, reg->mem_size,
8077 zero_size_allowed);
8078 case PTR_TO_BUF:
8079 if (type_is_rdonly_mem(reg->type)) {
8080 if (access_type == BPF_WRITE) {
8081 verbose(env, "R%d cannot write into %s\n", regno,
8082 reg_type_str(env, reg->type));
8083 return -EACCES;
8084 }
8085
8086 max_access = &env->prog->aux->max_rdonly_access;
8087 } else {
8088 max_access = &env->prog->aux->max_rdwr_access;
8089 }
8090 return check_buffer_access(env, reg, regno, reg->off,
8091 access_size, zero_size_allowed,
8092 max_access);
8093 case PTR_TO_STACK:
8094 return check_stack_range_initialized(
8095 env,
8096 regno, reg->off, access_size,
8097 zero_size_allowed, access_type, meta);
8098 case PTR_TO_BTF_ID:
8099 return check_ptr_to_btf_access(env, regs, regno, reg->off,
8100 access_size, BPF_READ, -1);
8101 case PTR_TO_CTX:
8102 /* In case the function doesn't know how to access the context
8103 * (because we are in a program of type SYSCALL, for example), we
8104 * cannot statically check its size.
8105 * Dynamically check it now.
8106 */
8107 if (!env->ops->convert_ctx_access) {
8108 int offset = access_size - 1;
8109
8110 /* Allow zero-byte read from PTR_TO_CTX */
8111 if (access_size == 0)
8112 return zero_size_allowed ? 0 : -EACCES;
8113
8114 return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
8115 access_type, -1, false, false);
8116 }
8117
8118 fallthrough;
8119 default: /* scalar_value or invalid ptr */
8120 /* Allow zero-byte read from NULL, regardless of pointer type */
8121 if (zero_size_allowed && access_size == 0 &&
8122 register_is_null(reg))
8123 return 0;
8124
8125 verbose(env, "R%d type=%s ", regno,
8126 reg_type_str(env, reg->type));
8127 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
8128 return -EACCES;
8129 }
8130 }
8131
8132 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
8133 * size.
8134 *
8135 * @regno is the register containing the access size. regno-1 is the register
8136 * containing the pointer.
8137 */
8138 static int check_mem_size_reg(struct bpf_verifier_env *env,
8139 struct bpf_reg_state *reg, u32 regno,
8140 enum bpf_access_type access_type,
8141 bool zero_size_allowed,
8142 struct bpf_call_arg_meta *meta)
8143 {
8144 int err;
8145
8146 /* This is used to refine r0 return value bounds for helpers
8147 * that enforce this value as an upper bound on return values.
8148 * See do_refine_retval_range() for helpers that can refine
8149 * the return value. The C type of the helper argument is u32, so we
8150 * pull the register bound from umax_value; if it is negative the
8151 * verifier errors out. Only upper bounds can be learned because the
8152 * retval is an int type and negative retvals are allowed.
8153 */
8154 meta->msize_max_value = reg->umax_value;
8155
8156 /* The register is SCALAR_VALUE; the access check happens using
8157 * its boundaries. For unprivileged variable accesses, disable
8158 * raw mode so that the program is required to initialize all
8159 * the memory that the helper could just partially fill up.
8160 */
8161 if (!tnum_is_const(reg->var_off))
8162 meta = NULL;
8163
8164 if (reg->smin_value < 0) {
8165 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
8166 regno);
8167 return -EACCES;
8168 }
8169
8170 if (reg->umin_value == 0 && !zero_size_allowed) {
8171 verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n",
8172 regno, reg->umin_value, reg->umax_value);
8173 return -EACCES;
8174 }
8175
8176 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
8177 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
8178 regno);
8179 return -EACCES;
8180 }
8181 err = check_helper_mem_access(env, regno - 1, reg->umax_value,
8182 access_type, zero_size_allowed, meta);
8183 if (!err)
8184 err = mark_chain_precision(env, regno);
8185 return err;
8186 }
8187
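/* Illustrative sketch of a (pointer, size) helper argument pair that ends up
 * in check_mem_size_reg(); 'buf', 'n', 'src' and get_len() are hypothetical.
 * The bounds check on 'n' is what keeps reg->umax_value below BPF_MAX_VAR_SIZ:
 *
 *	char buf[64];
 *	u32 n = get_len();		// some scalar source
 *
 *	if (n > sizeof(buf))		// without this: "unbounded memory access"
 *		n = sizeof(buf);
 *	bpf_probe_read_kernel(buf, n, src);
 */
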
8188 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
8189 u32 regno, u32 mem_size)
8190 {
8191 bool may_be_null = type_may_be_null(reg->type);
8192 struct bpf_reg_state saved_reg;
8193 int err;
8194
8195 if (register_is_null(reg))
8196 return 0;
8197
8198 /* Assuming that the register contains a value, check if the memory
8199 * access is safe. Temporarily save and restore the register's state as
8200 * the conversion shouldn't be visible to a caller.
8201 */
8202 if (may_be_null) {
8203 saved_reg = *reg;
8204 mark_ptr_not_null_reg(reg);
8205 }
8206
8207 err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL);
8208 err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL);
8209
8210 if (may_be_null)
8211 *reg = saved_reg;
8212
8213 return err;
8214 }
8215
8216 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
8217 u32 regno)
8218 {
8219 struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
8220 bool may_be_null = type_may_be_null(mem_reg->type);
8221 struct bpf_reg_state saved_reg;
8222 struct bpf_call_arg_meta meta;
8223 int err;
8224
8225 WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
8226
8227 memset(&meta, 0, sizeof(meta));
8228
8229 if (may_be_null) {
8230 saved_reg = *mem_reg;
8231 mark_ptr_not_null_reg(mem_reg);
8232 }
8233
8234 err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta);
8235 err = err ?: check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta);
8236
8237 if (may_be_null)
8238 *mem_reg = saved_reg;
8239
8240 return err;
8241 }
8242
8243 enum {
8244 PROCESS_SPIN_LOCK = (1 << 0),
8245 PROCESS_RES_LOCK = (1 << 1),
8246 PROCESS_LOCK_IRQ = (1 << 2),
8247 };
8248
8249 /* Implementation details:
8250 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
8251 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
8252 * Two bpf_map_lookups (even with the same key) will have different reg->id.
8253 * Two separate bpf_obj_new will also have different reg->id.
8254 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
8255 * clears reg->id after value_or_null->value transition, since the verifier only
8256 * cares about the range of access to valid map value pointer and doesn't care
8257 * about actual address of the map element.
8258 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
8259 * reg->id > 0 after value_or_null->value transition. By doing so
8260 * two bpf_map_lookups will be considered two different pointers that
8261 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
8262 * returned from bpf_obj_new.
8263 * The verifier allows taking only one bpf_spin_lock at a time to avoid
8264 * dead-locks.
8265 * Since only one bpf_spin_lock is allowed the checks are simpler than
8266 * reg_is_refcounted() logic. The verifier needs to remember only
8267 * one spin_lock instead of array of acquired_refs.
8268 * env->cur_state->active_locks remembers which map value element or allocated
8269 * object got locked and clears it after bpf_spin_unlock.
8270 */
8271 static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
8272 {
8273 bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
8274 const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
8275 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8276 struct bpf_verifier_state *cur = env->cur_state;
8277 bool is_const = tnum_is_const(reg->var_off);
8278 bool is_irq = flags & PROCESS_LOCK_IRQ;
8279 u64 val = reg->var_off.value;
8280 struct bpf_map *map = NULL;
8281 struct btf *btf = NULL;
8282 struct btf_record *rec;
8283 u32 spin_lock_off;
8284 int err;
8285
8286 if (!is_const) {
8287 verbose(env,
8288 "R%d doesn't have constant offset. %s_lock has to be at the constant offset\n",
8289 regno, lock_str);
8290 return -EINVAL;
8291 }
8292 if (reg->type == PTR_TO_MAP_VALUE) {
8293 map = reg->map_ptr;
8294 if (!map->btf) {
8295 verbose(env,
8296 "map '%s' has to have BTF in order to use %s_lock\n",
8297 map->name, lock_str);
8298 return -EINVAL;
8299 }
8300 } else {
8301 btf = reg->btf;
8302 }
8303
8304 rec = reg_btf_record(reg);
8305 if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) {
8306 verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local",
8307 map ? map->name : "kptr", lock_str);
8308 return -EINVAL;
8309 }
8310 spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
8311 if (spin_lock_off != val + reg->off) {
8312 verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
8313 val + reg->off, lock_str, spin_lock_off);
8314 return -EINVAL;
8315 }
8316 if (is_lock) {
8317 void *ptr;
8318 int type;
8319
8320 if (map)
8321 ptr = map;
8322 else
8323 ptr = btf;
8324
8325 if (!is_res_lock && cur->active_locks) {
8326 if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) {
8327 verbose(env,
8328 "Locking two bpf_spin_locks are not allowed\n");
8329 return -EINVAL;
8330 }
8331 } else if (is_res_lock && cur->active_locks) {
8332 if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) {
8333 verbose(env, "Acquiring the same lock again, AA deadlock detected\n");
8334 return -EINVAL;
8335 }
8336 }
8337
8338 if (is_res_lock && is_irq)
8339 type = REF_TYPE_RES_LOCK_IRQ;
8340 else if (is_res_lock)
8341 type = REF_TYPE_RES_LOCK;
8342 else
8343 type = REF_TYPE_LOCK;
8344 err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
8345 if (err < 0) {
8346 verbose(env, "Failed to acquire lock state\n");
8347 return err;
8348 }
8349 } else {
8350 void *ptr;
8351 int type;
8352
8353 if (map)
8354 ptr = map;
8355 else
8356 ptr = btf;
8357
8358 if (!cur->active_locks) {
8359 verbose(env, "%s_unlock without taking a lock\n", lock_str);
8360 return -EINVAL;
8361 }
8362
8363 if (is_res_lock && is_irq)
8364 type = REF_TYPE_RES_LOCK_IRQ;
8365 else if (is_res_lock)
8366 type = REF_TYPE_RES_LOCK;
8367 else
8368 type = REF_TYPE_LOCK;
8369 if (!find_lock_state(cur, type, reg->id, ptr)) {
8370 verbose(env, "%s_unlock of different lock\n", lock_str);
8371 return -EINVAL;
8372 }
8373 if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) {
8374 verbose(env, "%s_unlock cannot be out of order\n", lock_str);
8375 return -EINVAL;
8376 }
8377 if (release_lock_state(cur, type, reg->id, ptr)) {
8378 verbose(env, "%s_unlock of different lock\n", lock_str);
8379 return -EINVAL;
8380 }
8381
8382 invalidate_non_owning_refs(env);
8383 }
8384 return 0;
8385 }
8386
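/* Illustrative sketch of the map-value spin lock pattern checked above
 * (struct, map and key names are hypothetical):
 *
 *	struct elem { struct bpf_spin_lock lock; int cnt; };
 *
 *	struct elem *e = bpf_map_lookup_elem(&my_map, &key);
 *	if (e) {
 *		bpf_spin_lock(&e->lock);	// only one bpf_spin_lock may be held
 *		e->cnt++;
 *		bpf_spin_unlock(&e->lock);	// must unlock the same lock, in order
 *	}
 */
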
8387 static int process_timer_func(struct bpf_verifier_env *env, int regno,
8388 struct bpf_call_arg_meta *meta)
8389 {
8390 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8391 bool is_const = tnum_is_const(reg->var_off);
8392 struct bpf_map *map = reg->map_ptr;
8393 u64 val = reg->var_off.value;
8394
8395 if (!is_const) {
8396 verbose(env,
8397 "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
8398 regno);
8399 return -EINVAL;
8400 }
8401 if (!map->btf) {
8402 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
8403 map->name);
8404 return -EINVAL;
8405 }
8406 if (!btf_record_has_field(map->record, BPF_TIMER)) {
8407 verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
8408 return -EINVAL;
8409 }
8410 if (map->record->timer_off != val + reg->off) {
8411 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
8412 val + reg->off, map->record->timer_off);
8413 return -EINVAL;
8414 }
8415 if (meta->map_ptr) {
8416 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
8417 return -EFAULT;
8418 }
8419 meta->map_uid = reg->map_uid;
8420 meta->map_ptr = map;
8421 return 0;
8422 }
8423
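/* Illustrative sketch of the layout process_timer_func() expects: a
 * BTF-described map value embedding 'struct bpf_timer' at a constant offset
 * (map, key and callback names are hypothetical):
 *
 *	struct elem { struct bpf_timer t; };
 *
 *	struct elem *e = bpf_map_lookup_elem(&timer_map, &key);
 *	if (e) {
 *		bpf_timer_init(&e->t, &timer_map, CLOCK_MONOTONIC);
 *		bpf_timer_set_callback(&e->t, timer_cb);
 *		bpf_timer_start(&e->t, 0, 0);
 *	}
 */
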
8424 static int process_wq_func(struct bpf_verifier_env *env, int regno,
8425 struct bpf_kfunc_call_arg_meta *meta)
8426 {
8427 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8428 struct bpf_map *map = reg->map_ptr;
8429 u64 val = reg->var_off.value;
8430
8431 if (map->record->wq_off != val + reg->off) {
8432 verbose(env, "off %lld doesn't point to 'struct bpf_wq' that is at %d\n",
8433 val + reg->off, map->record->wq_off);
8434 return -EINVAL;
8435 }
8436 meta->map.uid = reg->map_uid;
8437 meta->map.ptr = map;
8438 return 0;
8439 }
8440
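/* Illustrative sketch of the matching BPF-side pattern for process_wq_func():
 * a map value embedding 'struct bpf_wq' at a constant offset, driven via the
 * bpf_wq kfuncs (map, key and callback names are hypothetical):
 *
 *	struct elem { struct bpf_wq w; };
 *
 *	struct elem *e = bpf_map_lookup_elem(&wq_map, &key);
 *	if (e) {
 *		bpf_wq_init(&e->w, &wq_map, 0);
 *		bpf_wq_set_callback(&e->w, wq_cb, 0);
 *		bpf_wq_start(&e->w, 0);
 *	}
 */
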
8441 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
8442 struct bpf_call_arg_meta *meta)
8443 {
8444 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8445 struct btf_field *kptr_field;
8446 struct bpf_map *map_ptr;
8447 struct btf_record *rec;
8448 u32 kptr_off;
8449
8450 if (type_is_ptr_alloc_obj(reg->type)) {
8451 rec = reg_btf_record(reg);
8452 } else { /* PTR_TO_MAP_VALUE */
8453 map_ptr = reg->map_ptr;
8454 if (!map_ptr->btf) {
8455 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
8456 map_ptr->name);
8457 return -EINVAL;
8458 }
8459 rec = map_ptr->record;
8460 meta->map_ptr = map_ptr;
8461 }
8462
8463 if (!tnum_is_const(reg->var_off)) {
8464 verbose(env,
8465 "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
8466 regno);
8467 return -EINVAL;
8468 }
8469
8470 if (!btf_record_has_field(rec, BPF_KPTR)) {
8471 verbose(env, "R%d has no valid kptr\n", regno);
8472 return -EINVAL;
8473 }
8474
8475 kptr_off = reg->off + reg->var_off.value;
8476 kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
8477 if (!kptr_field) {
8478 verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
8479 return -EACCES;
8480 }
8481 if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
8482 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
8483 return -EACCES;
8484 }
8485 meta->kptr_field = kptr_field;
8486 return 0;
8487 }
8488
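/* Illustrative sketch of a referenced kptr field exchanged through
 * bpf_kptr_xchg(), which funnels through process_kptr_func(). Struct, map and
 * variable names are hypothetical; 'acquired' is assumed to hold a referenced
 * task pointer, e.g. from bpf_task_acquire():
 *
 *	struct elem { struct task_struct __kptr *task; };
 *
 *	struct elem *e = bpf_map_lookup_elem(&kptr_map, &key);
 *	if (e) {
 *		struct task_struct *old;
 *
 *		old = bpf_kptr_xchg(&e->task, acquired);
 *		if (old)
 *			bpf_task_release(old);
 *	}
 */
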
8489 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
8490 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
8491 *
8492 * In both cases we deal with the first 8 bytes, but need to mark the next 8
8493 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
8494 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
8495 *
8496 * Mutability of bpf_dynptr is at two levels, one is at the level of struct
8497 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
8498 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
8499 * mutate the view of the dynptr and also possibly destroy it. In the latter
8500 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
8501 * memory that dynptr points to.
8502 *
8503 * The verifier will keep track both levels of mutation (bpf_dynptr's in
8504 * reg->type and the memory's in reg->dynptr.type), but there is no support for
8505 * readonly dynptr view yet, hence only the first case is tracked and checked.
8506 *
8507 * This is consistent with how C applies the const modifier to a struct object,
8508 * where the pointer itself inside bpf_dynptr becomes const but not what it
8509 * points to.
8510 *
8511 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
8512 * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
8513 */
8514 static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
8515 enum bpf_arg_type arg_type, int clone_ref_obj_id)
8516 {
8517 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8518 int err;
8519
8520 if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
8521 verbose(env,
8522 "arg#%d expected pointer to stack or const struct bpf_dynptr\n",
8523 regno - 1);
8524 return -EINVAL;
8525 }
8526
8527 /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
8528 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
8529 */
8530 if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
8531 verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
8532 return -EFAULT;
8533 }
8534
8535 /* MEM_UNINIT - Points to memory that is an appropriate candidate for
8536 * constructing a mutable bpf_dynptr object.
8537 *
8538 * Currently, this is only possible with PTR_TO_STACK
8539 * pointing to a region of at least 16 bytes which doesn't
8540 * contain an existing bpf_dynptr.
8541 *
8542 * MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
8543 * mutated or destroyed. However, the memory it points to
8544 * may be mutated.
8545 *
8546 * None - Points to an initialized dynptr that can be mutated and
8547 * destroyed, including mutation of the memory it points
8548 * to.
8549 */
8550 if (arg_type & MEM_UNINIT) {
8551 int i;
8552
8553 if (!is_dynptr_reg_valid_uninit(env, reg)) {
8554 verbose(env, "Dynptr has to be an uninitialized dynptr\n");
8555 return -EINVAL;
8556 }
8557
8558 /* we write BPF_DW bits (8 bytes) at a time */
8559 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
8560 err = check_mem_access(env, insn_idx, regno,
8561 i, BPF_DW, BPF_WRITE, -1, false, false);
8562 if (err)
8563 return err;
8564 }
8565
8566 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
8567 } else /* MEM_RDONLY and None case from above */ {
8568 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
8569 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
8570 verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
8571 return -EINVAL;
8572 }
8573
8574 if (!is_dynptr_reg_valid_init(env, reg)) {
8575 verbose(env,
8576 "Expected an initialized dynptr as arg #%d\n",
8577 regno - 1);
8578 return -EINVAL;
8579 }
8580
8581 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
8582 if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
8583 verbose(env,
8584 "Expected a dynptr of type %s as arg #%d\n",
8585 dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1);
8586 return -EINVAL;
8587 }
8588
8589 err = mark_dynptr_read(env, reg);
8590 }
8591 return err;
8592 }
8593
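/* Illustrative sketch of the two dynptr argument flavours handled above;
 * 'rb' and 'buf' are hypothetical:
 *
 *	struct bpf_dynptr dptr;				// 16 bytes of stack
 *
 *	bpf_ringbuf_reserve_dynptr(&rb, 64, 0, &dptr);	// MEM_UNINIT: constructs dptr
 *	bpf_dynptr_write(&dptr, 0, buf, sizeof(buf), 0);// initialized, mutable view
 *	bpf_ringbuf_submit_dynptr(&dptr, 0);		// releases/destroys dptr
 */
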
8594 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
8595 {
8596 struct bpf_func_state *state = func(env, reg);
8597
8598 return state->stack[spi].spilled_ptr.ref_obj_id;
8599 }
8600
8601 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8602 {
8603 return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
8604 }
8605
8606 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8607 {
8608 return meta->kfunc_flags & KF_ITER_NEW;
8609 }
8610
8611 static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8612 {
8613 return meta->kfunc_flags & KF_ITER_NEXT;
8614 }
8615
8616 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8617 {
8618 return meta->kfunc_flags & KF_ITER_DESTROY;
8619 }
8620
8621 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
8622 const struct btf_param *arg)
8623 {
8624 /* btf_check_iter_kfuncs() guarantees that first argument of any iter
8625 * kfunc is iter state pointer
8626 */
8627 if (is_iter_kfunc(meta))
8628 return arg_idx == 0;
8629
8630 /* iter passed as an argument to a generic kfunc */
8631 return btf_param_match_suffix(meta->btf, arg, "__iter");
8632 }
8633
8634 static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
8635 struct bpf_kfunc_call_arg_meta *meta)
8636 {
8637 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8638 const struct btf_type *t;
8639 int spi, err, i, nr_slots, btf_id;
8640
8641 if (reg->type != PTR_TO_STACK) {
8642 verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1);
8643 return -EINVAL;
8644 }
8645
8646 /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
8647 * ensures struct convention, so we wouldn't need to do any BTF
8648 * validation here. But given iter state can be passed as a parameter
8649 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
8650 * conservative here.
8651 */
8652 btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
8653 if (btf_id < 0) {
8654 verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1);
8655 return -EINVAL;
8656 }
8657 t = btf_type_by_id(meta->btf, btf_id);
8658 nr_slots = t->size / BPF_REG_SIZE;
8659
8660 if (is_iter_new_kfunc(meta)) {
8661 /* bpf_iter_<type>_new() expects pointer to uninit iter state */
8662 if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
8663 verbose(env, "expected uninitialized iter_%s as arg #%d\n",
8664 iter_type_str(meta->btf, btf_id), regno - 1);
8665 return -EINVAL;
8666 }
8667
8668 for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
8669 err = check_mem_access(env, insn_idx, regno,
8670 i, BPF_DW, BPF_WRITE, -1, false, false);
8671 if (err)
8672 return err;
8673 }
8674
8675 err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
8676 if (err)
8677 return err;
8678 } else {
8679 /* iter_next() or iter_destroy(), as well as any kfunc
8680 * accepting iter argument, expect initialized iter state
8681 */
8682 err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
8683 switch (err) {
8684 case 0:
8685 break;
8686 case -EINVAL:
8687 verbose(env, "expected an initialized iter_%s as arg #%d\n",
8688 iter_type_str(meta->btf, btf_id), regno - 1);
8689 return err;
8690 case -EPROTO:
8691 verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
8692 return err;
8693 default:
8694 return err;
8695 }
8696
8697 spi = iter_get_spi(env, reg, nr_slots);
8698 if (spi < 0)
8699 return spi;
8700
8701 err = mark_iter_read(env, reg, spi, nr_slots);
8702 if (err)
8703 return err;
8704
8705 /* remember meta->iter info for process_iter_next_call() */
8706 meta->iter.spi = spi;
8707 meta->iter.frameno = reg->frameno;
8708 meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
8709
8710 if (is_iter_destroy_kfunc(meta)) {
8711 err = unmark_stack_slots_iter(env, reg, nr_slots);
8712 if (err)
8713 return err;
8714 }
8715 }
8716
8717 return 0;
8718 }
8719
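/* Illustrative sketch of the open-coded iterator pattern whose three kfunc
 * calls are validated by process_iter_arg(); 'sum' is hypothetical:
 *
 *	struct bpf_iter_num it;
 *	int *v;
 *
 *	bpf_iter_num_new(&it, 0, 10);		// uninit slots -> marked as iter
 *	while ((v = bpf_iter_num_next(&it)))	// expects initialized iter state
 *		sum += *v;
 *	bpf_iter_num_destroy(&it);		// unmarks the stack slots
 */
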
8720 /* Look for a previous loop entry at insn_idx: nearest parent state
8721 * stopped at insn_idx with callsites matching those in cur->frame.
8722 */
8723 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
8724 struct bpf_verifier_state *cur,
8725 int insn_idx)
8726 {
8727 struct bpf_verifier_state_list *sl;
8728 struct bpf_verifier_state *st;
8729 struct list_head *pos, *head;
8730
8731 /* Explored states are pushed in stack order, most recent states come first */
8732 head = explored_state(env, insn_idx);
8733 list_for_each(pos, head) {
8734 sl = container_of(pos, struct bpf_verifier_state_list, node);
8735 /* If st->branches != 0, the state is a part of the current DFS verification path,
8736 * hence cur and st form a loop.
8737 */
8738 st = &sl->state;
8739 if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
8740 st->dfs_depth < cur->dfs_depth)
8741 return st;
8742 }
8743
8744 return NULL;
8745 }
8746
8747 static void reset_idmap_scratch(struct bpf_verifier_env *env);
8748 static bool regs_exact(const struct bpf_reg_state *rold,
8749 const struct bpf_reg_state *rcur,
8750 struct bpf_idmap *idmap);
8751
8752 static void maybe_widen_reg(struct bpf_verifier_env *env,
8753 struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
8754 struct bpf_idmap *idmap)
8755 {
8756 if (rold->type != SCALAR_VALUE)
8757 return;
8758 if (rold->type != rcur->type)
8759 return;
8760 if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
8761 return;
8762 __mark_reg_unknown(env, rcur);
8763 }
8764
8765 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
8766 struct bpf_verifier_state *old,
8767 struct bpf_verifier_state *cur)
8768 {
8769 struct bpf_func_state *fold, *fcur;
8770 int i, fr;
8771
8772 reset_idmap_scratch(env);
8773 for (fr = old->curframe; fr >= 0; fr--) {
8774 fold = old->frame[fr];
8775 fcur = cur->frame[fr];
8776
8777 for (i = 0; i < MAX_BPF_REG; i++)
8778 maybe_widen_reg(env,
8779 &fold->regs[i],
8780 &fcur->regs[i],
8781 &env->idmap_scratch);
8782
8783 for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
8784 if (!is_spilled_reg(&fold->stack[i]) ||
8785 !is_spilled_reg(&fcur->stack[i]))
8786 continue;
8787
8788 maybe_widen_reg(env,
8789 &fold->stack[i].spilled_ptr,
8790 &fcur->stack[i].spilled_ptr,
8791 &env->idmap_scratch);
8792 }
8793 }
8794 return 0;
8795 }
8796
8797 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
8798 struct bpf_kfunc_call_arg_meta *meta)
8799 {
8800 int iter_frameno = meta->iter.frameno;
8801 int iter_spi = meta->iter.spi;
8802
8803 return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
8804 }
8805
8806 /* process_iter_next_call() is called when verifier gets to iterator's next
8807 * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
8808 * to it as just "iter_next()" in comments below.
8809 *
8810 * BPF verifier relies on a crucial contract for any iter_next()
8811 * implementation: it should *eventually* return NULL, and once that happens
8812 * it should keep returning NULL. That is, once iterator exhausts elements to
8813 * iterate, it should never reset or spuriously return new elements.
8814 *
8815 * With the assumption of such contract, process_iter_next_call() simulates
8816 * a fork in the verifier state to validate loop logic correctness and safety
8817 * without having to simulate infinite amount of iterations.
8818 *
8819 * In current state, we first assume that iter_next() returned NULL and
8820 * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
8821 * conditions we should not form an infinite loop and should eventually reach
8822 * exit.
8823 *
8824 * Besides that, we also fork current state and enqueue it for later
8825 * verification. In a forked state we keep iterator state as ACTIVE
8826 * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
8827 * also bump iteration depth to prevent erroneous infinite loop detection
8828 * later on (see iter_active_depths_differ() comment for details). In this
8829 * state we assume that we'll eventually loop back to another iter_next()
8830 * calls (it could be in exactly same location or in some other instruction,
8831 * it doesn't matter, we don't make any unnecessary assumptions about this,
8832 * everything revolves around iterator state in a stack slot, not which
8833 * instruction is calling iter_next()). When that happens, we either will come
8834 * to iter_next() with equivalent state and can conclude that next iteration
8835 * will proceed in exactly the same way as we just verified, so it's safe to
8836 * assume that loop converges. If not, we'll go on another iteration
8837 * simulation with a different input state, until all possible starting states
8838 * are validated or we reach maximum number of instructions limit.
8839 *
8840 * This way, we will either exhaustively discover all possible input states
8841 * that iterator loop can start with and eventually will converge, or we'll
8842 * effectively regress into bounded loop simulation logic and either reach
8843 * maximum number of instructions if loop is not provably convergent, or there
8844 * is some statically known limit on number of iterations (e.g., if there is
8845 * an explicit `if n > 100 then break;` statement somewhere in the loop).
8846 *
8847 * Iteration convergence logic in is_state_visited() relies on exact
8848 * states comparison, which ignores read and precision marks.
8849 * This is necessary because read and precision marks are not finalized
8850 * while in the loop. Exact comparison might preclude convergence for
8851 * simple programs like below:
8852 *
8853 * i = 0;
8854 * while(iter_next(&it))
8855 * i++;
8856 *
8857 * At each iteration step i++ would produce a new distinct state and
8858 * eventually instruction processing limit would be reached.
8859 *
8860 * To avoid such behavior speculatively forget (widen) range for
8861 * imprecise scalar registers, if those registers were not precise at the
8862 * end of the previous iteration and do not match exactly.
8863 *
8864 * This is a conservative heuristic that allows verifying a wide range of
8865 * programs, however it precludes verification of programs that conjure an
8866 * imprecise value on the first loop iteration and use it as precise on the second.
8867 * For example, the following safe program would fail to verify:
8868 *
8869 * struct bpf_num_iter it;
8870 * int arr[10];
8871 * int i = 0, a = 0;
8872 * bpf_iter_num_new(&it, 0, 10);
8873 * while (bpf_iter_num_next(&it)) {
8874 * if (a == 0) {
8875 * a = 1;
8876 * i = 7; // Because i changed, the verifier would forget
8877 * // its range on the second loop entry.
8878 * } else {
8879 * arr[i] = 42; // This would fail to verify.
8880 * }
8881 * }
8882 * bpf_iter_num_destroy(&it);
8883 */
8884 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
8885 struct bpf_kfunc_call_arg_meta *meta)
8886 {
8887 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
8888 struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
8889 struct bpf_reg_state *cur_iter, *queued_iter;
8890
8891 BTF_TYPE_EMIT(struct bpf_iter);
8892
8893 cur_iter = get_iter_from_state(cur_st, meta);
8894
8895 if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
8896 cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
8897 verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
8898 cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
8899 return -EFAULT;
8900 }
8901
8902 if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
8903 /* Because an iter_next() call is a checkpoint, is_state_visited()
8904 * should guarantee a parent state with the same call sites and insn_idx.
8905 */
8906 if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
8907 !same_callsites(cur_st->parent, cur_st)) {
8908 verbose(env, "bug: bad parent state for iter next call");
8909 return -EFAULT;
8910 }
8911 /* Note cur_st->parent in the call below, it is necessary to skip
8912 * checkpoint created for cur_st by is_state_visited()
8913 * right at this instruction.
8914 */
8915 prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
8916 /* branch out active iter state */
8917 queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
8918 if (!queued_st)
8919 return -ENOMEM;
8920
8921 queued_iter = get_iter_from_state(queued_st, meta);
8922 queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
8923 queued_iter->iter.depth++;
8924 if (prev_st)
8925 widen_imprecise_scalars(env, prev_st, queued_st);
8926
8927 queued_fr = queued_st->frame[queued_st->curframe];
8928 mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
8929 }
8930
8931 /* switch the current iter state to DRAINED (i.e. assume iter_next()
8932 * returned NULL), but keep the depth unchanged */
8933 cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
8934 __mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);
8935
8936 return 0;
8937 }
8938
8939 static bool arg_type_is_mem_size(enum bpf_arg_type type)
8940 {
8941 return type == ARG_CONST_SIZE ||
8942 type == ARG_CONST_SIZE_OR_ZERO;
8943 }
8944
8945 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
8946 {
8947 return base_type(type) == ARG_PTR_TO_MEM &&
8948 type & MEM_UNINIT;
8949 }
8950
8951 static bool arg_type_is_release(enum bpf_arg_type type)
8952 {
8953 return type & OBJ_RELEASE;
8954 }
8955
8956 static bool arg_type_is_dynptr(enum bpf_arg_type type)
8957 {
8958 return base_type(type) == ARG_PTR_TO_DYNPTR;
8959 }
8960
8961 static int resolve_map_arg_type(struct bpf_verifier_env *env,
8962 const struct bpf_call_arg_meta *meta,
8963 enum bpf_arg_type *arg_type)
8964 {
8965 if (!meta->map_ptr) {
8966 /* kernel subsystem misconfigured verifier */
8967 verbose(env, "invalid map_ptr to access map->type\n");
8968 return -EACCES;
8969 }
8970
8971 switch (meta->map_ptr->map_type) {
8972 case BPF_MAP_TYPE_SOCKMAP:
8973 case BPF_MAP_TYPE_SOCKHASH:
8974 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
8975 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
8976 } else {
8977 verbose(env, "invalid arg_type for sockmap/sockhash\n");
8978 return -EINVAL;
8979 }
8980 break;
8981 case BPF_MAP_TYPE_BLOOM_FILTER:
8982 if (meta->func_id == BPF_FUNC_map_peek_elem)
8983 *arg_type = ARG_PTR_TO_MAP_VALUE;
8984 break;
8985 default:
8986 break;
8987 }
8988 return 0;
8989 }
8990
8991 struct bpf_reg_types {
8992 const enum bpf_reg_type types[10];
8993 u32 *btf_id;
8994 };
8995
8996 static const struct bpf_reg_types sock_types = {
8997 .types = {
8998 PTR_TO_SOCK_COMMON,
8999 PTR_TO_SOCKET,
9000 PTR_TO_TCP_SOCK,
9001 PTR_TO_XDP_SOCK,
9002 },
9003 };
9004
9005 #ifdef CONFIG_NET
9006 static const struct bpf_reg_types btf_id_sock_common_types = {
9007 .types = {
9008 PTR_TO_SOCK_COMMON,
9009 PTR_TO_SOCKET,
9010 PTR_TO_TCP_SOCK,
9011 PTR_TO_XDP_SOCK,
9012 PTR_TO_BTF_ID,
9013 PTR_TO_BTF_ID | PTR_TRUSTED,
9014 },
9015 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
9016 };
9017 #endif
9018
9019 static const struct bpf_reg_types mem_types = {
9020 .types = {
9021 PTR_TO_STACK,
9022 PTR_TO_PACKET,
9023 PTR_TO_PACKET_META,
9024 PTR_TO_MAP_KEY,
9025 PTR_TO_MAP_VALUE,
9026 PTR_TO_MEM,
9027 PTR_TO_MEM | MEM_RINGBUF,
9028 PTR_TO_BUF,
9029 PTR_TO_BTF_ID | PTR_TRUSTED,
9030 },
9031 };
9032
9033 static const struct bpf_reg_types spin_lock_types = {
9034 .types = {
9035 PTR_TO_MAP_VALUE,
9036 PTR_TO_BTF_ID | MEM_ALLOC,
9037 }
9038 };
9039
9040 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
9041 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
9042 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
9043 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
9044 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
9045 static const struct bpf_reg_types btf_ptr_types = {
9046 .types = {
9047 PTR_TO_BTF_ID,
9048 PTR_TO_BTF_ID | PTR_TRUSTED,
9049 PTR_TO_BTF_ID | MEM_RCU,
9050 },
9051 };
9052 static const struct bpf_reg_types percpu_btf_ptr_types = {
9053 .types = {
9054 PTR_TO_BTF_ID | MEM_PERCPU,
9055 PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
9056 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
9057 }
9058 };
9059 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
9060 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
9061 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
9062 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
9063 static const struct bpf_reg_types kptr_xchg_dest_types = {
9064 .types = {
9065 PTR_TO_MAP_VALUE,
9066 PTR_TO_BTF_ID | MEM_ALLOC
9067 }
9068 };
9069 static const struct bpf_reg_types dynptr_types = {
9070 .types = {
9071 PTR_TO_STACK,
9072 CONST_PTR_TO_DYNPTR,
9073 }
9074 };
9075
9076 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
9077 [ARG_PTR_TO_MAP_KEY] = &mem_types,
9078 [ARG_PTR_TO_MAP_VALUE] = &mem_types,
9079 [ARG_CONST_SIZE] = &scalar_types,
9080 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
9081 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
9082 [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
9083 [ARG_PTR_TO_CTX] = &context_types,
9084 [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
9085 #ifdef CONFIG_NET
9086 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
9087 #endif
9088 [ARG_PTR_TO_SOCKET] = &fullsock_types,
9089 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
9090 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
9091 [ARG_PTR_TO_MEM] = &mem_types,
9092 [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
9093 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
9094 [ARG_PTR_TO_FUNC] = &func_ptr_types,
9095 [ARG_PTR_TO_STACK] = &stack_ptr_types,
9096 [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
9097 [ARG_PTR_TO_TIMER] = &timer_types,
9098 [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types,
9099 [ARG_PTR_TO_DYNPTR] = &dynptr_types,
9100 };
9101
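/* Illustrative note on how the table above is consulted: for a helper proto
 * declared roughly like bpf_ringbuf_reserve()'s, e.g.
 *
 *	.arg1_type = ARG_CONST_MAP_PTR,			// R1: CONST_PTR_TO_MAP
 *	.arg2_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,	// R2: SCALAR_VALUE
 *
 * check_reg_type() below looks up compatible_reg_types[base_type(arg_type)]
 * and accepts the register only if its (modifier-folded) type is listed.
 */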
9102 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
9103 enum bpf_arg_type arg_type,
9104 const u32 *arg_btf_id,
9105 struct bpf_call_arg_meta *meta)
9106 {
9107 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
9108 enum bpf_reg_type expected, type = reg->type;
9109 const struct bpf_reg_types *compatible;
9110 int i, j;
9111
9112 compatible = compatible_reg_types[base_type(arg_type)];
9113 if (!compatible) {
9114 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
9115 return -EFAULT;
9116 }
9117
9118 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
9119 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
9120 *
9121 * Same for MAYBE_NULL:
9122 *
9123 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
9124 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
9125 *
9126 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
9127 *
9128 * Therefore we fold these flags depending on the arg_type before comparison.
9129 */
9130 if (arg_type & MEM_RDONLY)
9131 type &= ~MEM_RDONLY;
9132 if (arg_type & PTR_MAYBE_NULL)
9133 type &= ~PTR_MAYBE_NULL;
9134 if (base_type(arg_type) == ARG_PTR_TO_MEM)
9135 type &= ~DYNPTR_TYPE_FLAG_MASK;
9136
9137 /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
9138 if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
9139 type &= ~MEM_ALLOC;
9140 type &= ~MEM_PERCPU;
9141 }
9142
9143 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
9144 expected = compatible->types[i];
9145 if (expected == NOT_INIT)
9146 break;
9147
9148 if (type == expected)
9149 goto found;
9150 }
9151
9152 verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
9153 for (j = 0; j + 1 < i; j++)
9154 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
9155 verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
9156 return -EACCES;
9157
9158 found:
9159 if (base_type(reg->type) != PTR_TO_BTF_ID)
9160 return 0;
9161
9162 if (compatible == &mem_types) {
9163 if (!(arg_type & MEM_RDONLY)) {
9164 verbose(env,
9165 "%s() may write into memory pointed by R%d type=%s\n",
9166 func_id_name(meta->func_id),
9167 regno, reg_type_str(env, reg->type));
9168 return -EACCES;
9169 }
9170 return 0;
9171 }
9172
9173 switch ((int)reg->type) {
9174 case PTR_TO_BTF_ID:
9175 case PTR_TO_BTF_ID | PTR_TRUSTED:
9176 case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
9177 case PTR_TO_BTF_ID | MEM_RCU:
9178 case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
9179 case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
9180 {
9181 /* For bpf_sk_release, it needs to match against first member
9182 * 'struct sock_common', hence make an exception for it. This
9183 * allows bpf_sk_release to work for multiple socket types.
9184 */
9185 bool strict_type_match = arg_type_is_release(arg_type) &&
9186 meta->func_id != BPF_FUNC_sk_release;
9187
9188 if (type_may_be_null(reg->type) &&
9189 (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
9190 verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
9191 return -EACCES;
9192 }
9193
9194 if (!arg_btf_id) {
9195 if (!compatible->btf_id) {
9196 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
9197 return -EFAULT;
9198 }
9199 arg_btf_id = compatible->btf_id;
9200 }
9201
9202 if (meta->func_id == BPF_FUNC_kptr_xchg) {
9203 if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
9204 return -EACCES;
9205 } else {
9206 if (arg_btf_id == BPF_PTR_POISON) {
9207 verbose(env, "verifier internal error:");
9208 verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
9209 regno);
9210 return -EACCES;
9211 }
9212
9213 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
9214 btf_vmlinux, *arg_btf_id,
9215 strict_type_match)) {
9216 verbose(env, "R%d is of type %s but %s is expected\n",
9217 regno, btf_type_name(reg->btf, reg->btf_id),
9218 btf_type_name(btf_vmlinux, *arg_btf_id));
9219 return -EACCES;
9220 }
9221 }
9222 break;
9223 }
9224 case PTR_TO_BTF_ID | MEM_ALLOC:
9225 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
9226 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
9227 meta->func_id != BPF_FUNC_kptr_xchg) {
9228 verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
9229 return -EFAULT;
9230 }
9231 /* Check if local kptr in src arg matches kptr in dst arg */
9232 if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
9233 if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
9234 return -EACCES;
9235 }
9236 break;
9237 case PTR_TO_BTF_ID | MEM_PERCPU:
9238 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
9239 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
9240 /* Handled by helper specific checks */
9241 break;
9242 default:
9243 verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
9244 return -EFAULT;
9245 }
9246 return 0;
9247 }
9248
9249 static struct btf_field *
9250 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
9251 {
9252 struct btf_field *field;
9253 struct btf_record *rec;
9254
9255 rec = reg_btf_record(reg);
9256 if (!rec)
9257 return NULL;
9258
9259 field = btf_record_find(rec, off, fields);
9260 if (!field)
9261 return NULL;
9262
9263 return field;
9264 }
9265
9266 static int check_func_arg_reg_off(struct bpf_verifier_env *env,
9267 const struct bpf_reg_state *reg, int regno,
9268 enum bpf_arg_type arg_type)
9269 {
9270 u32 type = reg->type;
9271
9272 /* When referenced register is passed to release function, its fixed
9273 * offset must be 0.
9274 *
9275 * We will check that an arg_type_is_release() reg has a ref_obj_id when
9276 * storing meta->release_regno.
9277 */
9278 if (arg_type_is_release(arg_type)) {
9279 /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
9280 * may not directly point to the object being released, but to
9281 * dynptr pointing to such object, which might be at some offset
9282 * on the stack. In that case, we simply fall back to the
9283 * default handling.
9284 */
9285 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
9286 return 0;
9287
9288 /* Doing check_ptr_off_reg check for the offset will catch this
9289 * because fixed_off_ok is false, but checking here allows us
9290 * to give the user a better error message.
9291 */
9292 if (reg->off) {
9293 verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
9294 regno);
9295 return -EINVAL;
9296 }
9297 return __check_ptr_off_reg(env, reg, regno, false);
9298 }
9299
9300 switch (type) {
9301 /* Pointer types where both fixed and variable offset is explicitly allowed: */
9302 case PTR_TO_STACK:
9303 case PTR_TO_PACKET:
9304 case PTR_TO_PACKET_META:
9305 case PTR_TO_MAP_KEY:
9306 case PTR_TO_MAP_VALUE:
9307 case PTR_TO_MEM:
9308 case PTR_TO_MEM | MEM_RDONLY:
9309 case PTR_TO_MEM | MEM_RINGBUF:
9310 case PTR_TO_BUF:
9311 case PTR_TO_BUF | MEM_RDONLY:
9312 case PTR_TO_ARENA:
9313 case SCALAR_VALUE:
9314 return 0;
9315 /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
9316 * fixed offset.
9317 */
9318 case PTR_TO_BTF_ID:
9319 case PTR_TO_BTF_ID | MEM_ALLOC:
9320 case PTR_TO_BTF_ID | PTR_TRUSTED:
9321 case PTR_TO_BTF_ID | MEM_RCU:
9322 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
9323 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
9324 /* When referenced PTR_TO_BTF_ID is passed to release function,
9325 * its fixed offset must be 0. In the other cases, fixed offset
9326 * can be non-zero. This was already checked above. So pass
9327 * fixed_off_ok as true to allow fixed offset for all other
9328 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
9329 * still need to do checks instead of returning.
9330 */
9331 return __check_ptr_off_reg(env, reg, regno, true);
9332 default:
9333 return __check_ptr_off_reg(env, reg, regno, false);
9334 }
9335 }
9336
9337 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
9338 const struct bpf_func_proto *fn,
9339 struct bpf_reg_state *regs)
9340 {
9341 struct bpf_reg_state *state = NULL;
9342 int i;
9343
9344 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
9345 if (arg_type_is_dynptr(fn->arg_type[i])) {
9346 if (state) {
9347 verbose(env, "verifier internal error: multiple dynptr args\n");
9348 return NULL;
9349 }
9350 state = &regs[BPF_REG_1 + i];
9351 }
9352
9353 if (!state)
9354 verbose(env, "verifier internal error: no dynptr arg found\n");
9355
9356 return state;
9357 }
9358
9359 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9360 {
9361 struct bpf_func_state *state = func(env, reg);
9362 int spi;
9363
9364 if (reg->type == CONST_PTR_TO_DYNPTR)
9365 return reg->id;
9366 spi = dynptr_get_spi(env, reg);
9367 if (spi < 0)
9368 return spi;
9369 return state->stack[spi].spilled_ptr.id;
9370 }
9371
9372 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9373 {
9374 struct bpf_func_state *state = func(env, reg);
9375 int spi;
9376
9377 if (reg->type == CONST_PTR_TO_DYNPTR)
9378 return reg->ref_obj_id;
9379 spi = dynptr_get_spi(env, reg);
9380 if (spi < 0)
9381 return spi;
9382 return state->stack[spi].spilled_ptr.ref_obj_id;
9383 }
9384
9385 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
9386 struct bpf_reg_state *reg)
9387 {
9388 struct bpf_func_state *state = func(env, reg);
9389 int spi;
9390
9391 if (reg->type == CONST_PTR_TO_DYNPTR)
9392 return reg->dynptr.type;
9393
9394 spi = __get_spi(reg->off);
9395 if (spi < 0) {
9396 verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
9397 return BPF_DYNPTR_TYPE_INVALID;
9398 }
9399
9400 return state->stack[spi].spilled_ptr.dynptr.type;
9401 }
9402
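/* Validate an ARG_PTR_TO_CONST_STR argument: the register must point, at a
 * constant offset, to a NUL-terminated string living in a read-only map
 * (e.g. the format string passed to bpf_snprintf()).
 */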
9403 static int check_reg_const_str(struct bpf_verifier_env *env,
9404 struct bpf_reg_state *reg, u32 regno)
9405 {
9406 struct bpf_map *map = reg->map_ptr;
9407 int err;
9408 int map_off;
9409 u64 map_addr;
9410 char *str_ptr;
9411
9412 if (reg->type != PTR_TO_MAP_VALUE)
9413 return -EINVAL;
9414
9415 if (!bpf_map_is_rdonly(map)) {
9416 		verbose(env, "R%d does not point to a readonly map\n", regno);
9417 return -EACCES;
9418 }
9419
9420 if (!tnum_is_const(reg->var_off)) {
9421 		verbose(env, "R%d is not a constant address\n", regno);
9422 return -EACCES;
9423 }
9424
9425 if (!map->ops->map_direct_value_addr) {
9426 verbose(env, "no direct value access support for this map type\n");
9427 return -EACCES;
9428 }
9429
9430 err = check_map_access(env, regno, reg->off,
9431 map->value_size - reg->off, false,
9432 ACCESS_HELPER);
9433 if (err)
9434 return err;
9435
9436 map_off = reg->off + reg->var_off.value;
9437 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
9438 if (err) {
9439 verbose(env, "direct value access on string failed\n");
9440 return err;
9441 }
9442
9443 str_ptr = (char *)(long)(map_addr);
9444 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
9445 verbose(env, "string is not zero-terminated\n");
9446 return -EINVAL;
9447 }
9448 return 0;
9449 }
9450
9451 /* Returns constant key value in `value` if possible, else negative error */
9452 static int get_constant_map_key(struct bpf_verifier_env *env,
9453 struct bpf_reg_state *key,
9454 u32 key_size,
9455 s64 *value)
9456 {
9457 struct bpf_func_state *state = func(env, key);
9458 struct bpf_reg_state *reg;
9459 int slot, spi, off;
9460 int spill_size = 0;
9461 int zero_size = 0;
9462 int stack_off;
9463 int i, err;
9464 u8 *stype;
9465
9466 if (!env->bpf_capable)
9467 return -EOPNOTSUPP;
9468 if (key->type != PTR_TO_STACK)
9469 return -EOPNOTSUPP;
9470 if (!tnum_is_const(key->var_off))
9471 return -EOPNOTSUPP;
9472
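	/* key->off + var_off is a (negative) offset from the frame pointer;
	 * convert it into a stack slot index (spi) and the byte position
	 * within that 8-byte slot (off). E.g. a key at fp-8 gives
	 * stack_off = -8, slot = 7, spi = 0, off = 7.
	 */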
9473 stack_off = key->off + key->var_off.value;
9474 slot = -stack_off - 1;
9475 spi = slot / BPF_REG_SIZE;
9476 off = slot % BPF_REG_SIZE;
9477 stype = state->stack[spi].slot_type;
9478
9479 /* First handle precisely tracked STACK_ZERO */
9480 for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
9481 zero_size++;
9482 if (zero_size >= key_size) {
9483 *value = 0;
9484 return 0;
9485 }
9486
9487 /* Check that stack contains a scalar spill of expected size */
9488 if (!is_spilled_scalar_reg(&state->stack[spi]))
9489 return -EOPNOTSUPP;
9490 for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
9491 spill_size++;
9492 if (spill_size != key_size)
9493 return -EOPNOTSUPP;
9494
9495 reg = &state->stack[spi].spilled_ptr;
9496 if (!tnum_is_const(reg->var_off))
9497 /* Stack value not statically known */
9498 return -EOPNOTSUPP;
9499
9500 /* We are relying on a constant value. So mark as precise
9501 * to prevent pruning on it.
9502 */
9503 bt_set_frame_slot(&env->bt, key->frameno, spi);
9504 err = mark_chain_precision_batch(env);
9505 if (err < 0)
9506 return err;
9507
9508 *value = reg->var_off.value;
9509 return 0;
9510 }
9511
9512 static bool can_elide_value_nullness(enum bpf_map_type type);
9513
9514 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
9515 struct bpf_call_arg_meta *meta,
9516 const struct bpf_func_proto *fn,
9517 int insn_idx)
9518 {
9519 u32 regno = BPF_REG_1 + arg;
9520 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
9521 enum bpf_arg_type arg_type = fn->arg_type[arg];
9522 enum bpf_reg_type type = reg->type;
9523 u32 *arg_btf_id = NULL;
9524 u32 key_size;
9525 int err = 0;
9526
9527 if (arg_type == ARG_DONTCARE)
9528 return 0;
9529
9530 err = check_reg_arg(env, regno, SRC_OP);
9531 if (err)
9532 return err;
9533
9534 if (arg_type == ARG_ANYTHING) {
9535 if (is_pointer_value(env, regno)) {
9536 verbose(env, "R%d leaks addr into helper function\n",
9537 regno);
9538 return -EACCES;
9539 }
9540 return 0;
9541 }
9542
9543 if (type_is_pkt_pointer(type) &&
9544 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
9545 verbose(env, "helper access to the packet is not allowed\n");
9546 return -EACCES;
9547 }
9548
9549 if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
9550 err = resolve_map_arg_type(env, meta, &arg_type);
9551 if (err)
9552 return err;
9553 }
9554
9555 if (register_is_null(reg) && type_may_be_null(arg_type))
9556 /* A NULL register has a SCALAR_VALUE type, so skip
9557 * type checking.
9558 */
9559 goto skip_type_check;
9560
9561 /* arg_btf_id and arg_size are in a union. */
9562 if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
9563 base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
9564 arg_btf_id = fn->arg_btf_id[arg];
9565
9566 err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
9567 if (err)
9568 return err;
9569
9570 err = check_func_arg_reg_off(env, reg, regno, arg_type);
9571 if (err)
9572 return err;
9573
9574 skip_type_check:
9575 if (arg_type_is_release(arg_type)) {
9576 if (arg_type_is_dynptr(arg_type)) {
9577 struct bpf_func_state *state = func(env, reg);
9578 int spi;
9579
9580 /* Only dynptr created on stack can be released, thus
9581 * the get_spi and stack state checks for spilled_ptr
9582 * should only be done before process_dynptr_func for
9583 * PTR_TO_STACK.
9584 */
9585 if (reg->type == PTR_TO_STACK) {
9586 spi = dynptr_get_spi(env, reg);
9587 if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
9588 verbose(env, "arg %d is an unacquired reference\n", regno);
9589 return -EINVAL;
9590 }
9591 } else {
9592 verbose(env, "cannot release unowned const bpf_dynptr\n");
9593 return -EINVAL;
9594 }
9595 } else if (!reg->ref_obj_id && !register_is_null(reg)) {
9596 verbose(env, "R%d must be referenced when passed to release function\n",
9597 regno);
9598 return -EINVAL;
9599 }
9600 if (meta->release_regno) {
9601 verbose(env, "verifier internal error: more than one release argument\n");
9602 return -EFAULT;
9603 }
9604 meta->release_regno = regno;
9605 }
9606
9607 if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
9608 if (meta->ref_obj_id) {
9609 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
9610 regno, reg->ref_obj_id,
9611 meta->ref_obj_id);
9612 return -EFAULT;
9613 }
9614 meta->ref_obj_id = reg->ref_obj_id;
9615 }
9616
9617 switch (base_type(arg_type)) {
9618 case ARG_CONST_MAP_PTR:
9619 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
9620 if (meta->map_ptr) {
9621 /* Use map_uid (which is unique id of inner map) to reject:
9622 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
9623 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
9624 * if (inner_map1 && inner_map2) {
9625 * timer = bpf_map_lookup_elem(inner_map1);
9626 * if (timer)
9627 * // mismatch would have been allowed
9628 * bpf_timer_init(timer, inner_map2);
9629 * }
9630 *
9631 * Comparing map_ptr is enough to distinguish normal and outer maps.
9632 */
9633 if (meta->map_ptr != reg->map_ptr ||
9634 meta->map_uid != reg->map_uid) {
9635 verbose(env,
9636 "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
9637 meta->map_uid, reg->map_uid);
9638 return -EINVAL;
9639 }
9640 }
9641 meta->map_ptr = reg->map_ptr;
9642 meta->map_uid = reg->map_uid;
9643 break;
9644 case ARG_PTR_TO_MAP_KEY:
9645 /* bpf_map_xxx(..., map_ptr, ..., key) call:
9646 * check that [key, key + map->key_size) are within
9647 * stack limits and initialized
9648 */
9649 if (!meta->map_ptr) {
9650 /* in function declaration map_ptr must come before
9651 * map_key, so that it's verified and known before
9652 * we have to check map_key here. Otherwise it means
9653 			 * that the kernel subsystem misconfigured the verifier.
9654 */
9655 verbose(env, "invalid map_ptr to access map->key\n");
9656 return -EACCES;
9657 }
9658 key_size = meta->map_ptr->key_size;
9659 err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
9660 if (err)
9661 return err;
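		/* For map types where the verifier can elide the NULL check on
		 * the lookup result, try to extract a constant key now;
		 * -EOPNOTSUPP just means the key is not a tracked constant and
		 * is not treated as an error.
		 */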
9662 if (can_elide_value_nullness(meta->map_ptr->map_type)) {
9663 err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
9664 if (err < 0) {
9665 meta->const_map_key = -1;
9666 if (err == -EOPNOTSUPP)
9667 err = 0;
9668 else
9669 return err;
9670 }
9671 }
9672 break;
9673 case ARG_PTR_TO_MAP_VALUE:
9674 if (type_may_be_null(arg_type) && register_is_null(reg))
9675 return 0;
9676
9677 /* bpf_map_xxx(..., map_ptr, ..., value) call:
9678 * check [value, value + map->value_size) validity
9679 */
9680 if (!meta->map_ptr) {
9681 /* kernel subsystem misconfigured verifier */
9682 verbose(env, "invalid map_ptr to access map->value\n");
9683 return -EACCES;
9684 }
9685 meta->raw_mode = arg_type & MEM_UNINIT;
9686 err = check_helper_mem_access(env, regno, meta->map_ptr->value_size,
9687 arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
9688 false, meta);
9689 break;
9690 case ARG_PTR_TO_PERCPU_BTF_ID:
9691 if (!reg->btf_id) {
9692 verbose(env, "Helper has invalid btf_id in R%d\n", regno);
9693 return -EACCES;
9694 }
9695 meta->ret_btf = reg->btf;
9696 meta->ret_btf_id = reg->btf_id;
9697 break;
9698 case ARG_PTR_TO_SPIN_LOCK:
9699 if (in_rbtree_lock_required_cb(env)) {
9700 verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
9701 return -EACCES;
9702 }
9703 if (meta->func_id == BPF_FUNC_spin_lock) {
9704 err = process_spin_lock(env, regno, PROCESS_SPIN_LOCK);
9705 if (err)
9706 return err;
9707 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
9708 err = process_spin_lock(env, regno, 0);
9709 if (err)
9710 return err;
9711 } else {
9712 verbose(env, "verifier internal error\n");
9713 return -EFAULT;
9714 }
9715 break;
9716 case ARG_PTR_TO_TIMER:
9717 err = process_timer_func(env, regno, meta);
9718 if (err)
9719 return err;
9720 break;
9721 case ARG_PTR_TO_FUNC:
9722 meta->subprogno = reg->subprogno;
9723 break;
9724 case ARG_PTR_TO_MEM:
9725 /* The access to this pointer is only checked when we hit the
9726 * next is_mem_size argument below.
9727 */
9728 meta->raw_mode = arg_type & MEM_UNINIT;
9729 if (arg_type & MEM_FIXED_SIZE) {
9730 err = check_helper_mem_access(env, regno, fn->arg_size[arg],
9731 arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
9732 false, meta);
9733 if (err)
9734 return err;
9735 if (arg_type & MEM_ALIGNED)
9736 err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
9737 }
9738 break;
9739 case ARG_CONST_SIZE:
9740 err = check_mem_size_reg(env, reg, regno,
9741 fn->arg_type[arg - 1] & MEM_WRITE ?
9742 BPF_WRITE : BPF_READ,
9743 false, meta);
9744 break;
9745 case ARG_CONST_SIZE_OR_ZERO:
9746 err = check_mem_size_reg(env, reg, regno,
9747 fn->arg_type[arg - 1] & MEM_WRITE ?
9748 BPF_WRITE : BPF_READ,
9749 true, meta);
9750 break;
9751 case ARG_PTR_TO_DYNPTR:
9752 err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
9753 if (err)
9754 return err;
9755 break;
9756 case ARG_CONST_ALLOC_SIZE_OR_ZERO:
9757 if (!tnum_is_const(reg->var_off)) {
9758 			verbose(env, "R%d is not a known constant\n",
9759 regno);
9760 return -EACCES;
9761 }
9762 meta->mem_size = reg->var_off.value;
9763 err = mark_chain_precision(env, regno);
9764 if (err)
9765 return err;
9766 break;
9767 case ARG_PTR_TO_CONST_STR:
9768 {
9769 err = check_reg_const_str(env, reg, regno);
9770 if (err)
9771 return err;
9772 break;
9773 }
9774 case ARG_KPTR_XCHG_DEST:
9775 err = process_kptr_func(env, regno, meta);
9776 if (err)
9777 return err;
9778 break;
9779 }
9780
9781 return err;
9782 }
9783
9784 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
9785 {
9786 enum bpf_attach_type eatype = env->prog->expected_attach_type;
9787 enum bpf_prog_type type = resolve_prog_type(env->prog);
9788
9789 if (func_id != BPF_FUNC_map_update_elem &&
9790 func_id != BPF_FUNC_map_delete_elem)
9791 return false;
9792
9793 /* It's not possible to get access to a locked struct sock in these
9794 * contexts, so updating is safe.
9795 */
9796 switch (type) {
9797 case BPF_PROG_TYPE_TRACING:
9798 if (eatype == BPF_TRACE_ITER)
9799 return true;
9800 break;
9801 case BPF_PROG_TYPE_SOCK_OPS:
9802 /* map_update allowed only via dedicated helpers with event type checks */
9803 if (func_id == BPF_FUNC_map_delete_elem)
9804 return true;
9805 break;
9806 case BPF_PROG_TYPE_SOCKET_FILTER:
9807 case BPF_PROG_TYPE_SCHED_CLS:
9808 case BPF_PROG_TYPE_SCHED_ACT:
9809 case BPF_PROG_TYPE_XDP:
9810 case BPF_PROG_TYPE_SK_REUSEPORT:
9811 case BPF_PROG_TYPE_FLOW_DISSECTOR:
9812 case BPF_PROG_TYPE_SK_LOOKUP:
9813 return true;
9814 default:
9815 break;
9816 }
9817
9818 verbose(env, "cannot update sockmap in this context\n");
9819 return false;
9820 }
9821
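/* Tail calls from subprograms are only allowed when the program will be
 * JITed and the JIT can emit the extra bookkeeping needed to combine
 * bpf-to-bpf calls with tail calls.
 */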
9822 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
9823 {
9824 return env->prog->jit_requested &&
9825 bpf_jit_supports_subprog_tailcalls();
9826 }
9827
9828 static int check_map_func_compatibility(struct bpf_verifier_env *env,
9829 struct bpf_map *map, int func_id)
9830 {
9831 if (!map)
9832 return 0;
9833
9834 /* We need a two way check, first is from map perspective ... */
9835 switch (map->map_type) {
9836 case BPF_MAP_TYPE_PROG_ARRAY:
9837 if (func_id != BPF_FUNC_tail_call)
9838 goto error;
9839 break;
9840 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
9841 if (func_id != BPF_FUNC_perf_event_read &&
9842 func_id != BPF_FUNC_perf_event_output &&
9843 func_id != BPF_FUNC_skb_output &&
9844 func_id != BPF_FUNC_perf_event_read_value &&
9845 func_id != BPF_FUNC_xdp_output)
9846 goto error;
9847 break;
9848 case BPF_MAP_TYPE_RINGBUF:
9849 if (func_id != BPF_FUNC_ringbuf_output &&
9850 func_id != BPF_FUNC_ringbuf_reserve &&
9851 func_id != BPF_FUNC_ringbuf_query &&
9852 func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
9853 func_id != BPF_FUNC_ringbuf_submit_dynptr &&
9854 func_id != BPF_FUNC_ringbuf_discard_dynptr)
9855 goto error;
9856 break;
9857 case BPF_MAP_TYPE_USER_RINGBUF:
9858 if (func_id != BPF_FUNC_user_ringbuf_drain)
9859 goto error;
9860 break;
9861 case BPF_MAP_TYPE_STACK_TRACE:
9862 if (func_id != BPF_FUNC_get_stackid)
9863 goto error;
9864 break;
9865 case BPF_MAP_TYPE_CGROUP_ARRAY:
9866 if (func_id != BPF_FUNC_skb_under_cgroup &&
9867 func_id != BPF_FUNC_current_task_under_cgroup)
9868 goto error;
9869 break;
9870 case BPF_MAP_TYPE_CGROUP_STORAGE:
9871 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
9872 if (func_id != BPF_FUNC_get_local_storage)
9873 goto error;
9874 break;
9875 case BPF_MAP_TYPE_DEVMAP:
9876 case BPF_MAP_TYPE_DEVMAP_HASH:
9877 if (func_id != BPF_FUNC_redirect_map &&
9878 func_id != BPF_FUNC_map_lookup_elem)
9879 goto error;
9880 break;
9881 /* Restrict bpf side of cpumap and xskmap, open when use-cases
9882 * appear.
9883 */
9884 case BPF_MAP_TYPE_CPUMAP:
9885 if (func_id != BPF_FUNC_redirect_map)
9886 goto error;
9887 break;
9888 case BPF_MAP_TYPE_XSKMAP:
9889 if (func_id != BPF_FUNC_redirect_map &&
9890 func_id != BPF_FUNC_map_lookup_elem)
9891 goto error;
9892 break;
9893 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
9894 case BPF_MAP_TYPE_HASH_OF_MAPS:
9895 if (func_id != BPF_FUNC_map_lookup_elem)
9896 goto error;
9897 break;
9898 case BPF_MAP_TYPE_SOCKMAP:
9899 if (func_id != BPF_FUNC_sk_redirect_map &&
9900 func_id != BPF_FUNC_sock_map_update &&
9901 func_id != BPF_FUNC_msg_redirect_map &&
9902 func_id != BPF_FUNC_sk_select_reuseport &&
9903 func_id != BPF_FUNC_map_lookup_elem &&
9904 !may_update_sockmap(env, func_id))
9905 goto error;
9906 break;
9907 case BPF_MAP_TYPE_SOCKHASH:
9908 if (func_id != BPF_FUNC_sk_redirect_hash &&
9909 func_id != BPF_FUNC_sock_hash_update &&
9910 func_id != BPF_FUNC_msg_redirect_hash &&
9911 func_id != BPF_FUNC_sk_select_reuseport &&
9912 func_id != BPF_FUNC_map_lookup_elem &&
9913 !may_update_sockmap(env, func_id))
9914 goto error;
9915 break;
9916 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
9917 if (func_id != BPF_FUNC_sk_select_reuseport)
9918 goto error;
9919 break;
9920 case BPF_MAP_TYPE_QUEUE:
9921 case BPF_MAP_TYPE_STACK:
9922 if (func_id != BPF_FUNC_map_peek_elem &&
9923 func_id != BPF_FUNC_map_pop_elem &&
9924 func_id != BPF_FUNC_map_push_elem)
9925 goto error;
9926 break;
9927 case BPF_MAP_TYPE_SK_STORAGE:
9928 if (func_id != BPF_FUNC_sk_storage_get &&
9929 func_id != BPF_FUNC_sk_storage_delete &&
9930 func_id != BPF_FUNC_kptr_xchg)
9931 goto error;
9932 break;
9933 case BPF_MAP_TYPE_INODE_STORAGE:
9934 if (func_id != BPF_FUNC_inode_storage_get &&
9935 func_id != BPF_FUNC_inode_storage_delete &&
9936 func_id != BPF_FUNC_kptr_xchg)
9937 goto error;
9938 break;
9939 case BPF_MAP_TYPE_TASK_STORAGE:
9940 if (func_id != BPF_FUNC_task_storage_get &&
9941 func_id != BPF_FUNC_task_storage_delete &&
9942 func_id != BPF_FUNC_kptr_xchg)
9943 goto error;
9944 break;
9945 case BPF_MAP_TYPE_CGRP_STORAGE:
9946 if (func_id != BPF_FUNC_cgrp_storage_get &&
9947 func_id != BPF_FUNC_cgrp_storage_delete &&
9948 func_id != BPF_FUNC_kptr_xchg)
9949 goto error;
9950 break;
9951 case BPF_MAP_TYPE_BLOOM_FILTER:
9952 if (func_id != BPF_FUNC_map_peek_elem &&
9953 func_id != BPF_FUNC_map_push_elem)
9954 goto error;
9955 break;
9956 default:
9957 break;
9958 }
9959
9960 /* ... and second from the function itself. */
9961 switch (func_id) {
9962 case BPF_FUNC_tail_call:
9963 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
9964 goto error;
9965 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
9966 verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
9967 return -EINVAL;
9968 }
9969 break;
9970 case BPF_FUNC_perf_event_read:
9971 case BPF_FUNC_perf_event_output:
9972 case BPF_FUNC_perf_event_read_value:
9973 case BPF_FUNC_skb_output:
9974 case BPF_FUNC_xdp_output:
9975 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
9976 goto error;
9977 break;
9978 case BPF_FUNC_ringbuf_output:
9979 case BPF_FUNC_ringbuf_reserve:
9980 case BPF_FUNC_ringbuf_query:
9981 case BPF_FUNC_ringbuf_reserve_dynptr:
9982 case BPF_FUNC_ringbuf_submit_dynptr:
9983 case BPF_FUNC_ringbuf_discard_dynptr:
9984 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
9985 goto error;
9986 break;
9987 case BPF_FUNC_user_ringbuf_drain:
9988 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
9989 goto error;
9990 break;
9991 case BPF_FUNC_get_stackid:
9992 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
9993 goto error;
9994 break;
9995 case BPF_FUNC_current_task_under_cgroup:
9996 case BPF_FUNC_skb_under_cgroup:
9997 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
9998 goto error;
9999 break;
10000 case BPF_FUNC_redirect_map:
10001 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
10002 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
10003 map->map_type != BPF_MAP_TYPE_CPUMAP &&
10004 map->map_type != BPF_MAP_TYPE_XSKMAP)
10005 goto error;
10006 break;
10007 case BPF_FUNC_sk_redirect_map:
10008 case BPF_FUNC_msg_redirect_map:
10009 case BPF_FUNC_sock_map_update:
10010 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
10011 goto error;
10012 break;
10013 case BPF_FUNC_sk_redirect_hash:
10014 case BPF_FUNC_msg_redirect_hash:
10015 case BPF_FUNC_sock_hash_update:
10016 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
10017 goto error;
10018 break;
10019 case BPF_FUNC_get_local_storage:
10020 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
10021 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
10022 goto error;
10023 break;
10024 case BPF_FUNC_sk_select_reuseport:
10025 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
10026 map->map_type != BPF_MAP_TYPE_SOCKMAP &&
10027 map->map_type != BPF_MAP_TYPE_SOCKHASH)
10028 goto error;
10029 break;
10030 case BPF_FUNC_map_pop_elem:
10031 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
10032 map->map_type != BPF_MAP_TYPE_STACK)
10033 goto error;
10034 break;
10035 case BPF_FUNC_map_peek_elem:
10036 case BPF_FUNC_map_push_elem:
10037 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
10038 map->map_type != BPF_MAP_TYPE_STACK &&
10039 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
10040 goto error;
10041 break;
10042 case BPF_FUNC_map_lookup_percpu_elem:
10043 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
10044 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
10045 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
10046 goto error;
10047 break;
10048 case BPF_FUNC_sk_storage_get:
10049 case BPF_FUNC_sk_storage_delete:
10050 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
10051 goto error;
10052 break;
10053 case BPF_FUNC_inode_storage_get:
10054 case BPF_FUNC_inode_storage_delete:
10055 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
10056 goto error;
10057 break;
10058 case BPF_FUNC_task_storage_get:
10059 case BPF_FUNC_task_storage_delete:
10060 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
10061 goto error;
10062 break;
10063 case BPF_FUNC_cgrp_storage_get:
10064 case BPF_FUNC_cgrp_storage_delete:
10065 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
10066 goto error;
10067 break;
10068 default:
10069 break;
10070 }
10071
10072 return 0;
10073 error:
10074 verbose(env, "cannot pass map_type %d into func %s#%d\n",
10075 map->map_type, func_id_name(func_id), func_id);
10076 return -EINVAL;
10077 }
10078
10079 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
10080 {
10081 int count = 0;
10082
10083 if (arg_type_is_raw_mem(fn->arg1_type))
10084 count++;
10085 if (arg_type_is_raw_mem(fn->arg2_type))
10086 count++;
10087 if (arg_type_is_raw_mem(fn->arg3_type))
10088 count++;
10089 if (arg_type_is_raw_mem(fn->arg4_type))
10090 count++;
10091 if (arg_type_is_raw_mem(fn->arg5_type))
10092 count++;
10093
10094 /* We only support one arg being in raw mode at the moment,
10095 * which is sufficient for the helper functions we have
10096 * right now.
10097 */
10098 return count <= 1;
10099 }
10100
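/* A pointer-to-mem helper argument must get its size from exactly one place:
 * either the argument itself carries MEM_FIXED_SIZE with a non-zero arg_size,
 * or the next argument is a mem-size argument. Returns true if the pairing
 * at position 'arg' is invalid.
 */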
10101 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
10102 {
10103 bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
10104 bool has_size = fn->arg_size[arg] != 0;
10105 bool is_next_size = false;
10106
10107 if (arg + 1 < ARRAY_SIZE(fn->arg_type))
10108 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
10109
10110 if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
10111 return is_next_size;
10112
10113 return has_size == is_next_size || is_next_size == is_fixed;
10114 }
10115
10116 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
10117 {
10118 /* bpf_xxx(..., buf, len) call will access 'len'
10119 * bytes from memory 'buf'. Both arg types need
10120 * to be paired, so make sure there's no buggy
10121 * helper function specification.
10122 */
10123 if (arg_type_is_mem_size(fn->arg1_type) ||
10124 check_args_pair_invalid(fn, 0) ||
10125 check_args_pair_invalid(fn, 1) ||
10126 check_args_pair_invalid(fn, 2) ||
10127 check_args_pair_invalid(fn, 3) ||
10128 check_args_pair_invalid(fn, 4))
10129 return false;
10130
10131 return true;
10132 }
10133
10134 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
10135 {
10136 int i;
10137
10138 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
10139 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
10140 return !!fn->arg_btf_id[i];
10141 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
10142 return fn->arg_btf_id[i] == BPF_PTR_POISON;
10143 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
10144 /* arg_btf_id and arg_size are in a union. */
10145 (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
10146 !(fn->arg_type[i] & MEM_FIXED_SIZE)))
10147 return false;
10148 }
10149
10150 return true;
10151 }
10152
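/* Static sanity checks on the helper prototype itself, independent of the
 * actual argument registers at the call site.
 */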
10153 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
10154 {
10155 return check_raw_mode_ok(fn) &&
10156 check_arg_pair_ok(fn) &&
10157 check_btf_id_ok(fn) ? 0 : -EINVAL;
10158 }
10159
10160 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
10161 * are now invalid, so turn them into unknown SCALAR_VALUE.
10162 *
10163 * This also applies to dynptr slices belonging to skb and xdp dynptrs,
10164 * since these slices point to packet data.
10165 */
10166 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
10167 {
10168 struct bpf_func_state *state;
10169 struct bpf_reg_state *reg;
10170
10171 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
10172 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
10173 mark_reg_invalid(env, reg);
10174 }));
10175 }
10176
10177 enum {
10178 AT_PKT_END = -1,
10179 BEYOND_PKT_END = -2,
10180 };
10181
10182 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
10183 {
10184 struct bpf_func_state *state = vstate->frame[vstate->curframe];
10185 struct bpf_reg_state *reg = &state->regs[regn];
10186
10187 if (reg->type != PTR_TO_PACKET)
10188 /* PTR_TO_PACKET_META is not supported yet */
10189 return;
10190
10191 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
10192 * How far beyond pkt_end it goes is unknown.
10193 * if (!range_open) it's the case of pkt >= pkt_end
10194 * if (range_open) it's the case of pkt > pkt_end
10195 * hence this pointer is at least 1 byte bigger than pkt_end
10196 */
10197 if (range_open)
10198 reg->range = BEYOND_PKT_END;
10199 else
10200 reg->range = AT_PKT_END;
10201 }
10202
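/* Drop the acquired-reference entry with the given ref_obj_id from the
 * reference state, without invalidating the registers that still hold
 * copies of the pointer (see release_reference() for that).
 */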
10203 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id)
10204 {
10205 int i;
10206
10207 for (i = 0; i < state->acquired_refs; i++) {
10208 if (state->refs[i].type != REF_TYPE_PTR)
10209 continue;
10210 if (state->refs[i].id == ref_obj_id) {
10211 release_reference_state(state, i);
10212 return 0;
10213 }
10214 }
10215 return -EINVAL;
10216 }
10217
10218 /* The pointer with the specified id has released its reference to kernel
10219 * resources. Identify all copies of the same pointer and clear the reference.
10220 *
10221 * This is the release function corresponding to acquire_reference(). Idempotent.
10222 */
10223 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id)
10224 {
10225 struct bpf_verifier_state *vstate = env->cur_state;
10226 struct bpf_func_state *state;
10227 struct bpf_reg_state *reg;
10228 int err;
10229
10230 err = release_reference_nomark(vstate, ref_obj_id);
10231 if (err)
10232 return err;
10233
10234 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
10235 if (reg->ref_obj_id == ref_obj_id)
10236 mark_reg_invalid(env, reg);
10237 }));
10238
10239 return 0;
10240 }
10241
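/* Invalidate all non-owning references (e.g. pointers obtained from a locked
 * rbtree/list) in the current verifier state; typically needed once the lock
 * protecting the collection is released.
 */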
10242 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
10243 {
10244 struct bpf_func_state *unused;
10245 struct bpf_reg_state *reg;
10246
10247 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
10248 if (type_is_non_owning_ref(reg->type))
10249 mark_reg_invalid(env, reg);
10250 }));
10251 }
10252
10253 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
10254 struct bpf_reg_state *regs)
10255 {
10256 int i;
10257
10258 /* after the call registers r0 - r5 were scratched */
10259 for (i = 0; i < CALLER_SAVED_REGS; i++) {
10260 mark_reg_not_init(env, regs, caller_saved[i]);
10261 __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
10262 }
10263 }
10264
10265 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
10266 struct bpf_func_state *caller,
10267 struct bpf_func_state *callee,
10268 int insn_idx);
10269
10270 static int set_callee_state(struct bpf_verifier_env *env,
10271 struct bpf_func_state *caller,
10272 struct bpf_func_state *callee, int insn_idx);
10273
10274 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
10275 set_callee_state_fn set_callee_state_cb,
10276 struct bpf_verifier_state *state)
10277 {
10278 struct bpf_func_state *caller, *callee;
10279 int err;
10280
10281 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
10282 verbose(env, "the call stack of %d frames is too deep\n",
10283 state->curframe + 2);
10284 return -E2BIG;
10285 }
10286
10287 if (state->frame[state->curframe + 1]) {
10288 verbose(env, "verifier bug. Frame %d already allocated\n",
10289 state->curframe + 1);
10290 return -EFAULT;
10291 }
10292
10293 caller = state->frame[state->curframe];
10294 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
10295 if (!callee)
10296 return -ENOMEM;
10297 state->frame[state->curframe + 1] = callee;
10298
10299 /* callee cannot access r0, r6 - r9 for reading and has to write
10300 * into its own stack before reading from it.
10301 * callee can read/write into caller's stack
10302 */
10303 init_func_state(env, callee,
10304 /* remember the callsite, it will be used by bpf_exit */
10305 callsite,
10306 state->curframe + 1 /* frameno within this callchain */,
10307 subprog /* subprog number within this prog */);
10308 err = set_callee_state_cb(env, caller, callee, callsite);
10309 if (err)
10310 goto err_out;
10311
10312 /* only increment it after check_reg_arg() finished */
10313 state->curframe++;
10314
10315 return 0;
10316
10317 err_out:
10318 free_func_state(callee);
10319 state->frame[state->curframe + 1] = NULL;
10320 return err;
10321 }
10322
10323 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
10324 const struct btf *btf,
10325 struct bpf_reg_state *regs)
10326 {
10327 struct bpf_subprog_info *sub = subprog_info(env, subprog);
10328 struct bpf_verifier_log *log = &env->log;
10329 u32 i;
10330 int ret;
10331
10332 ret = btf_prepare_func_args(env, subprog);
10333 if (ret)
10334 return ret;
10335
10336 /* check that BTF function arguments match actual types that the
10337 * verifier sees.
10338 */
10339 for (i = 0; i < sub->arg_cnt; i++) {
10340 u32 regno = i + 1;
10341 		struct bpf_reg_state *reg = &regs[regno];
10342 struct bpf_subprog_arg_info *arg = &sub->args[i];
10343
10344 if (arg->arg_type == ARG_ANYTHING) {
10345 if (reg->type != SCALAR_VALUE) {
10346 bpf_log(log, "R%d is not a scalar\n", regno);
10347 return -EINVAL;
10348 }
10349 } else if (arg->arg_type == ARG_PTR_TO_CTX) {
10350 ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
10351 if (ret < 0)
10352 return ret;
10353 /* If function expects ctx type in BTF check that caller
10354 * is passing PTR_TO_CTX.
10355 */
10356 if (reg->type != PTR_TO_CTX) {
10357 bpf_log(log, "arg#%d expects pointer to ctx\n", i);
10358 return -EINVAL;
10359 }
10360 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
10361 ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
10362 if (ret < 0)
10363 return ret;
10364 if (check_mem_reg(env, reg, regno, arg->mem_size))
10365 return -EINVAL;
10366 if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
10367 bpf_log(log, "arg#%d is expected to be non-NULL\n", i);
10368 return -EINVAL;
10369 }
10370 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
10371 /*
10372 			 * Any value can be passed and the kernel won't crash, but
10373 			 * only PTR_TO_ARENA or SCALAR makes sense. Everything
10374 			 * else is a bug in the bpf program. Point it out to
10375 			 * the user at verification time instead of leaving a
10376 			 * run-time debugging nightmare.
10377 */
10378 if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
10379 bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno);
10380 return -EINVAL;
10381 }
10382 } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
10383 ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR);
10384 if (ret)
10385 return ret;
10386
10387 ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
10388 if (ret)
10389 return ret;
10390 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
10391 struct bpf_call_arg_meta meta;
10392 int err;
10393
10394 if (register_is_null(reg) && type_may_be_null(arg->arg_type))
10395 continue;
10396
10397 memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
10398 err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta);
10399 err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type);
10400 if (err)
10401 return err;
10402 } else {
10403 bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n",
10404 i, arg->arg_type);
10405 return -EFAULT;
10406 }
10407 }
10408
10409 return 0;
10410 }
10411
10412 /* Compare BTF of a function call with given bpf_reg_state.
10413 * Returns:
10414 * EFAULT - there is a verifier bug. Abort verification.
10415 * EINVAL - there is a type mismatch or BTF is not available.
10416 * 0 - BTF matches with what bpf_reg_state expects.
10417 * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
10418 */
10419 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
10420 struct bpf_reg_state *regs)
10421 {
10422 struct bpf_prog *prog = env->prog;
10423 struct btf *btf = prog->aux->btf;
10424 u32 btf_id;
10425 int err;
10426
10427 if (!prog->aux->func_info)
10428 return -EINVAL;
10429
10430 btf_id = prog->aux->func_info[subprog].type_id;
10431 if (!btf_id)
10432 return -EFAULT;
10433
10434 if (prog->aux->func_info_aux[subprog].unreliable)
10435 return -EINVAL;
10436
10437 err = btf_check_func_arg_match(env, subprog, btf, regs);
10438 /* Compiler optimizations can remove arguments from static functions
10439 * or mismatched type can be passed into a global function.
10440 * In such cases mark the function as unreliable from BTF point of view.
10441 */
10442 if (err)
10443 prog->aux->func_info_aux[subprog].unreliable = true;
10444 return err;
10445 }
10446
10447 static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10448 int insn_idx, int subprog,
10449 set_callee_state_fn set_callee_state_cb)
10450 {
10451 struct bpf_verifier_state *state = env->cur_state, *callback_state;
10452 struct bpf_func_state *caller, *callee;
10453 int err;
10454
10455 caller = state->frame[state->curframe];
10456 err = btf_check_subprog_call(env, subprog, caller->regs);
10457 if (err == -EFAULT)
10458 return err;
10459
10460 /* set_callee_state is used for direct subprog calls, but we are
10461 * interested in validating only BPF helpers that can call subprogs as
10462 * callbacks
10463 */
10464 env->subprog_info[subprog].is_cb = true;
10465 if (bpf_pseudo_kfunc_call(insn) &&
10466 !is_callback_calling_kfunc(insn->imm)) {
10467 verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
10468 func_id_name(insn->imm), insn->imm);
10469 return -EFAULT;
10470 } else if (!bpf_pseudo_kfunc_call(insn) &&
10471 !is_callback_calling_function(insn->imm)) { /* helper */
10472 verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
10473 func_id_name(insn->imm), insn->imm);
10474 return -EFAULT;
10475 }
10476
10477 if (is_async_callback_calling_insn(insn)) {
10478 struct bpf_verifier_state *async_cb;
10479
10480 /* there is no real recursion here. timer and workqueue callbacks are async */
10481 env->subprog_info[subprog].is_async_cb = true;
10482 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
10483 insn_idx, subprog,
10484 is_bpf_wq_set_callback_impl_kfunc(insn->imm));
10485 if (!async_cb)
10486 return -EFAULT;
10487 callee = async_cb->frame[0];
10488 callee->async_entry_cnt = caller->async_entry_cnt + 1;
10489
10490 /* Convert bpf_timer_set_callback() args into timer callback args */
10491 err = set_callee_state_cb(env, caller, callee, insn_idx);
10492 if (err)
10493 return err;
10494
10495 return 0;
10496 }
10497
10498 /* for callback functions enqueue entry to callback and
10499 * proceed with next instruction within current frame.
10500 */
10501 callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
10502 if (!callback_state)
10503 return -ENOMEM;
10504
10505 err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
10506 callback_state);
10507 if (err)
10508 return err;
10509
10510 callback_state->callback_unroll_depth++;
10511 callback_state->frame[callback_state->curframe - 1]->callback_depth++;
10512 caller->callback_depth = 0;
10513 return 0;
10514 }
10515
10516 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10517 int *insn_idx)
10518 {
10519 struct bpf_verifier_state *state = env->cur_state;
10520 struct bpf_func_state *caller;
10521 int err, subprog, target_insn;
10522
10523 target_insn = *insn_idx + insn->imm + 1;
10524 subprog = find_subprog(env, target_insn);
10525 if (subprog < 0) {
10526 verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
10527 return -EFAULT;
10528 }
10529
10530 caller = state->frame[state->curframe];
10531 err = btf_check_subprog_call(env, subprog, caller->regs);
10532 if (err == -EFAULT)
10533 return err;
10534 if (subprog_is_global(env, subprog)) {
10535 const char *sub_name = subprog_name(env, subprog);
10536
10537 if (env->cur_state->active_locks) {
10538 verbose(env, "global function calls are not allowed while holding a lock,\n"
10539 "use static function instead\n");
10540 return -EINVAL;
10541 }
10542
10543 if (env->subprog_info[subprog].might_sleep &&
10544 (env->cur_state->active_rcu_lock || env->cur_state->active_preempt_locks ||
10545 env->cur_state->active_irq_id || !in_sleepable(env))) {
10546 verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n"
10547 "i.e., in a RCU/IRQ/preempt-disabled section, or in\n"
10548 "a non-sleepable BPF program context\n");
10549 return -EINVAL;
10550 }
10551
10552 if (err) {
10553 verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
10554 subprog, sub_name);
10555 return err;
10556 }
10557
10558 verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
10559 subprog, sub_name);
10560 if (env->subprog_info[subprog].changes_pkt_data)
10561 clear_all_pkt_pointers(env);
10562 /* mark global subprog for verifying after main prog */
10563 subprog_aux(env, subprog)->called = true;
10564 clear_caller_saved_regs(env, caller->regs);
10565
10566 /* All global functions return a 64-bit SCALAR_VALUE */
10567 mark_reg_unknown(env, caller->regs, BPF_REG_0);
10568 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10569
10570 /* continue with next insn after call */
10571 return 0;
10572 }
10573
10574 /* for regular function entry setup new frame and continue
10575 * from that frame.
10576 */
10577 err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
10578 if (err)
10579 return err;
10580
10581 clear_caller_saved_regs(env, caller->regs);
10582
10583 /* and go analyze first insn of the callee */
10584 *insn_idx = env->subprog_info[subprog].start - 1;
10585
10586 if (env->log.level & BPF_LOG_LEVEL) {
10587 verbose(env, "caller:\n");
10588 print_verifier_state(env, state, caller->frameno, true);
10589 verbose(env, "callee:\n");
10590 print_verifier_state(env, state, state->curframe, true);
10591 }
10592
10593 return 0;
10594 }
10595
10596 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
10597 struct bpf_func_state *caller,
10598 struct bpf_func_state *callee)
10599 {
10600 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
10601 * void *callback_ctx, u64 flags);
10602 * callback_fn(struct bpf_map *map, void *key, void *value,
10603 * void *callback_ctx);
10604 */
10605 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
10606
10607 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
10608 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
10609 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
10610
10611 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
10612 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
10613 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
10614
10615 /* pointer to stack or null */
10616 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
10617
10618 /* unused */
10619 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10620 return 0;
10621 }
10622
10623 static int set_callee_state(struct bpf_verifier_env *env,
10624 struct bpf_func_state *caller,
10625 struct bpf_func_state *callee, int insn_idx)
10626 {
10627 int i;
10628
10629 /* copy r1 - r5 args that callee can access. The copy includes parent
10630 * pointers, which connects us up to the liveness chain
10631 */
10632 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
10633 callee->regs[i] = caller->regs[i];
10634 return 0;
10635 }
10636
10637 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
10638 struct bpf_func_state *caller,
10639 struct bpf_func_state *callee,
10640 int insn_idx)
10641 {
10642 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
10643 struct bpf_map *map;
10644 int err;
10645
10646 	/* map_ptr is guaranteed to be valid here; whether the poison flag was set does not matter */
10647 map = insn_aux->map_ptr_state.map_ptr;
10648 if (!map->ops->map_set_for_each_callback_args ||
10649 !map->ops->map_for_each_callback) {
10650 verbose(env, "callback function not allowed for map\n");
10651 return -ENOTSUPP;
10652 }
10653
10654 err = map->ops->map_set_for_each_callback_args(env, caller, callee);
10655 if (err)
10656 return err;
10657
10658 callee->in_callback_fn = true;
10659 callee->callback_ret_range = retval_range(0, 1);
10660 return 0;
10661 }
10662
10663 static int set_loop_callback_state(struct bpf_verifier_env *env,
10664 struct bpf_func_state *caller,
10665 struct bpf_func_state *callee,
10666 int insn_idx)
10667 {
10668 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
10669 * u64 flags);
10670 * callback_fn(u64 index, void *callback_ctx);
10671 */
10672 callee->regs[BPF_REG_1].type = SCALAR_VALUE;
10673 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
10674
10675 /* unused */
10676 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
10677 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10678 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10679
10680 callee->in_callback_fn = true;
10681 callee->callback_ret_range = retval_range(0, 1);
10682 return 0;
10683 }
10684
10685 static int set_timer_callback_state(struct bpf_verifier_env *env,
10686 struct bpf_func_state *caller,
10687 struct bpf_func_state *callee,
10688 int insn_idx)
10689 {
10690 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
10691
10692 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
10693 * callback_fn(struct bpf_map *map, void *key, void *value);
10694 */
10695 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
10696 __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
10697 callee->regs[BPF_REG_1].map_ptr = map_ptr;
10698
10699 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
10700 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
10701 callee->regs[BPF_REG_2].map_ptr = map_ptr;
10702
10703 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
10704 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
10705 callee->regs[BPF_REG_3].map_ptr = map_ptr;
10706
10707 /* unused */
10708 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10709 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10710 callee->in_async_callback_fn = true;
10711 callee->callback_ret_range = retval_range(0, 1);
10712 return 0;
10713 }
10714
10715 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
10716 struct bpf_func_state *caller,
10717 struct bpf_func_state *callee,
10718 int insn_idx)
10719 {
10720 /* bpf_find_vma(struct task_struct *task, u64 addr,
10721 * void *callback_fn, void *callback_ctx, u64 flags)
10722 * (callback_fn)(struct task_struct *task,
10723 * struct vm_area_struct *vma, void *callback_ctx);
10724 */
10725 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
10726
10727 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
10728 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
10729 callee->regs[BPF_REG_2].btf = btf_vmlinux;
10730 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
10731
10732 /* pointer to stack or null */
10733 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
10734
10735 /* unused */
10736 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10737 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10738 callee->in_callback_fn = true;
10739 callee->callback_ret_range = retval_range(0, 1);
10740 return 0;
10741 }
10742
10743 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
10744 struct bpf_func_state *caller,
10745 struct bpf_func_state *callee,
10746 int insn_idx)
10747 {
10748 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
10749 	 *			  void *callback_ctx, u64 flags);
10750 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
10751 */
10752 __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
10753 mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
10754 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
10755
10756 /* unused */
10757 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
10758 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10759 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10760
10761 callee->in_callback_fn = true;
10762 callee->callback_ret_range = retval_range(0, 1);
10763 return 0;
10764 }
10765
10766 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
10767 struct bpf_func_state *caller,
10768 struct bpf_func_state *callee,
10769 int insn_idx)
10770 {
10771 /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
10772 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
10773 *
10774 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
10775 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
10776 * by this point, so look at 'root'
10777 */
10778 struct btf_field *field;
10779
10780 field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
10781 BPF_RB_ROOT);
10782 if (!field || !field->graph_root.value_btf_id)
10783 return -EFAULT;
10784
10785 mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
10786 ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
10787 mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
10788 ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
10789
10790 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
10791 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
10792 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10793 callee->in_callback_fn = true;
10794 callee->callback_ret_range = retval_range(0, 1);
10795 return 0;
10796 }
10797
10798 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
10799
10800 /* Are we currently verifying the callback for a rbtree helper that must
10801 * be called with lock held? If so, no need to complain about unreleased
10802 * lock
10803 */
10804 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
10805 {
10806 struct bpf_verifier_state *state = env->cur_state;
10807 struct bpf_insn *insn = env->prog->insnsi;
10808 struct bpf_func_state *callee;
10809 int kfunc_btf_id;
10810
10811 if (!state->curframe)
10812 return false;
10813
10814 callee = state->frame[state->curframe];
10815
10816 if (!callee->in_callback_fn)
10817 return false;
10818
10819 kfunc_btf_id = insn[callee->callsite].imm;
10820 return is_rbtree_lock_required_kfunc(kfunc_btf_id);
10821 }
10822
10823 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg,
10824 bool return_32bit)
10825 {
10826 if (return_32bit)
10827 return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
10828 else
10829 return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
10830 }
10831
10832 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
10833 {
10834 struct bpf_verifier_state *state = env->cur_state, *prev_st;
10835 struct bpf_func_state *caller, *callee;
10836 struct bpf_reg_state *r0;
10837 bool in_callback_fn;
10838 int err;
10839
10840 callee = state->frame[state->curframe];
10841 r0 = &callee->regs[BPF_REG_0];
10842 if (r0->type == PTR_TO_STACK) {
10843 /* technically it's ok to return caller's stack pointer
10844 * (or caller's caller's pointer) back to the caller,
10845 * since these pointers are valid. Only current stack
10846 * pointer will be invalid as soon as function exits,
10847 * but let's be conservative
10848 */
10849 verbose(env, "cannot return stack pointer to the caller\n");
10850 return -EINVAL;
10851 }
10852
10853 caller = state->frame[state->curframe - 1];
10854 if (callee->in_callback_fn) {
10855 if (r0->type != SCALAR_VALUE) {
10856 verbose(env, "R0 not a scalar value\n");
10857 return -EACCES;
10858 }
10859
10860 /* we are going to rely on register's precise value */
10861 err = mark_reg_read(env, r0, r0->parent, REG_LIVE_READ64);
10862 err = err ?: mark_chain_precision(env, BPF_REG_0);
10863 if (err)
10864 return err;
10865
10866 /* enforce R0 return value range, and bpf_callback_t returns 64bit */
10867 if (!retval_range_within(callee->callback_ret_range, r0, false)) {
10868 verbose_invalid_scalar(env, r0, callee->callback_ret_range,
10869 "At callback return", "R0");
10870 return -EINVAL;
10871 }
10872 if (!calls_callback(env, callee->callsite)) {
10873 verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n",
10874 *insn_idx, callee->callsite);
10875 return -EFAULT;
10876 }
10877 } else {
10878 /* return to the caller whatever r0 had in the callee */
10879 caller->regs[BPF_REG_0] = *r0;
10880 }
10881
10882 	/* For callbacks like bpf_loop or bpf_for_each_map_elem go back to the callsite,
10883 	 * where the function call logic will reschedule the callback visit. If the iteration
10884 	 * converges, is_state_visited() will eventually prune that visit.
10885 */
10886 in_callback_fn = callee->in_callback_fn;
10887 if (in_callback_fn)
10888 *insn_idx = callee->callsite;
10889 else
10890 *insn_idx = callee->callsite + 1;
10891
10892 if (env->log.level & BPF_LOG_LEVEL) {
10893 verbose(env, "returning from callee:\n");
10894 print_verifier_state(env, state, callee->frameno, true);
10895 verbose(env, "to caller at %d:\n", *insn_idx);
10896 print_verifier_state(env, state, caller->frameno, true);
10897 }
10898 /* clear everything in the callee. In case of exceptional exits using
10899 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
10900 free_func_state(callee);
10901 state->frame[state->curframe--] = NULL;
10902
10903 /* for callbacks widen imprecise scalars to make programs like below verify:
10904 *
10905 * struct ctx { int i; }
10906 * void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
10907 * ...
10908 * struct ctx = { .i = 0; }
10909 * bpf_loop(100, cb, &ctx, 0);
10910 *
10911 * This is similar to what is done in process_iter_next_call() for open
10912 * coded iterators.
10913 */
10914 prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
10915 if (prev_st) {
10916 err = widen_imprecise_scalars(env, prev_st, state);
10917 if (err)
10918 return err;
10919 }
10920 return 0;
10921 }
10922
10923 static int do_refine_retval_range(struct bpf_verifier_env *env,
10924 struct bpf_reg_state *regs, int ret_type,
10925 int func_id,
10926 struct bpf_call_arg_meta *meta)
10927 {
10928 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
10929
10930 if (ret_type != RET_INTEGER)
10931 return 0;
10932
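	/* These helpers copy at most the caller-supplied buffer size, tracked
	 * in meta->msize_max_value, and return a negative errno on failure,
	 * so R0 can be bounded to [-MAX_ERRNO, msize_max_value].
	 */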
10933 switch (func_id) {
10934 case BPF_FUNC_get_stack:
10935 case BPF_FUNC_get_task_stack:
10936 case BPF_FUNC_probe_read_str:
10937 case BPF_FUNC_probe_read_kernel_str:
10938 case BPF_FUNC_probe_read_user_str:
10939 ret_reg->smax_value = meta->msize_max_value;
10940 ret_reg->s32_max_value = meta->msize_max_value;
10941 ret_reg->smin_value = -MAX_ERRNO;
10942 ret_reg->s32_min_value = -MAX_ERRNO;
10943 reg_bounds_sync(ret_reg);
10944 break;
10945 case BPF_FUNC_get_smp_processor_id:
10946 ret_reg->umax_value = nr_cpu_ids - 1;
10947 ret_reg->u32_max_value = nr_cpu_ids - 1;
10948 ret_reg->smax_value = nr_cpu_ids - 1;
10949 ret_reg->s32_max_value = nr_cpu_ids - 1;
10950 ret_reg->umin_value = 0;
10951 ret_reg->u32_min_value = 0;
10952 ret_reg->smin_value = 0;
10953 ret_reg->s32_min_value = 0;
10954 reg_bounds_sync(ret_reg);
10955 break;
10956 }
10957
10958 return reg_bounds_sanity_check(env, ret_reg, "retval");
10959 }
10960
10961 static int
10962 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
10963 int func_id, int insn_idx)
10964 {
10965 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
10966 struct bpf_map *map = meta->map_ptr;
10967
10968 if (func_id != BPF_FUNC_tail_call &&
10969 func_id != BPF_FUNC_map_lookup_elem &&
10970 func_id != BPF_FUNC_map_update_elem &&
10971 func_id != BPF_FUNC_map_delete_elem &&
10972 func_id != BPF_FUNC_map_push_elem &&
10973 func_id != BPF_FUNC_map_pop_elem &&
10974 func_id != BPF_FUNC_map_peek_elem &&
10975 func_id != BPF_FUNC_for_each_map_elem &&
10976 func_id != BPF_FUNC_redirect_map &&
10977 func_id != BPF_FUNC_map_lookup_percpu_elem)
10978 return 0;
10979
10980 if (map == NULL) {
10981 verbose(env, "kernel subsystem misconfigured verifier\n");
10982 return -EINVAL;
10983 }
10984
10985 /* In case of read-only, some additional restrictions
10986 * need to be applied in order to prevent altering the
10987 * state of the map from program side.
10988 */
10989 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
10990 (func_id == BPF_FUNC_map_delete_elem ||
10991 func_id == BPF_FUNC_map_update_elem ||
10992 func_id == BPF_FUNC_map_push_elem ||
10993 func_id == BPF_FUNC_map_pop_elem)) {
10994 verbose(env, "write into map forbidden\n");
10995 return -EACCES;
10996 }
10997
10998 if (!aux->map_ptr_state.map_ptr)
10999 bpf_map_ptr_store(aux, meta->map_ptr,
11000 !meta->map_ptr->bypass_spec_v1, false);
11001 else if (aux->map_ptr_state.map_ptr != meta->map_ptr)
11002 bpf_map_ptr_store(aux, meta->map_ptr,
11003 !meta->map_ptr->bypass_spec_v1, true);
11004 return 0;
11005 }
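
/* Illustrative sketch (not from this file): a program-side write into a map
 * created with BPF_F_RDONLY_PROG is rejected by the check above with
 * "write into map forbidden". The map uses the usual libbpf BTF-defined map
 * syntax; the map and variable names are examples only.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__uint(map_flags, BPF_F_RDONLY_PROG);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} ro_map SEC(".maps");
 *
 *	__u32 key = 0;
 *	__u64 val = 1;
 *
 *	bpf_map_lookup_elem(&ro_map, &key);		// allowed
 *	bpf_map_update_elem(&ro_map, &key, &val, 0);	// rejected at load time
 */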
11006
11007 static int
11008 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
11009 int func_id, int insn_idx)
11010 {
11011 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
11012 struct bpf_reg_state *regs = cur_regs(env), *reg;
11013 struct bpf_map *map = meta->map_ptr;
11014 u64 val, max;
11015 int err;
11016
11017 if (func_id != BPF_FUNC_tail_call)
11018 return 0;
11019 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
11020 verbose(env, "kernel subsystem misconfigured verifier\n");
11021 return -EINVAL;
11022 }
11023
11024 reg = &regs[BPF_REG_3];
11025 val = reg->var_off.value;
11026 max = map->max_entries;
11027
11028 if (!(is_reg_const(reg, false) && val < max)) {
11029 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
11030 return 0;
11031 }
11032
11033 err = mark_chain_precision(env, BPF_REG_3);
11034 if (err)
11035 return err;
11036 if (bpf_map_key_unseen(aux))
11037 bpf_map_key_store(aux, val);
11038 else if (!bpf_map_key_poisoned(aux) &&
11039 bpf_map_key_immediate(aux) != val)
11040 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
11041 return 0;
11042 }
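
/* Illustrative sketch (not from this file): the constant-key tracking above is
 * what allows a bpf_tail_call() whose index is a known constant, in range for
 * the BPF_MAP_TYPE_PROG_ARRAY, to be turned into a direct jump later during
 * fixup. A variable or out-of-range index poisons the key and keeps the
 * generic path. Map and index values below are examples only.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
 *		__uint(max_entries, 4);
 *		__type(key, __u32);
 *		__type(value, __u32);
 *	} jmp_table SEC(".maps");
 *
 *	bpf_tail_call(ctx, &jmp_table, 2);	// constant key 2 < max_entries
 */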
11043
11044 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
11045 {
11046 struct bpf_verifier_state *state = env->cur_state;
11047 enum bpf_prog_type type = resolve_prog_type(env->prog);
11048 struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
11049 bool refs_lingering = false;
11050 int i;
11051
11052 if (!exception_exit && cur_func(env)->frameno)
11053 return 0;
11054
11055 for (i = 0; i < state->acquired_refs; i++) {
11056 if (state->refs[i].type != REF_TYPE_PTR)
11057 continue;
11058 /* Allow struct_ops programs to return a referenced kptr back to
11059 * kernel. Type checks are performed later in check_return_code.
11060 */
11061 if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
11062 reg->ref_obj_id == state->refs[i].id)
11063 continue;
11064 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
11065 state->refs[i].id, state->refs[i].insn_idx);
11066 refs_lingering = true;
11067 }
11068 return refs_lingering ? -EINVAL : 0;
11069 }
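
/* Illustrative sketch (not from this file): the kind of program the check
 * above rejects. bpf_sk_lookup_tcp() acquires a reference on the returned
 * socket, so any path that reaches program exit without bpf_sk_release()
 * triggers "Unreleased reference". Variable names are examples only.
 *
 *	struct bpf_sock *sk;
 *
 *	sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
 *			       BPF_F_CURRENT_NETNS, 0);
 *	if (sk && sk->src_port == 80)
 *		return TC_ACT_OK;	// BUG: leaks the acquired reference
 *	if (sk)
 *		bpf_sk_release(sk);
 *	return TC_ACT_OK;
 */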
11070
11071 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
11072 {
11073 int err;
11074
11075 if (check_lock && env->cur_state->active_locks) {
11076 verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
11077 return -EINVAL;
11078 }
11079
11080 err = check_reference_leak(env, exception_exit);
11081 if (err) {
11082 verbose(env, "%s would lead to reference leak\n", prefix);
11083 return err;
11084 }
11085
11086 if (check_lock && env->cur_state->active_irq_id) {
11087 verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
11088 return -EINVAL;
11089 }
11090
11091 if (check_lock && env->cur_state->active_rcu_lock) {
11092 verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
11093 return -EINVAL;
11094 }
11095
11096 if (check_lock && env->cur_state->active_preempt_locks) {
11097 verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
11098 return -EINVAL;
11099 }
11100
11101 return 0;
11102 }
11103
11104 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
11105 struct bpf_reg_state *regs)
11106 {
11107 struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
11108 struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
11109 struct bpf_map *fmt_map = fmt_reg->map_ptr;
11110 struct bpf_bprintf_data data = {};
11111 int err, fmt_map_off, num_args;
11112 u64 fmt_addr;
11113 char *fmt;
11114
11115 /* data must be an array of u64 */
11116 if (data_len_reg->var_off.value % 8)
11117 return -EINVAL;
11118 num_args = data_len_reg->var_off.value / 8;
11119
11120 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
11121 * and map_direct_value_addr is set.
11122 */
11123 fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
11124 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
11125 fmt_map_off);
11126 if (err) {
11127 verbose(env, "verifier bug\n");
11128 return -EFAULT;
11129 }
11130 fmt = (char *)(long)fmt_addr + fmt_map_off;
11131
11132 /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, so we
11133 * can focus on validating the format specifiers.
11134 */
11135 err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
11136 if (err < 0)
11137 verbose(env, "Invalid format string\n");
11138
11139 return err;
11140 }
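
/* Illustrative sketch (not from this file): the shape of a bpf_snprintf()
 * call site that satisfies the checks above. The format string must be a
 * constant string in read-only map data, and the data array is passed as
 * u64 values with its size given in bytes (a multiple of 8). Variable names
 * are examples only.
 *
 *	static const char fmt[] = "cpu=%u pid=%d\n";
 *	char out[64];
 *	u64 args[2] = { cpu, pid };
 *
 *	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 */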
11141
11142 static int check_get_func_ip(struct bpf_verifier_env *env)
11143 {
11144 enum bpf_prog_type type = resolve_prog_type(env->prog);
11145 int func_id = BPF_FUNC_get_func_ip;
11146
11147 if (type == BPF_PROG_TYPE_TRACING) {
11148 if (!bpf_prog_has_trampoline(env->prog)) {
11149 verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
11150 func_id_name(func_id), func_id);
11151 return -ENOTSUPP;
11152 }
11153 return 0;
11154 } else if (type == BPF_PROG_TYPE_KPROBE) {
11155 return 0;
11156 }
11157
11158 verbose(env, "func %s#%d not supported for program type %d\n",
11159 func_id_name(func_id), func_id, type);
11160 return -ENOTSUPP;
11161 }
11162
11163 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
11164 {
11165 return &env->insn_aux_data[env->insn_idx];
11166 }
11167
11168 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
11169 {
11170 struct bpf_reg_state *regs = cur_regs(env);
11171 struct bpf_reg_state *reg = &regs[BPF_REG_4];
11172 bool reg_is_null = register_is_null(reg);
11173
11174 if (reg_is_null)
11175 mark_chain_precision(env, BPF_REG_4);
11176
11177 return reg_is_null;
11178 }
11179
11180 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
11181 {
11182 struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
11183
11184 if (!state->initialized) {
11185 state->initialized = 1;
11186 state->fit_for_inline = loop_flag_is_zero(env);
11187 state->callback_subprogno = subprogno;
11188 return;
11189 }
11190
11191 if (!state->fit_for_inline)
11192 return;
11193
11194 state->fit_for_inline = (loop_flag_is_zero(env) &&
11195 state->callback_subprogno == subprogno);
11196 }
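
/* Illustrative sketch (not from this file): a bpf_loop() call that stays
 * "fit for inline" under the tracking above - the flags argument is the
 * constant 0 and the same callback subprog is seen on every verified path
 * through this call site. The callback prototype follows the bpf_loop()
 * helper documentation; all names below are examples.
 *
 *	static long cb(u64 index, void *ctx)
 *	{
 *		*(long *)ctx += index;
 *		return 0;		// 0 = continue, 1 = break
 *	}
 *
 *	long sum = 0;
 *
 *	bpf_loop(16, cb, &sum, 0);	// flags == 0 -> inline candidate
 */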
11197
11198 /* Returns whether or not the given map type can potentially elide
11199 * lookup return value nullness check. This is possible if the key
11200 * is statically known.
11201 */
11202 static bool can_elide_value_nullness(enum bpf_map_type type)
11203 {
11204 switch (type) {
11205 case BPF_MAP_TYPE_ARRAY:
11206 case BPF_MAP_TYPE_PERCPU_ARRAY:
11207 return true;
11208 default:
11209 return false;
11210 }
11211 }
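
/* Illustrative sketch (not from this file): for an array map, a lookup with a
 * constant in-range key cannot fail, so the verifier may drop the usual NULL
 * check requirement on the returned pointer (see the use of
 * can_elide_value_nullness() in check_helper_call() below). Map and variable
 * names are examples only.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 4);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} arr SEC(".maps");
 *
 *	__u32 key = 1;			// constant and < max_entries
 *	__u64 *val = bpf_map_lookup_elem(&arr, &key);
 *
 *	*val += 1;			// no 'if (!val)' needed in this case
 */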
11212
11213 static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
11214 const struct bpf_func_proto **ptr)
11215 {
11216 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
11217 return -ERANGE;
11218
11219 if (!env->ops->get_func_proto)
11220 return -EINVAL;
11221
11222 *ptr = env->ops->get_func_proto(func_id, env->prog);
11223 return *ptr ? 0 : -EINVAL;
11224 }
11225
11226 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
11227 int *insn_idx_p)
11228 {
11229 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
11230 bool returns_cpu_specific_alloc_ptr = false;
11231 const struct bpf_func_proto *fn = NULL;
11232 enum bpf_return_type ret_type;
11233 enum bpf_type_flag ret_flag;
11234 struct bpf_reg_state *regs;
11235 struct bpf_call_arg_meta meta;
11236 int insn_idx = *insn_idx_p;
11237 bool changes_data;
11238 int i, err, func_id;
11239
11240 /* find function prototype */
11241 func_id = insn->imm;
11242 err = get_helper_proto(env, insn->imm, &fn);
11243 if (err == -ERANGE) {
11244 verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
11245 return -EINVAL;
11246 }
11247
11248 if (err) {
11249 verbose(env, "program of this type cannot use helper %s#%d\n",
11250 func_id_name(func_id), func_id);
11251 return err;
11252 }
11253
11254 /* eBPF programs must be GPL compatible to use GPL-ed functions */
11255 if (!env->prog->gpl_compatible && fn->gpl_only) {
11256 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
11257 return -EINVAL;
11258 }
11259
11260 if (fn->allowed && !fn->allowed(env->prog)) {
11261 verbose(env, "helper call is not allowed in probe\n");
11262 return -EINVAL;
11263 }
11264
11265 if (!in_sleepable(env) && fn->might_sleep) {
11266 verbose(env, "helper call might sleep in a non-sleepable prog\n");
11267 return -EINVAL;
11268 }
11269
11270 /* With LD_ABS/IND some JITs save/restore skb from r1. */
11271 changes_data = bpf_helper_changes_pkt_data(func_id);
11272 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
11273 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
11274 func_id_name(func_id), func_id);
11275 return -EINVAL;
11276 }
11277
11278 memset(&meta, 0, sizeof(meta));
11279 meta.pkt_access = fn->pkt_access;
11280
11281 err = check_func_proto(fn, func_id);
11282 if (err) {
11283 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
11284 func_id_name(func_id), func_id);
11285 return err;
11286 }
11287
11288 if (env->cur_state->active_rcu_lock) {
11289 if (fn->might_sleep) {
11290 verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
11291 func_id_name(func_id), func_id);
11292 return -EINVAL;
11293 }
11294
11295 if (in_sleepable(env) && is_storage_get_function(func_id))
11296 env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
11297 }
11298
11299 if (env->cur_state->active_preempt_locks) {
11300 if (fn->might_sleep) {
11301 verbose(env, "sleepable helper %s#%d in non-preemptible region\n",
11302 func_id_name(func_id), func_id);
11303 return -EINVAL;
11304 }
11305
11306 if (in_sleepable(env) && is_storage_get_function(func_id))
11307 env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
11308 }
11309
11310 if (env->cur_state->active_irq_id) {
11311 if (fn->might_sleep) {
11312 verbose(env, "sleepable helper %s#%d in IRQ-disabled region\n",
11313 func_id_name(func_id), func_id);
11314 return -EINVAL;
11315 }
11316
11317 if (in_sleepable(env) && is_storage_get_function(func_id))
11318 env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
11319 }
11320
11321 meta.func_id = func_id;
11322 /* check args */
11323 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
11324 err = check_func_arg(env, i, &meta, fn, insn_idx);
11325 if (err)
11326 return err;
11327 }
11328
11329 err = record_func_map(env, &meta, func_id, insn_idx);
11330 if (err)
11331 return err;
11332
11333 err = record_func_key(env, &meta, func_id, insn_idx);
11334 if (err)
11335 return err;
11336
11337 /* Mark slots with STACK_MISC in case of raw mode, stack offset
11338 * is inferred from register state.
11339 */
11340 for (i = 0; i < meta.access_size; i++) {
11341 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
11342 BPF_WRITE, -1, false, false);
11343 if (err)
11344 return err;
11345 }
11346
11347 regs = cur_regs(env);
11348
11349 if (meta.release_regno) {
11350 err = -EINVAL;
11351 /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
11352 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
11353 * is safe to do directly.
11354 */
11355 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
11356 if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
11357 verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
11358 return -EFAULT;
11359 }
11360 err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
11361 } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
11362 u32 ref_obj_id = meta.ref_obj_id;
11363 bool in_rcu = in_rcu_cs(env);
11364 struct bpf_func_state *state;
11365 struct bpf_reg_state *reg;
11366
11367 err = release_reference_nomark(env->cur_state, ref_obj_id);
11368 if (!err) {
11369 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
11370 if (reg->ref_obj_id == ref_obj_id) {
11371 if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
11372 reg->ref_obj_id = 0;
11373 reg->type &= ~MEM_ALLOC;
11374 reg->type |= MEM_RCU;
11375 } else {
11376 mark_reg_invalid(env, reg);
11377 }
11378 }
11379 }));
11380 }
11381 } else if (meta.ref_obj_id) {
11382 err = release_reference(env, meta.ref_obj_id);
11383 } else if (register_is_null(&regs[meta.release_regno])) {
11384 /* meta.ref_obj_id can only be 0 if register that is meant to be
11385 * released is NULL, which must be > R0.
11386 */
11387 err = 0;
11388 }
11389 if (err) {
11390 verbose(env, "func %s#%d reference has not been acquired before\n",
11391 func_id_name(func_id), func_id);
11392 return err;
11393 }
11394 }
11395
11396 switch (func_id) {
11397 case BPF_FUNC_tail_call:
11398 err = check_resource_leak(env, false, true, "tail_call");
11399 if (err)
11400 return err;
11401 break;
11402 case BPF_FUNC_get_local_storage:
11403 /* check that flags argument in get_local_storage(map, flags) is 0,
11404 * this is required because get_local_storage() can't return an error.
11405 */
11406 if (!register_is_null(&regs[BPF_REG_2])) {
11407 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
11408 return -EINVAL;
11409 }
11410 break;
11411 case BPF_FUNC_for_each_map_elem:
11412 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11413 set_map_elem_callback_state);
11414 break;
11415 case BPF_FUNC_timer_set_callback:
11416 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11417 set_timer_callback_state);
11418 break;
11419 case BPF_FUNC_find_vma:
11420 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11421 set_find_vma_callback_state);
11422 break;
11423 case BPF_FUNC_snprintf:
11424 err = check_bpf_snprintf_call(env, regs);
11425 break;
11426 case BPF_FUNC_loop:
11427 update_loop_inline_state(env, meta.subprogno);
11428 /* Verifier relies on R1 value to determine if bpf_loop() iteration
11429 * is finished, thus mark it precise.
11430 */
11431 err = mark_chain_precision(env, BPF_REG_1);
11432 if (err)
11433 return err;
11434 if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
11435 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11436 set_loop_callback_state);
11437 } else {
11438 cur_func(env)->callback_depth = 0;
11439 if (env->log.level & BPF_LOG_LEVEL2)
11440 verbose(env, "frame%d bpf_loop iteration limit reached\n",
11441 env->cur_state->curframe);
11442 }
11443 break;
11444 case BPF_FUNC_dynptr_from_mem:
11445 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
11446 verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
11447 reg_type_str(env, regs[BPF_REG_1].type));
11448 return -EACCES;
11449 }
11450 break;
11451 case BPF_FUNC_set_retval:
11452 if (prog_type == BPF_PROG_TYPE_LSM &&
11453 env->prog->expected_attach_type == BPF_LSM_CGROUP) {
11454 if (!env->prog->aux->attach_func_proto->type) {
11455 /* Make sure programs that attach to void
11456 * hooks don't try to modify return value.
11457 */
11458 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
11459 return -EINVAL;
11460 }
11461 }
11462 break;
11463 case BPF_FUNC_dynptr_data:
11464 {
11465 struct bpf_reg_state *reg;
11466 int id, ref_obj_id;
11467
11468 reg = get_dynptr_arg_reg(env, fn, regs);
11469 if (!reg)
11470 return -EFAULT;
11471
11473 if (meta.dynptr_id) {
11474 verbose(env, "verifier internal error: meta.dynptr_id already set\n");
11475 return -EFAULT;
11476 }
11477 if (meta.ref_obj_id) {
11478 verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
11479 return -EFAULT;
11480 }
11481
11482 id = dynptr_id(env, reg);
11483 if (id < 0) {
11484 verbose(env, "verifier internal error: failed to obtain dynptr id\n");
11485 return id;
11486 }
11487
11488 ref_obj_id = dynptr_ref_obj_id(env, reg);
11489 if (ref_obj_id < 0) {
11490 verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
11491 return ref_obj_id;
11492 }
11493
11494 meta.dynptr_id = id;
11495 meta.ref_obj_id = ref_obj_id;
11496
11497 break;
11498 }
11499 case BPF_FUNC_dynptr_write:
11500 {
11501 enum bpf_dynptr_type dynptr_type;
11502 struct bpf_reg_state *reg;
11503
11504 reg = get_dynptr_arg_reg(env, fn, regs);
11505 if (!reg)
11506 return -EFAULT;
11507
11508 dynptr_type = dynptr_get_type(env, reg);
11509 if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
11510 return -EFAULT;
11511
11512 if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
11513 /* this will trigger clear_all_pkt_pointers(), which will
11514 * invalidate all dynptr slices associated with the skb
11515 */
11516 changes_data = true;
11517
11518 break;
11519 }
11520 case BPF_FUNC_per_cpu_ptr:
11521 case BPF_FUNC_this_cpu_ptr:
11522 {
11523 struct bpf_reg_state *reg = &regs[BPF_REG_1];
11524 const struct btf_type *type;
11525
11526 if (reg->type & MEM_RCU) {
11527 type = btf_type_by_id(reg->btf, reg->btf_id);
11528 if (!type || !btf_type_is_struct(type)) {
11529 verbose(env, "Helper has invalid btf/btf_id in R1\n");
11530 return -EFAULT;
11531 }
11532 returns_cpu_specific_alloc_ptr = true;
11533 env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
11534 }
11535 break;
11536 }
11537 case BPF_FUNC_user_ringbuf_drain:
11538 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11539 set_user_ringbuf_callback_state);
11540 break;
11541 }
11542
11543 if (err)
11544 return err;
11545
11546 /* reset caller saved regs */
11547 for (i = 0; i < CALLER_SAVED_REGS; i++) {
11548 mark_reg_not_init(env, regs, caller_saved[i]);
11549 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
11550 }
11551
11552 /* helper call returns 64-bit value. */
11553 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
11554
11555 /* update return register (already marked as written above) */
11556 ret_type = fn->ret_type;
11557 ret_flag = type_flag(ret_type);
11558
11559 switch (base_type(ret_type)) {
11560 case RET_INTEGER:
11561 /* sets type to SCALAR_VALUE */
11562 mark_reg_unknown(env, regs, BPF_REG_0);
11563 break;
11564 case RET_VOID:
11565 regs[BPF_REG_0].type = NOT_INIT;
11566 break;
11567 case RET_PTR_TO_MAP_VALUE:
11568 /* There is no offset yet applied, variable or fixed */
11569 mark_reg_known_zero(env, regs, BPF_REG_0);
11570 /* remember map_ptr, so that check_map_access()
11571 * can check 'value_size' boundary of memory access
11572 * to map element returned from bpf_map_lookup_elem()
11573 */
11574 if (meta.map_ptr == NULL) {
11575 verbose(env,
11576 "kernel subsystem misconfigured verifier\n");
11577 return -EINVAL;
11578 }
11579
11580 if (func_id == BPF_FUNC_map_lookup_elem &&
11581 can_elide_value_nullness(meta.map_ptr->map_type) &&
11582 meta.const_map_key >= 0 &&
11583 meta.const_map_key < meta.map_ptr->max_entries)
11584 ret_flag &= ~PTR_MAYBE_NULL;
11585
11586 regs[BPF_REG_0].map_ptr = meta.map_ptr;
11587 regs[BPF_REG_0].map_uid = meta.map_uid;
11588 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
11589 if (!type_may_be_null(ret_flag) &&
11590 btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
11591 regs[BPF_REG_0].id = ++env->id_gen;
11592 }
11593 break;
11594 case RET_PTR_TO_SOCKET:
11595 mark_reg_known_zero(env, regs, BPF_REG_0);
11596 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
11597 break;
11598 case RET_PTR_TO_SOCK_COMMON:
11599 mark_reg_known_zero(env, regs, BPF_REG_0);
11600 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
11601 break;
11602 case RET_PTR_TO_TCP_SOCK:
11603 mark_reg_known_zero(env, regs, BPF_REG_0);
11604 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
11605 break;
11606 case RET_PTR_TO_MEM:
11607 mark_reg_known_zero(env, regs, BPF_REG_0);
11608 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
11609 regs[BPF_REG_0].mem_size = meta.mem_size;
11610 break;
11611 case RET_PTR_TO_MEM_OR_BTF_ID:
11612 {
11613 const struct btf_type *t;
11614
11615 mark_reg_known_zero(env, regs, BPF_REG_0);
11616 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
11617 if (!btf_type_is_struct(t)) {
11618 u32 tsize;
11619 const struct btf_type *ret;
11620 const char *tname;
11621
11622 /* resolve the type size of ksym. */
11623 ret = btf_resolve_size(meta.ret_btf, t, &tsize);
11624 if (IS_ERR(ret)) {
11625 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
11626 verbose(env, "unable to resolve the size of type '%s': %ld\n",
11627 tname, PTR_ERR(ret));
11628 return -EINVAL;
11629 }
11630 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
11631 regs[BPF_REG_0].mem_size = tsize;
11632 } else {
11633 if (returns_cpu_specific_alloc_ptr) {
11634 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
11635 } else {
11636 /* MEM_RDONLY may be carried from ret_flag, but it
11637 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
11638 * it will confuse the check of PTR_TO_BTF_ID in
11639 * check_mem_access().
11640 */
11641 ret_flag &= ~MEM_RDONLY;
11642 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
11643 }
11644
11645 regs[BPF_REG_0].btf = meta.ret_btf;
11646 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
11647 }
11648 break;
11649 }
11650 case RET_PTR_TO_BTF_ID:
11651 {
11652 struct btf *ret_btf;
11653 int ret_btf_id;
11654
11655 mark_reg_known_zero(env, regs, BPF_REG_0);
11656 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
11657 if (func_id == BPF_FUNC_kptr_xchg) {
11658 ret_btf = meta.kptr_field->kptr.btf;
11659 ret_btf_id = meta.kptr_field->kptr.btf_id;
11660 if (!btf_is_kernel(ret_btf)) {
11661 regs[BPF_REG_0].type |= MEM_ALLOC;
11662 if (meta.kptr_field->type == BPF_KPTR_PERCPU)
11663 regs[BPF_REG_0].type |= MEM_PERCPU;
11664 }
11665 } else {
11666 if (fn->ret_btf_id == BPF_PTR_POISON) {
11667 verbose(env, "verifier internal error:");
11668 verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
11669 func_id_name(func_id));
11670 return -EINVAL;
11671 }
11672 ret_btf = btf_vmlinux;
11673 ret_btf_id = *fn->ret_btf_id;
11674 }
11675 if (ret_btf_id == 0) {
11676 verbose(env, "invalid return type %u of func %s#%d\n",
11677 base_type(ret_type), func_id_name(func_id),
11678 func_id);
11679 return -EINVAL;
11680 }
11681 regs[BPF_REG_0].btf = ret_btf;
11682 regs[BPF_REG_0].btf_id = ret_btf_id;
11683 break;
11684 }
11685 default:
11686 verbose(env, "unknown return type %u of func %s#%d\n",
11687 base_type(ret_type), func_id_name(func_id), func_id);
11688 return -EINVAL;
11689 }
11690
11691 if (type_may_be_null(regs[BPF_REG_0].type))
11692 regs[BPF_REG_0].id = ++env->id_gen;
11693
11694 if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
11695 verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
11696 func_id_name(func_id), func_id);
11697 return -EFAULT;
11698 }
11699
11700 if (is_dynptr_ref_function(func_id))
11701 regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
11702
11703 if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
11704 /* For release_reference() */
11705 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
11706 } else if (is_acquire_function(func_id, meta.map_ptr)) {
11707 int id = acquire_reference(env, insn_idx);
11708
11709 if (id < 0)
11710 return id;
11711 /* For mark_ptr_or_null_reg() */
11712 regs[BPF_REG_0].id = id;
11713 /* For release_reference() */
11714 regs[BPF_REG_0].ref_obj_id = id;
11715 }
11716
11717 err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
11718 if (err)
11719 return err;
11720
11721 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
11722 if (err)
11723 return err;
11724
11725 if ((func_id == BPF_FUNC_get_stack ||
11726 func_id == BPF_FUNC_get_task_stack) &&
11727 !env->prog->has_callchain_buf) {
11728 const char *err_str;
11729
11730 #ifdef CONFIG_PERF_EVENTS
11731 err = get_callchain_buffers(sysctl_perf_event_max_stack);
11732 err_str = "cannot get callchain buffer for func %s#%d\n";
11733 #else
11734 err = -ENOTSUPP;
11735 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
11736 #endif
11737 if (err) {
11738 verbose(env, err_str, func_id_name(func_id), func_id);
11739 return err;
11740 }
11741
11742 env->prog->has_callchain_buf = true;
11743 }
11744
11745 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
11746 env->prog->call_get_stack = true;
11747
11748 if (func_id == BPF_FUNC_get_func_ip) {
11749 if (check_get_func_ip(env))
11750 return -ENOTSUPP;
11751 env->prog->call_get_func_ip = true;
11752 }
11753
11754 if (changes_data)
11755 clear_all_pkt_pointers(env);
11756 return 0;
11757 }
11758
11759 /* mark_btf_func_reg_size() is used when the reg size is determined by
11760 * the BTF func_proto's return value size and argument.
11761 */
11762 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs,
11763 u32 regno, size_t reg_size)
11764 {
11765 struct bpf_reg_state *reg = &regs[regno];
11766
11767 if (regno == BPF_REG_0) {
11768 /* Function return value */
11769 reg->live |= REG_LIVE_WRITTEN;
11770 reg->subreg_def = reg_size == sizeof(u64) ?
11771 DEF_NOT_SUBREG : env->insn_idx + 1;
11772 } else {
11773 /* Function argument */
11774 if (reg_size == sizeof(u64)) {
11775 mark_insn_zext(env, reg);
11776 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
11777 } else {
11778 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
11779 }
11780 }
11781 }
11782
11783 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
11784 size_t reg_size)
11785 {
11786 return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size);
11787 }
11788
11789 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
11790 {
11791 return meta->kfunc_flags & KF_ACQUIRE;
11792 }
11793
11794 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
11795 {
11796 return meta->kfunc_flags & KF_RELEASE;
11797 }
11798
11799 static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
11800 {
11801 return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
11802 }
11803
11804 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
11805 {
11806 return meta->kfunc_flags & KF_SLEEPABLE;
11807 }
11808
11809 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
11810 {
11811 return meta->kfunc_flags & KF_DESTRUCTIVE;
11812 }
11813
11814 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
11815 {
11816 return meta->kfunc_flags & KF_RCU;
11817 }
11818
11819 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
11820 {
11821 return meta->kfunc_flags & KF_RCU_PROTECTED;
11822 }
11823
11824 static bool is_kfunc_arg_mem_size(const struct btf *btf,
11825 const struct btf_param *arg,
11826 const struct bpf_reg_state *reg)
11827 {
11828 const struct btf_type *t;
11829
11830 t = btf_type_skip_modifiers(btf, arg->type, NULL);
11831 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
11832 return false;
11833
11834 return btf_param_match_suffix(btf, arg, "__sz");
11835 }
11836
11837 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
11838 const struct btf_param *arg,
11839 const struct bpf_reg_state *reg)
11840 {
11841 const struct btf_type *t;
11842
11843 t = btf_type_skip_modifiers(btf, arg->type, NULL);
11844 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
11845 return false;
11846
11847 return btf_param_match_suffix(btf, arg, "__szk");
11848 }
11849
11850 static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
11851 {
11852 return btf_param_match_suffix(btf, arg, "__opt");
11853 }
11854
11855 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
11856 {
11857 return btf_param_match_suffix(btf, arg, "__k");
11858 }
11859
11860 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
11861 {
11862 return btf_param_match_suffix(btf, arg, "__ign");
11863 }
11864
11865 static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
11866 {
11867 return btf_param_match_suffix(btf, arg, "__map");
11868 }
11869
11870 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
11871 {
11872 return btf_param_match_suffix(btf, arg, "__alloc");
11873 }
11874
11875 static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
11876 {
11877 return btf_param_match_suffix(btf, arg, "__uninit");
11878 }
11879
11880 static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
11881 {
11882 return btf_param_match_suffix(btf, arg, "__refcounted_kptr");
11883 }
11884
11885 static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
11886 {
11887 return btf_param_match_suffix(btf, arg, "__nullable");
11888 }
11889
11890 static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
11891 {
11892 return btf_param_match_suffix(btf, arg, "__str");
11893 }
11894
11895 static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
11896 {
11897 return btf_param_match_suffix(btf, arg, "__irq_flag");
11898 }
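
/* Illustrative sketch (not from this file): the helpers above key off
 * naming-convention suffixes on kfunc parameter names. A hypothetical kfunc
 * using a few of them could be declared as:
 *
 *	__bpf_kfunc int bpf_example_fill(void *dst, u32 dst__sz,
 *					 u64 flags__k, void *aux__ign);
 *
 * where dst__sz carries the size of the dst buffer, flags__k must be a known
 * constant, and aux__ign is skipped by argument checking. The kfunc name and
 * parameters here are made up purely for illustration.
 */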
11899
11900 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
11901 const struct btf_param *arg,
11902 const char *name)
11903 {
11904 int len, target_len = strlen(name);
11905 const char *param_name;
11906
11907 param_name = btf_name_by_offset(btf, arg->name_off);
11908 if (str_is_empty(param_name))
11909 return false;
11910 len = strlen(param_name);
11911 if (len != target_len)
11912 return false;
11913 if (strcmp(param_name, name))
11914 return false;
11915
11916 return true;
11917 }
11918
11919 enum {
11920 KF_ARG_DYNPTR_ID,
11921 KF_ARG_LIST_HEAD_ID,
11922 KF_ARG_LIST_NODE_ID,
11923 KF_ARG_RB_ROOT_ID,
11924 KF_ARG_RB_NODE_ID,
11925 KF_ARG_WORKQUEUE_ID,
11926 KF_ARG_RES_SPIN_LOCK_ID,
11927 };
11928
11929 BTF_ID_LIST(kf_arg_btf_ids)
11930 BTF_ID(struct, bpf_dynptr)
11931 BTF_ID(struct, bpf_list_head)
11932 BTF_ID(struct, bpf_list_node)
11933 BTF_ID(struct, bpf_rb_root)
11934 BTF_ID(struct, bpf_rb_node)
11935 BTF_ID(struct, bpf_wq)
11936 BTF_ID(struct, bpf_res_spin_lock)
11937
11938 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
11939 const struct btf_param *arg, int type)
11940 {
11941 const struct btf_type *t;
11942 u32 res_id;
11943
11944 t = btf_type_skip_modifiers(btf, arg->type, NULL);
11945 if (!t)
11946 return false;
11947 if (!btf_type_is_ptr(t))
11948 return false;
11949 t = btf_type_skip_modifiers(btf, t->type, &res_id);
11950 if (!t)
11951 return false;
11952 return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
11953 }
11954
11955 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
11956 {
11957 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
11958 }
11959
11960 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
11961 {
11962 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
11963 }
11964
11965 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
11966 {
11967 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
11968 }
11969
11970 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
11971 {
11972 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
11973 }
11974
11975 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
11976 {
11977 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
11978 }
11979
11980 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
11981 {
11982 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
11983 }
11984
11985 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
11986 {
11987 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
11988 }
11989
11990 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
11991 const struct btf_param *arg)
11992 {
11993 const struct btf_type *t;
11994
11995 t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
11996 if (!t)
11997 return false;
11998
11999 return true;
12000 }
12001
12002 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
12003 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
12004 const struct btf *btf,
12005 const struct btf_type *t, int rec)
12006 {
12007 const struct btf_type *member_type;
12008 const struct btf_member *member;
12009 u32 i;
12010
12011 if (!btf_type_is_struct(t))
12012 return false;
12013
12014 for_each_member(i, t, member) {
12015 const struct btf_array *array;
12016
12017 member_type = btf_type_skip_modifiers(btf, member->type, NULL);
12018 if (btf_type_is_struct(member_type)) {
12019 if (rec >= 3) {
12020 verbose(env, "max struct nesting depth exceeded\n");
12021 return false;
12022 }
12023 if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
12024 return false;
12025 continue;
12026 }
12027 if (btf_type_is_array(member_type)) {
12028 array = btf_array(member_type);
12029 if (!array->nelems)
12030 return false;
12031 member_type = btf_type_skip_modifiers(btf, array->type, NULL);
12032 if (!btf_type_is_scalar(member_type))
12033 return false;
12034 continue;
12035 }
12036 if (!btf_type_is_scalar(member_type))
12037 return false;
12038 }
12039 return true;
12040 }
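
/* Illustrative sketch (not from this file): struct shapes accepted and
 * rejected by __btf_type_is_scalar_struct() above. Type names are examples.
 *
 *	struct ok {			// accepted: scalars, scalar arrays,
 *		int a;			// and nested scalar structs
 *		u32 b[4];
 *		struct { u16 c; } inner;
 *	};
 *
 *	struct not_ok {			// rejected: contains a pointer member
 *		int a;
 *		void *p;
 *	};
 */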
12041
12042 enum kfunc_ptr_arg_type {
12043 KF_ARG_PTR_TO_CTX,
12044 KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
12045 KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
12046 KF_ARG_PTR_TO_DYNPTR,
12047 KF_ARG_PTR_TO_ITER,
12048 KF_ARG_PTR_TO_LIST_HEAD,
12049 KF_ARG_PTR_TO_LIST_NODE,
12050 KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
12051 KF_ARG_PTR_TO_MEM,
12052 KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
12053 KF_ARG_PTR_TO_CALLBACK,
12054 KF_ARG_PTR_TO_RB_ROOT,
12055 KF_ARG_PTR_TO_RB_NODE,
12056 KF_ARG_PTR_TO_NULL,
12057 KF_ARG_PTR_TO_CONST_STR,
12058 KF_ARG_PTR_TO_MAP,
12059 KF_ARG_PTR_TO_WORKQUEUE,
12060 KF_ARG_PTR_TO_IRQ_FLAG,
12061 KF_ARG_PTR_TO_RES_SPIN_LOCK,
12062 };
12063
12064 enum special_kfunc_type {
12065 KF_bpf_obj_new_impl,
12066 KF_bpf_obj_drop_impl,
12067 KF_bpf_refcount_acquire_impl,
12068 KF_bpf_list_push_front_impl,
12069 KF_bpf_list_push_back_impl,
12070 KF_bpf_list_pop_front,
12071 KF_bpf_list_pop_back,
12072 KF_bpf_cast_to_kern_ctx,
12073 KF_bpf_rdonly_cast,
12074 KF_bpf_rcu_read_lock,
12075 KF_bpf_rcu_read_unlock,
12076 KF_bpf_rbtree_remove,
12077 KF_bpf_rbtree_add_impl,
12078 KF_bpf_rbtree_first,
12079 KF_bpf_dynptr_from_skb,
12080 KF_bpf_dynptr_from_xdp,
12081 KF_bpf_dynptr_slice,
12082 KF_bpf_dynptr_slice_rdwr,
12083 KF_bpf_dynptr_clone,
12084 KF_bpf_percpu_obj_new_impl,
12085 KF_bpf_percpu_obj_drop_impl,
12086 KF_bpf_throw,
12087 KF_bpf_wq_set_callback_impl,
12088 KF_bpf_preempt_disable,
12089 KF_bpf_preempt_enable,
12090 KF_bpf_iter_css_task_new,
12091 KF_bpf_session_cookie,
12092 KF_bpf_get_kmem_cache,
12093 KF_bpf_local_irq_save,
12094 KF_bpf_local_irq_restore,
12095 KF_bpf_iter_num_new,
12096 KF_bpf_iter_num_next,
12097 KF_bpf_iter_num_destroy,
12098 KF_bpf_set_dentry_xattr,
12099 KF_bpf_remove_dentry_xattr,
12100 KF_bpf_res_spin_lock,
12101 KF_bpf_res_spin_unlock,
12102 KF_bpf_res_spin_lock_irqsave,
12103 KF_bpf_res_spin_unlock_irqrestore,
12104 };
12105
12106 BTF_SET_START(special_kfunc_set)
12107 BTF_ID(func, bpf_obj_new_impl)
12108 BTF_ID(func, bpf_obj_drop_impl)
12109 BTF_ID(func, bpf_refcount_acquire_impl)
12110 BTF_ID(func, bpf_list_push_front_impl)
12111 BTF_ID(func, bpf_list_push_back_impl)
12112 BTF_ID(func, bpf_list_pop_front)
12113 BTF_ID(func, bpf_list_pop_back)
12114 BTF_ID(func, bpf_cast_to_kern_ctx)
12115 BTF_ID(func, bpf_rdonly_cast)
12116 BTF_ID(func, bpf_rbtree_remove)
12117 BTF_ID(func, bpf_rbtree_add_impl)
12118 BTF_ID(func, bpf_rbtree_first)
12119 #ifdef CONFIG_NET
12120 BTF_ID(func, bpf_dynptr_from_skb)
12121 BTF_ID(func, bpf_dynptr_from_xdp)
12122 #endif
12123 BTF_ID(func, bpf_dynptr_slice)
12124 BTF_ID(func, bpf_dynptr_slice_rdwr)
12125 BTF_ID(func, bpf_dynptr_clone)
12126 BTF_ID(func, bpf_percpu_obj_new_impl)
12127 BTF_ID(func, bpf_percpu_obj_drop_impl)
12128 BTF_ID(func, bpf_throw)
12129 BTF_ID(func, bpf_wq_set_callback_impl)
12130 #ifdef CONFIG_CGROUPS
12131 BTF_ID(func, bpf_iter_css_task_new)
12132 #endif
12133 #ifdef CONFIG_BPF_LSM
12134 BTF_ID(func, bpf_set_dentry_xattr)
12135 BTF_ID(func, bpf_remove_dentry_xattr)
12136 #endif
12137 BTF_SET_END(special_kfunc_set)
12138
12139 BTF_ID_LIST(special_kfunc_list)
12140 BTF_ID(func, bpf_obj_new_impl)
12141 BTF_ID(func, bpf_obj_drop_impl)
12142 BTF_ID(func, bpf_refcount_acquire_impl)
12143 BTF_ID(func, bpf_list_push_front_impl)
12144 BTF_ID(func, bpf_list_push_back_impl)
12145 BTF_ID(func, bpf_list_pop_front)
12146 BTF_ID(func, bpf_list_pop_back)
12147 BTF_ID(func, bpf_cast_to_kern_ctx)
12148 BTF_ID(func, bpf_rdonly_cast)
12149 BTF_ID(func, bpf_rcu_read_lock)
12150 BTF_ID(func, bpf_rcu_read_unlock)
12151 BTF_ID(func, bpf_rbtree_remove)
12152 BTF_ID(func, bpf_rbtree_add_impl)
12153 BTF_ID(func, bpf_rbtree_first)
12154 #ifdef CONFIG_NET
12155 BTF_ID(func, bpf_dynptr_from_skb)
12156 BTF_ID(func, bpf_dynptr_from_xdp)
12157 #else
12158 BTF_ID_UNUSED
12159 BTF_ID_UNUSED
12160 #endif
12161 BTF_ID(func, bpf_dynptr_slice)
12162 BTF_ID(func, bpf_dynptr_slice_rdwr)
12163 BTF_ID(func, bpf_dynptr_clone)
12164 BTF_ID(func, bpf_percpu_obj_new_impl)
12165 BTF_ID(func, bpf_percpu_obj_drop_impl)
12166 BTF_ID(func, bpf_throw)
12167 BTF_ID(func, bpf_wq_set_callback_impl)
12168 BTF_ID(func, bpf_preempt_disable)
12169 BTF_ID(func, bpf_preempt_enable)
12170 #ifdef CONFIG_CGROUPS
12171 BTF_ID(func, bpf_iter_css_task_new)
12172 #else
12173 BTF_ID_UNUSED
12174 #endif
12175 #ifdef CONFIG_BPF_EVENTS
12176 BTF_ID(func, bpf_session_cookie)
12177 #else
12178 BTF_ID_UNUSED
12179 #endif
12180 BTF_ID(func, bpf_get_kmem_cache)
12181 BTF_ID(func, bpf_local_irq_save)
12182 BTF_ID(func, bpf_local_irq_restore)
12183 BTF_ID(func, bpf_iter_num_new)
12184 BTF_ID(func, bpf_iter_num_next)
12185 BTF_ID(func, bpf_iter_num_destroy)
12186 #ifdef CONFIG_BPF_LSM
12187 BTF_ID(func, bpf_set_dentry_xattr)
12188 BTF_ID(func, bpf_remove_dentry_xattr)
12189 #else
12190 BTF_ID_UNUSED
12191 BTF_ID_UNUSED
12192 #endif
12193 BTF_ID(func, bpf_res_spin_lock)
12194 BTF_ID(func, bpf_res_spin_unlock)
12195 BTF_ID(func, bpf_res_spin_lock_irqsave)
12196 BTF_ID(func, bpf_res_spin_unlock_irqrestore)
12197
12198 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
12199 {
12200 if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
12201 meta->arg_owning_ref) {
12202 return false;
12203 }
12204
12205 return meta->kfunc_flags & KF_RET_NULL;
12206 }
12207
12208 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
12209 {
12210 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
12211 }
12212
12213 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
12214 {
12215 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
12216 }
12217
12218 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
12219 {
12220 return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
12221 }
12222
12223 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
12224 {
12225 return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
12226 }
12227
12228 static enum kfunc_ptr_arg_type
12229 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
12230 struct bpf_kfunc_call_arg_meta *meta,
12231 const struct btf_type *t, const struct btf_type *ref_t,
12232 const char *ref_tname, const struct btf_param *args,
12233 int argno, int nargs)
12234 {
12235 u32 regno = argno + 1;
12236 struct bpf_reg_state *regs = cur_regs(env);
12237 struct bpf_reg_state *reg = &regs[regno];
12238 bool arg_mem_size = false;
12239
12240 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
12241 return KF_ARG_PTR_TO_CTX;
12242
12243 /* In this function, we verify the kfunc's BTF as per the argument type,
12244 * leaving the rest of the verification with respect to the register
12245 * type to our caller. When a set of conditions hold in the BTF type of
12246 * arguments, we resolve it to a known kfunc_ptr_arg_type.
12247 */
12248 if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
12249 return KF_ARG_PTR_TO_CTX;
12250
12251 if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg))
12252 return KF_ARG_PTR_TO_NULL;
12253
12254 if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
12255 return KF_ARG_PTR_TO_ALLOC_BTF_ID;
12256
12257 if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
12258 return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
12259
12260 if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
12261 return KF_ARG_PTR_TO_DYNPTR;
12262
12263 if (is_kfunc_arg_iter(meta, argno, &args[argno]))
12264 return KF_ARG_PTR_TO_ITER;
12265
12266 if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
12267 return KF_ARG_PTR_TO_LIST_HEAD;
12268
12269 if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
12270 return KF_ARG_PTR_TO_LIST_NODE;
12271
12272 if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
12273 return KF_ARG_PTR_TO_RB_ROOT;
12274
12275 if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
12276 return KF_ARG_PTR_TO_RB_NODE;
12277
12278 if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
12279 return KF_ARG_PTR_TO_CONST_STR;
12280
12281 if (is_kfunc_arg_map(meta->btf, &args[argno]))
12282 return KF_ARG_PTR_TO_MAP;
12283
12284 if (is_kfunc_arg_wq(meta->btf, &args[argno]))
12285 return KF_ARG_PTR_TO_WORKQUEUE;
12286
12287 if (is_kfunc_arg_irq_flag(meta->btf, &args[argno]))
12288 return KF_ARG_PTR_TO_IRQ_FLAG;
12289
12290 if (is_kfunc_arg_res_spin_lock(meta->btf, &args[argno]))
12291 return KF_ARG_PTR_TO_RES_SPIN_LOCK;
12292
12293 if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
12294 if (!btf_type_is_struct(ref_t)) {
12295 verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
12296 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
12297 return -EINVAL;
12298 }
12299 return KF_ARG_PTR_TO_BTF_ID;
12300 }
12301
12302 if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
12303 return KF_ARG_PTR_TO_CALLBACK;
12304
12305 if (argno + 1 < nargs &&
12306 (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
12307 is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
12308 arg_mem_size = true;
12309
12310 /* This is the catch all argument type of register types supported by
12311 * check_helper_mem_access. However, we only allow when argument type is
12312 * pointer to scalar, or struct composed (recursively) of scalars. When
12313 * arg_mem_size is true, the pointer can be void *.
12314 */
12315 if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
12316 (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
12317 verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
12318 argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
12319 return -EINVAL;
12320 }
12321 return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
12322 }
12323
12324 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
12325 struct bpf_reg_state *reg,
12326 const struct btf_type *ref_t,
12327 const char *ref_tname, u32 ref_id,
12328 struct bpf_kfunc_call_arg_meta *meta,
12329 int argno)
12330 {
12331 const struct btf_type *reg_ref_t;
12332 bool strict_type_match = false;
12333 const struct btf *reg_btf;
12334 const char *reg_ref_tname;
12335 bool taking_projection;
12336 bool struct_same;
12337 u32 reg_ref_id;
12338
12339 if (base_type(reg->type) == PTR_TO_BTF_ID) {
12340 reg_btf = reg->btf;
12341 reg_ref_id = reg->btf_id;
12342 } else {
12343 reg_btf = btf_vmlinux;
12344 reg_ref_id = *reg2btf_ids[base_type(reg->type)];
12345 }
12346
12347 /* Enforce strict type matching for calls to kfuncs that are acquiring
12348 * or releasing a reference, or are no-cast aliases. We do _not_
12349 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
12350 * as we want to enable BPF programs to pass types that are bitwise
12351 * equivalent without forcing them to explicitly cast with something
12352 * like bpf_cast_to_kern_ctx().
12353 *
12354 * For example, say we had a type like the following:
12355 *
12356 * struct bpf_cpumask {
12357 * cpumask_t cpumask;
12358 * refcount_t usage;
12359 * };
12360 *
12361 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
12362 * to a struct cpumask, so it would be safe to pass a struct
12363 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
12364 *
12365 * The philosophy here is similar to how we allow scalars of different
12366 * types to be passed to kfuncs as long as the size is the same. The
12367 * only difference here is that we're simply allowing
12368 * btf_struct_ids_match() to walk the struct at the 0th offset, and
12369 * resolve types.
12370 */
12371 if ((is_kfunc_release(meta) && reg->ref_obj_id) ||
12372 btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
12373 strict_type_match = true;
12374
12375 WARN_ON_ONCE(is_kfunc_release(meta) &&
12376 (reg->off || !tnum_is_const(reg->var_off) ||
12377 reg->var_off.value));
12378
12379 reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
12380 reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
12381 struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match);
12382 /* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
12383 * actually use it -- it must cast to the underlying type. So we allow
12384 * caller to pass in the underlying type.
12385 */
12386 taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
12387 if (!taking_projection && !struct_same) {
12388 verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
12389 meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
12390 btf_type_str(reg_ref_t), reg_ref_tname);
12391 return -EINVAL;
12392 }
12393 return 0;
12394 }
12395
12396 static int process_irq_flag(struct bpf_verifier_env *env, int regno,
12397 struct bpf_kfunc_call_arg_meta *meta)
12398 {
12399 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
12400 int err, kfunc_class = IRQ_NATIVE_KFUNC;
12401 bool irq_save;
12402
12403 if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] ||
12404 meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) {
12405 irq_save = true;
12406 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
12407 kfunc_class = IRQ_LOCK_KFUNC;
12408 } else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] ||
12409 meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) {
12410 irq_save = false;
12411 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
12412 kfunc_class = IRQ_LOCK_KFUNC;
12413 } else {
12414 verbose(env, "verifier internal error: unknown irq flags kfunc\n");
12415 return -EFAULT;
12416 }
12417
12418 if (irq_save) {
12419 if (!is_irq_flag_reg_valid_uninit(env, reg)) {
12420 verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1);
12421 return -EINVAL;
12422 }
12423
12424 err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false);
12425 if (err)
12426 return err;
12427
12428 err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class);
12429 if (err)
12430 return err;
12431 } else {
12432 err = is_irq_flag_reg_valid_init(env, reg);
12433 if (err) {
12434 verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1);
12435 return err;
12436 }
12437
12438 err = mark_irq_flag_read(env, reg);
12439 if (err)
12440 return err;
12441
12442 err = unmark_stack_slot_irq_flag(env, reg, kfunc_class);
12443 if (err)
12444 return err;
12445 }
12446 return 0;
12447 }
12448
12450 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
12451 {
12452 struct btf_record *rec = reg_btf_record(reg);
12453
12454 if (!env->cur_state->active_locks) {
12455 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
12456 return -EFAULT;
12457 }
12458
12459 if (type_flag(reg->type) & NON_OWN_REF) {
12460 verbose(env, "verifier internal error: NON_OWN_REF already set\n");
12461 return -EFAULT;
12462 }
12463
12464 reg->type |= NON_OWN_REF;
12465 if (rec->refcount_off >= 0)
12466 reg->type |= MEM_RCU;
12467
12468 return 0;
12469 }
12470
12471 static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
12472 {
12473 struct bpf_verifier_state *state = env->cur_state;
12474 struct bpf_func_state *unused;
12475 struct bpf_reg_state *reg;
12476 int i;
12477
12478 if (!ref_obj_id) {
12479 verbose(env, "verifier internal error: ref_obj_id is zero for "
12480 "owning -> non-owning conversion\n");
12481 return -EFAULT;
12482 }
12483
12484 for (i = 0; i < state->acquired_refs; i++) {
12485 if (state->refs[i].id != ref_obj_id)
12486 continue;
12487
12488 /* Clear ref_obj_id here so release_reference doesn't clobber
12489 * the whole reg
12490 */
12491 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
12492 if (reg->ref_obj_id == ref_obj_id) {
12493 reg->ref_obj_id = 0;
12494 ref_set_non_owning(env, reg);
12495 }
12496 }));
12497 return 0;
12498 }
12499
12500 verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
12501 return -EFAULT;
12502 }
12503
12504 /* Implementation details:
12505 *
12506 * Each register points to some region of memory, which we define as an
12507 * allocation. Each allocation may embed a bpf_spin_lock which protects any
12508 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
12509 * allocation. The lock and the data it protects are colocated in the same
12510 * memory region.
12511 *
12512 * Hence, every time a register holds a pointer value pointing to such
12513 * allocation, the verifier preserves a unique reg->id for it.
12514 *
12515 * The verifier remembers the lock 'ptr' and the lock 'id' whenever
12516 * bpf_spin_lock is called.
12517 *
12518 * To enable this, lock state in the verifier captures two values:
12519 * active_lock.ptr = Register's type specific pointer
12520 * active_lock.id = A unique ID for each register pointer value
12521 *
12522 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
12523 * supported register types.
12524 *
12525 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
12526 * allocated objects is the reg->btf pointer.
12527 *
12528 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
12529 * can establish the provenance of the map value statically for each distinct
12530 * lookup into such maps. They always contain a single map value hence unique
12531 * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
12532 *
12533 * So, in case of global variables, they use array maps with max_entries = 1,
12534 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
12535 * into the same map value as max_entries is 1, as described above).
12536 *
12537 * In case of inner map lookups, the inner map pointer has same map_ptr as the
12538 * outer map pointer (in verifier context), but each lookup into an inner map
12539 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
12540 * maps from the same outer map share the same map_ptr as active_lock.ptr, they
12541 * will get different reg->id assigned to each lookup, hence different
12542 * active_lock.id.
12543 *
12544 * In case of allocated objects, active_lock.ptr is the reg->btf, and the
12545 * reg->id is a unique ID preserved after the NULL pointer check on the pointer
12546 * returned from bpf_obj_new. Each allocation receives a new reg->id.
12547 */
12548 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
12549 {
12550 struct bpf_reference_state *s;
12551 void *ptr;
12552 u32 id;
12553
12554 switch ((int)reg->type) {
12555 case PTR_TO_MAP_VALUE:
12556 ptr = reg->map_ptr;
12557 break;
12558 case PTR_TO_BTF_ID | MEM_ALLOC:
12559 ptr = reg->btf;
12560 break;
12561 default:
12562 verbose(env, "verifier internal error: unknown reg type for lock check\n");
12563 return -EFAULT;
12564 }
12565 id = reg->id;
12566
12567 if (!env->cur_state->active_locks)
12568 return -EINVAL;
12569 s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr);
12570 if (!s) {
12571 verbose(env, "held lock and object are not in the same allocation\n");
12572 return -EINVAL;
12573 }
12574 return 0;
12575 }
12576
12577 static bool is_bpf_list_api_kfunc(u32 btf_id)
12578 {
12579 return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
12580 btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
12581 btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
12582 btf_id == special_kfunc_list[KF_bpf_list_pop_back];
12583 }
12584
12585 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
12586 {
12587 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
12588 btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
12589 btf_id == special_kfunc_list[KF_bpf_rbtree_first];
12590 }
12591
12592 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
12593 {
12594 return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
12595 btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
12596 btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
12597 }
12598
12599 static bool is_bpf_graph_api_kfunc(u32 btf_id)
12600 {
12601 return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
12602 btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
12603 }
12604
12605 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
12606 {
12607 return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
12608 btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
12609 btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
12610 btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
12611 }
12612
12613 static bool kfunc_spin_allowed(u32 btf_id)
12614 {
12615 return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
12616 is_bpf_res_spin_lock_kfunc(btf_id);
12617 }
12618
12619 static bool is_sync_callback_calling_kfunc(u32 btf_id)
12620 {
12621 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
12622 }
12623
12624 static bool is_async_callback_calling_kfunc(u32 btf_id)
12625 {
12626 return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
12627 }
12628
12629 static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
12630 {
12631 return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
12632 insn->imm == special_kfunc_list[KF_bpf_throw];
12633 }
12634
12635 static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id)
12636 {
12637 return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
12638 }
12639
12640 static bool is_callback_calling_kfunc(u32 btf_id)
12641 {
12642 return is_sync_callback_calling_kfunc(btf_id) ||
12643 is_async_callback_calling_kfunc(btf_id);
12644 }
12645
12646 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
12647 {
12648 return is_bpf_rbtree_api_kfunc(btf_id);
12649 }
12650
12651 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
12652 enum btf_field_type head_field_type,
12653 u32 kfunc_btf_id)
12654 {
12655 bool ret;
12656
12657 switch (head_field_type) {
12658 case BPF_LIST_HEAD:
12659 ret = is_bpf_list_api_kfunc(kfunc_btf_id);
12660 break;
12661 case BPF_RB_ROOT:
12662 ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
12663 break;
12664 default:
12665 verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
12666 btf_field_type_name(head_field_type));
12667 return false;
12668 }
12669
12670 if (!ret)
12671 verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
12672 btf_field_type_name(head_field_type));
12673 return ret;
12674 }
12675
12676 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
12677 enum btf_field_type node_field_type,
12678 u32 kfunc_btf_id)
12679 {
12680 bool ret;
12681
12682 switch (node_field_type) {
12683 case BPF_LIST_NODE:
12684 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
12685 kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
12686 break;
12687 case BPF_RB_NODE:
12688 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
12689 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
12690 break;
12691 default:
12692 verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
12693 btf_field_type_name(node_field_type));
12694 return false;
12695 }
12696
12697 if (!ret)
12698 verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
12699 btf_field_type_name(node_field_type));
12700 return ret;
12701 }
12702
12703 static int
12704 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
12705 struct bpf_reg_state *reg, u32 regno,
12706 struct bpf_kfunc_call_arg_meta *meta,
12707 enum btf_field_type head_field_type,
12708 struct btf_field **head_field)
12709 {
12710 const char *head_type_name;
12711 struct btf_field *field;
12712 struct btf_record *rec;
12713 u32 head_off;
12714
12715 if (meta->btf != btf_vmlinux) {
12716 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
12717 return -EFAULT;
12718 }
12719
12720 if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
12721 return -EFAULT;
12722
12723 head_type_name = btf_field_type_name(head_field_type);
12724 if (!tnum_is_const(reg->var_off)) {
12725 verbose(env,
12726 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
12727 regno, head_type_name);
12728 return -EINVAL;
12729 }
12730
12731 rec = reg_btf_record(reg);
12732 head_off = reg->off + reg->var_off.value;
12733 field = btf_record_find(rec, head_off, head_field_type);
12734 if (!field) {
12735 verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
12736 return -EINVAL;
12737 }
12738
12739 /* All functions require bpf_list_head to be protected using a bpf_spin_lock */
12740 if (check_reg_allocation_locked(env, reg)) {
12741 verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
12742 rec->spin_lock_off, head_type_name);
12743 return -EINVAL;
12744 }
12745
12746 if (*head_field) {
12747 verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
12748 return -EFAULT;
12749 }
12750 *head_field = field;
12751 return 0;
12752 }
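/* Editorial sketch: in BPF program terms, the lock check above corresponds to
 * the usual pattern (glock/ghead assumed to be declared in the same map value
 * or global data section, 'n' freshly allocated, member names assumed):
 *
 *	bpf_spin_lock(&glock);			// records active_lock.{ptr,id}
 *	bpf_list_push_front(&ghead, &n->list);	// head must be in the same
 *						// allocation as the held lock
 *	bpf_spin_unlock(&glock);
 *
 * Calling the push without the lock held is rejected with the
 * "bpf_spin_lock at off=%d must be held for bpf_list_head" message above.
 */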
12753
12754 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
12755 struct bpf_reg_state *reg, u32 regno,
12756 struct bpf_kfunc_call_arg_meta *meta)
12757 {
12758 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
12759 &meta->arg_list_head.field);
12760 }
12761
12762 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
12763 struct bpf_reg_state *reg, u32 regno,
12764 struct bpf_kfunc_call_arg_meta *meta)
12765 {
12766 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
12767 &meta->arg_rbtree_root.field);
12768 }
12769
12770 static int
12771 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
12772 struct bpf_reg_state *reg, u32 regno,
12773 struct bpf_kfunc_call_arg_meta *meta,
12774 enum btf_field_type head_field_type,
12775 enum btf_field_type node_field_type,
12776 struct btf_field **node_field)
12777 {
12778 const char *node_type_name;
12779 const struct btf_type *et, *t;
12780 struct btf_field *field;
12781 u32 node_off;
12782
12783 if (meta->btf != btf_vmlinux) {
12784 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
12785 return -EFAULT;
12786 }
12787
12788 if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
12789 return -EFAULT;
12790
12791 node_type_name = btf_field_type_name(node_field_type);
12792 if (!tnum_is_const(reg->var_off)) {
12793 verbose(env,
12794 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
12795 regno, node_type_name);
12796 return -EINVAL;
12797 }
12798
12799 node_off = reg->off + reg->var_off.value;
12800 field = reg_find_field_offset(reg, node_off, node_field_type);
12801 if (!field) {
12802 verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
12803 return -EINVAL;
12804 }
12805
12806 field = *node_field;
12807
12808 et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
12809 t = btf_type_by_id(reg->btf, reg->btf_id);
12810 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
12811 field->graph_root.value_btf_id, true)) {
12812 verbose(env, "operation on %s expects arg#1 %s at offset=%d "
12813 "in struct %s, but arg is at offset=%d in struct %s\n",
12814 btf_field_type_name(head_field_type),
12815 btf_field_type_name(node_field_type),
12816 field->graph_root.node_offset,
12817 btf_name_by_offset(field->graph_root.btf, et->name_off),
12818 node_off, btf_name_by_offset(reg->btf, t->name_off));
12819 return -EINVAL;
12820 }
12821 meta->arg_btf = reg->btf;
12822 meta->arg_btf_id = reg->btf_id;
12823
12824 if (node_off != field->graph_root.node_offset) {
12825 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
12826 node_off, btf_field_type_name(node_field_type),
12827 field->graph_root.node_offset,
12828 btf_name_by_offset(field->graph_root.btf, et->name_off));
12829 return -EINVAL;
12830 }
12831
12832 return 0;
12833 }
12834
12835 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
12836 struct bpf_reg_state *reg, u32 regno,
12837 struct bpf_kfunc_call_arg_meta *meta)
12838 {
12839 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
12840 BPF_LIST_HEAD, BPF_LIST_NODE,
12841 &meta->arg_list_head.field);
12842 }
12843
12844 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
12845 struct bpf_reg_state *reg, u32 regno,
12846 struct bpf_kfunc_call_arg_meta *meta)
12847 {
12848 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
12849 BPF_RB_ROOT, BPF_RB_NODE,
12850 &meta->arg_rbtree_root.field);
12851 }
12852
12853 /*
12854 * css_task iter allowlist is needed to avoid deadlocking on css_set_lock.
12855 * LSM hooks and iters (both sleepable and non-sleepable) are safe.
12856 * Any sleepable progs are also safe since bpf_check_attach_target() enforces
12857 * that they can only be attached to specific hook points.
12858 */
12859 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
12860 {
12861 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
12862
12863 switch (prog_type) {
12864 case BPF_PROG_TYPE_LSM:
12865 return true;
12866 case BPF_PROG_TYPE_TRACING:
12867 if (env->prog->expected_attach_type == BPF_TRACE_ITER)
12868 return true;
12869 fallthrough;
12870 default:
12871 return in_sleepable(env);
12872 }
12873 }
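/* Editorial sketch: an allowed user of the css_task iterator would be e.g. a
 * sleepable LSM program (error handling elided; how 'css' is obtained is left
 * out, CSS_TASK_ITER_PROCS is the usual css_task_iter flag):
 *
 *	struct bpf_iter_css_task it;
 *	struct task_struct *task;
 *
 *	bpf_iter_css_task_new(&it, css, CSS_TASK_ITER_PROCS);
 *	while ((task = bpf_iter_css_task_next(&it)))
 *		;	// inspect task
 *	bpf_iter_css_task_destroy(&it);
 *
 * The same iterator in a program type that may run with css_set_lock held
 * would risk the deadlock described above and is rejected by
 * check_css_task_iter_allowlist().
 */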
12874
12875 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
12876 int insn_idx)
12877 {
12878 const char *func_name = meta->func_name, *ref_tname;
12879 const struct btf *btf = meta->btf;
12880 const struct btf_param *args;
12881 struct btf_record *rec;
12882 u32 i, nargs;
12883 int ret;
12884
12885 args = (const struct btf_param *)(meta->func_proto + 1);
12886 nargs = btf_type_vlen(meta->func_proto);
12887 if (nargs > MAX_BPF_FUNC_REG_ARGS) {
12888 verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
12889 MAX_BPF_FUNC_REG_ARGS);
12890 return -EINVAL;
12891 }
12892
12893 /* Check that BTF function arguments match actual types that the
12894 * verifier sees.
12895 */
12896 for (i = 0; i < nargs; i++) {
12897 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
12898 const struct btf_type *t, *ref_t, *resolve_ret;
12899 enum bpf_arg_type arg_type = ARG_DONTCARE;
12900 u32 regno = i + 1, ref_id, type_size;
12901 bool is_ret_buf_sz = false;
12902 int kf_arg_type;
12903
12904 t = btf_type_skip_modifiers(btf, args[i].type, NULL);
12905
12906 if (is_kfunc_arg_ignore(btf, &args[i]))
12907 continue;
12908
12909 if (btf_type_is_scalar(t)) {
12910 if (reg->type != SCALAR_VALUE) {
12911 verbose(env, "R%d is not a scalar\n", regno);
12912 return -EINVAL;
12913 }
12914
12915 if (is_kfunc_arg_constant(meta->btf, &args[i])) {
12916 if (meta->arg_constant.found) {
12917 verbose(env, "verifier internal error: only one constant argument permitted\n");
12918 return -EFAULT;
12919 }
12920 if (!tnum_is_const(reg->var_off)) {
12921 verbose(env, "R%d must be a known constant\n", regno);
12922 return -EINVAL;
12923 }
12924 ret = mark_chain_precision(env, regno);
12925 if (ret < 0)
12926 return ret;
12927 meta->arg_constant.found = true;
12928 meta->arg_constant.value = reg->var_off.value;
12929 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
12930 meta->r0_rdonly = true;
12931 is_ret_buf_sz = true;
12932 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
12933 is_ret_buf_sz = true;
12934 }
12935
12936 if (is_ret_buf_sz) {
12937 if (meta->r0_size) {
12938 verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
12939 return -EINVAL;
12940 }
12941
12942 if (!tnum_is_const(reg->var_off)) {
12943 verbose(env, "R%d is not a const\n", regno);
12944 return -EINVAL;
12945 }
12946
12947 meta->r0_size = reg->var_off.value;
12948 ret = mark_chain_precision(env, regno);
12949 if (ret)
12950 return ret;
12951 }
12952 continue;
12953 }
12954
12955 if (!btf_type_is_ptr(t)) {
12956 verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
12957 return -EINVAL;
12958 }
12959
12960 if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
12961 (register_is_null(reg) || type_may_be_null(reg->type)) &&
12962 !is_kfunc_arg_nullable(meta->btf, &args[i])) {
12963 verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
12964 return -EACCES;
12965 }
12966
12967 if (reg->ref_obj_id) {
12968 if (is_kfunc_release(meta) && meta->ref_obj_id) {
12969 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
12970 regno, reg->ref_obj_id,
12971 meta->ref_obj_id);
12972 return -EFAULT;
12973 }
12974 meta->ref_obj_id = reg->ref_obj_id;
12975 if (is_kfunc_release(meta))
12976 meta->release_regno = regno;
12977 }
12978
12979 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
12980 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12981
12982 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
12983 if (kf_arg_type < 0)
12984 return kf_arg_type;
12985
12986 switch (kf_arg_type) {
12987 case KF_ARG_PTR_TO_NULL:
12988 continue;
12989 case KF_ARG_PTR_TO_MAP:
12990 if (!reg->map_ptr) {
12991 verbose(env, "pointer in R%d isn't map pointer\n", regno);
12992 return -EINVAL;
12993 }
12994 if (meta->map.ptr && reg->map_ptr->record->wq_off >= 0) {
12995 /* Use map_uid (which is unique id of inner map) to reject:
12996 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
12997 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
12998 * if (inner_map1 && inner_map2) {
12999 * wq = bpf_map_lookup_elem(inner_map1);
13000 * if (wq)
13001 * // mismatch would have been allowed
13002 * bpf_wq_init(wq, inner_map2);
13003 * }
13004 *
13005 * Comparing map_ptr is enough to distinguish normal and outer maps.
13006 */
13007 if (meta->map.ptr != reg->map_ptr ||
13008 meta->map.uid != reg->map_uid) {
13009 verbose(env,
13010 "workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
13011 meta->map.uid, reg->map_uid);
13012 return -EINVAL;
13013 }
13014 }
13015 meta->map.ptr = reg->map_ptr;
13016 meta->map.uid = reg->map_uid;
13017 fallthrough;
13018 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
13019 case KF_ARG_PTR_TO_BTF_ID:
13020 if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
13021 break;
13022
13023 if (!is_trusted_reg(reg)) {
13024 if (!is_kfunc_rcu(meta)) {
13025 verbose(env, "R%d must be referenced or trusted\n", regno);
13026 return -EINVAL;
13027 }
13028 if (!is_rcu_reg(reg)) {
13029 verbose(env, "R%d must be a rcu pointer\n", regno);
13030 return -EINVAL;
13031 }
13032 }
13033 fallthrough;
13034 case KF_ARG_PTR_TO_CTX:
13035 case KF_ARG_PTR_TO_DYNPTR:
13036 case KF_ARG_PTR_TO_ITER:
13037 case KF_ARG_PTR_TO_LIST_HEAD:
13038 case KF_ARG_PTR_TO_LIST_NODE:
13039 case KF_ARG_PTR_TO_RB_ROOT:
13040 case KF_ARG_PTR_TO_RB_NODE:
13041 case KF_ARG_PTR_TO_MEM:
13042 case KF_ARG_PTR_TO_MEM_SIZE:
13043 case KF_ARG_PTR_TO_CALLBACK:
13044 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
13045 case KF_ARG_PTR_TO_CONST_STR:
13046 case KF_ARG_PTR_TO_WORKQUEUE:
13047 case KF_ARG_PTR_TO_IRQ_FLAG:
13048 case KF_ARG_PTR_TO_RES_SPIN_LOCK:
13049 break;
13050 default:
13051 WARN_ON_ONCE(1);
13052 return -EFAULT;
13053 }
13054
13055 if (is_kfunc_release(meta) && reg->ref_obj_id)
13056 arg_type |= OBJ_RELEASE;
13057 ret = check_func_arg_reg_off(env, reg, regno, arg_type);
13058 if (ret < 0)
13059 return ret;
13060
13061 switch (kf_arg_type) {
13062 case KF_ARG_PTR_TO_CTX:
13063 if (reg->type != PTR_TO_CTX) {
13064 verbose(env, "arg#%d expected pointer to ctx, but got %s\n",
13065 i, reg_type_str(env, reg->type));
13066 return -EINVAL;
13067 }
13068
13069 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
13070 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
13071 if (ret < 0)
13072 return -EINVAL;
13073 meta->ret_btf_id = ret;
13074 }
13075 break;
13076 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
13077 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
13078 if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
13079 verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
13080 return -EINVAL;
13081 }
13082 } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
13083 if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
13084 verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
13085 return -EINVAL;
13086 }
13087 } else {
13088 verbose(env, "arg#%d expected pointer to allocated object\n", i);
13089 return -EINVAL;
13090 }
13091 if (!reg->ref_obj_id) {
13092 verbose(env, "allocated object must be referenced\n");
13093 return -EINVAL;
13094 }
13095 if (meta->btf == btf_vmlinux) {
13096 meta->arg_btf = reg->btf;
13097 meta->arg_btf_id = reg->btf_id;
13098 }
13099 break;
13100 case KF_ARG_PTR_TO_DYNPTR:
13101 {
13102 enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
13103 int clone_ref_obj_id = 0;
13104
13105 if (reg->type == CONST_PTR_TO_DYNPTR)
13106 dynptr_arg_type |= MEM_RDONLY;
13107
13108 if (is_kfunc_arg_uninit(btf, &args[i]))
13109 dynptr_arg_type |= MEM_UNINIT;
13110
13111 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
13112 dynptr_arg_type |= DYNPTR_TYPE_SKB;
13113 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
13114 dynptr_arg_type |= DYNPTR_TYPE_XDP;
13115 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
13116 (dynptr_arg_type & MEM_UNINIT)) {
13117 enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
13118
13119 if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
13120 verbose(env, "verifier internal error: no dynptr type for parent of clone\n");
13121 return -EFAULT;
13122 }
13123
13124 dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
13125 clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
13126 if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
13127 verbose(env, "verifier internal error: missing ref obj id for parent of clone\n");
13128 return -EFAULT;
13129 }
13130 }
13131
13132 ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
13133 if (ret < 0)
13134 return ret;
13135
13136 if (!(dynptr_arg_type & MEM_UNINIT)) {
13137 int id = dynptr_id(env, reg);
13138
13139 if (id < 0) {
13140 verbose(env, "verifier internal error: failed to obtain dynptr id\n");
13141 return id;
13142 }
13143 meta->initialized_dynptr.id = id;
13144 meta->initialized_dynptr.type = dynptr_get_type(env, reg);
13145 meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
13146 }
13147
13148 break;
13149 }
13150 case KF_ARG_PTR_TO_ITER:
13151 if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
13152 if (!check_css_task_iter_allowlist(env)) {
13153 verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
13154 return -EINVAL;
13155 }
13156 }
13157 ret = process_iter_arg(env, regno, insn_idx, meta);
13158 if (ret < 0)
13159 return ret;
13160 break;
13161 case KF_ARG_PTR_TO_LIST_HEAD:
13162 if (reg->type != PTR_TO_MAP_VALUE &&
13163 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13164 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
13165 return -EINVAL;
13166 }
13167 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
13168 verbose(env, "allocated object must be referenced\n");
13169 return -EINVAL;
13170 }
13171 ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
13172 if (ret < 0)
13173 return ret;
13174 break;
13175 case KF_ARG_PTR_TO_RB_ROOT:
13176 if (reg->type != PTR_TO_MAP_VALUE &&
13177 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13178 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
13179 return -EINVAL;
13180 }
13181 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
13182 verbose(env, "allocated object must be referenced\n");
13183 return -EINVAL;
13184 }
13185 ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
13186 if (ret < 0)
13187 return ret;
13188 break;
13189 case KF_ARG_PTR_TO_LIST_NODE:
13190 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13191 verbose(env, "arg#%d expected pointer to allocated object\n", i);
13192 return -EINVAL;
13193 }
13194 if (!reg->ref_obj_id) {
13195 verbose(env, "allocated object must be referenced\n");
13196 return -EINVAL;
13197 }
13198 ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
13199 if (ret < 0)
13200 return ret;
13201 break;
13202 case KF_ARG_PTR_TO_RB_NODE:
13203 if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
13204 if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
13205 verbose(env, "rbtree_remove node input must be non-owning ref\n");
13206 return -EINVAL;
13207 }
13208 if (in_rbtree_lock_required_cb(env)) {
13209 verbose(env, "rbtree_remove not allowed in rbtree cb\n");
13210 return -EINVAL;
13211 }
13212 } else {
13213 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13214 verbose(env, "arg#%d expected pointer to allocated object\n", i);
13215 return -EINVAL;
13216 }
13217 if (!reg->ref_obj_id) {
13218 verbose(env, "allocated object must be referenced\n");
13219 return -EINVAL;
13220 }
13221 }
13222
13223 ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
13224 if (ret < 0)
13225 return ret;
13226 break;
13227 case KF_ARG_PTR_TO_MAP:
13228 /* If argument has '__map' suffix expect 'struct bpf_map *' */
13229 ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
13230 ref_t = btf_type_by_id(btf_vmlinux, ref_id);
13231 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
13232 fallthrough;
13233 case KF_ARG_PTR_TO_BTF_ID:
13234 /* Only base_type is checked, further checks are done here */
13235 if ((base_type(reg->type) != PTR_TO_BTF_ID ||
13236 (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
13237 !reg2btf_ids[base_type(reg->type)]) {
13238 verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
13239 verbose(env, "expected %s or socket\n",
13240 reg_type_str(env, base_type(reg->type) |
13241 (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
13242 return -EINVAL;
13243 }
13244 ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
13245 if (ret < 0)
13246 return ret;
13247 break;
13248 case KF_ARG_PTR_TO_MEM:
13249 resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
13250 if (IS_ERR(resolve_ret)) {
13251 verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
13252 i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
13253 return -EINVAL;
13254 }
13255 ret = check_mem_reg(env, reg, regno, type_size);
13256 if (ret < 0)
13257 return ret;
13258 break;
13259 case KF_ARG_PTR_TO_MEM_SIZE:
13260 {
13261 struct bpf_reg_state *buff_reg = &regs[regno];
13262 const struct btf_param *buff_arg = &args[i];
13263 struct bpf_reg_state *size_reg = &regs[regno + 1];
13264 const struct btf_param *size_arg = &args[i + 1];
13265
13266 if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) {
13267 ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
13268 if (ret < 0) {
13269 verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
13270 return ret;
13271 }
13272 }
13273
13274 if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
13275 if (meta->arg_constant.found) {
13276 verbose(env, "verifier internal error: only one constant argument permitted\n");
13277 return -EFAULT;
13278 }
13279 if (!tnum_is_const(size_reg->var_off)) {
13280 verbose(env, "R%d must be a known constant\n", regno + 1);
13281 return -EINVAL;
13282 }
13283 meta->arg_constant.found = true;
13284 meta->arg_constant.value = size_reg->var_off.value;
13285 }
13286
13287 /* Skip next '__sz' or '__szk' argument */
13288 i++;
13289 break;
13290 }
13291 case KF_ARG_PTR_TO_CALLBACK:
13292 if (reg->type != PTR_TO_FUNC) {
13293 verbose(env, "arg%d expected pointer to func\n", i);
13294 return -EINVAL;
13295 }
13296 meta->subprogno = reg->subprogno;
13297 break;
13298 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
13299 if (!type_is_ptr_alloc_obj(reg->type)) {
13300 verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
13301 return -EINVAL;
13302 }
13303 if (!type_is_non_owning_ref(reg->type))
13304 meta->arg_owning_ref = true;
13305
13306 rec = reg_btf_record(reg);
13307 if (!rec) {
13308 verbose(env, "verifier internal error: Couldn't find btf_record\n");
13309 return -EFAULT;
13310 }
13311
13312 if (rec->refcount_off < 0) {
13313 verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
13314 return -EINVAL;
13315 }
13316
13317 meta->arg_btf = reg->btf;
13318 meta->arg_btf_id = reg->btf_id;
13319 break;
13320 case KF_ARG_PTR_TO_CONST_STR:
13321 if (reg->type != PTR_TO_MAP_VALUE) {
13322 verbose(env, "arg#%d doesn't point to a const string\n", i);
13323 return -EINVAL;
13324 }
13325 ret = check_reg_const_str(env, reg, regno);
13326 if (ret)
13327 return ret;
13328 break;
13329 case KF_ARG_PTR_TO_WORKQUEUE:
13330 if (reg->type != PTR_TO_MAP_VALUE) {
13331 verbose(env, "arg#%d doesn't point to a map value\n", i);
13332 return -EINVAL;
13333 }
13334 ret = process_wq_func(env, regno, meta);
13335 if (ret < 0)
13336 return ret;
13337 break;
13338 case KF_ARG_PTR_TO_IRQ_FLAG:
13339 if (reg->type != PTR_TO_STACK) {
13340 verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i);
13341 return -EINVAL;
13342 }
13343 ret = process_irq_flag(env, regno, meta);
13344 if (ret < 0)
13345 return ret;
13346 break;
13347 case KF_ARG_PTR_TO_RES_SPIN_LOCK:
13348 {
13349 int flags = PROCESS_RES_LOCK;
13350
13351 if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13352 verbose(env, "arg#%d doesn't point to map value or allocated object\n", i);
13353 return -EINVAL;
13354 }
13355
13356 if (!is_bpf_res_spin_lock_kfunc(meta->func_id))
13357 return -EFAULT;
13358 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
13359 meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
13360 flags |= PROCESS_SPIN_LOCK;
13361 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
13362 meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
13363 flags |= PROCESS_LOCK_IRQ;
13364 ret = process_spin_lock(env, regno, flags);
13365 if (ret < 0)
13366 return ret;
13367 break;
13368 }
13369 }
13370 }
13371
13372 if (is_kfunc_release(meta) && !meta->release_regno) {
13373 verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
13374 func_name);
13375 return -EINVAL;
13376 }
13377
13378 return 0;
13379 }
13380
13381 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
13382 struct bpf_insn *insn,
13383 struct bpf_kfunc_call_arg_meta *meta,
13384 const char **kfunc_name)
13385 {
13386 const struct btf_type *func, *func_proto;
13387 u32 func_id, *kfunc_flags;
13388 const char *func_name;
13389 struct btf *desc_btf;
13390
13391 if (kfunc_name)
13392 *kfunc_name = NULL;
13393
13394 if (!insn->imm)
13395 return -EINVAL;
13396
13397 desc_btf = find_kfunc_desc_btf(env, insn->off);
13398 if (IS_ERR(desc_btf))
13399 return PTR_ERR(desc_btf);
13400
13401 func_id = insn->imm;
13402 func = btf_type_by_id(desc_btf, func_id);
13403 func_name = btf_name_by_offset(desc_btf, func->name_off);
13404 if (kfunc_name)
13405 *kfunc_name = func_name;
13406 func_proto = btf_type_by_id(desc_btf, func->type);
13407
13408 kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
13409 if (!kfunc_flags) {
13410 return -EACCES;
13411 }
13412
13413 memset(meta, 0, sizeof(*meta));
13414 meta->btf = desc_btf;
13415 meta->func_id = func_id;
13416 meta->kfunc_flags = *kfunc_flags;
13417 meta->func_proto = func_proto;
13418 meta->func_name = func_name;
13419
13420 return 0;
13421 }
13422
13423 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
13424
13425 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
13426 int *insn_idx_p)
13427 {
13428 bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
13429 u32 i, nargs, ptr_type_id, release_ref_obj_id;
13430 struct bpf_reg_state *regs = cur_regs(env);
13431 const char *func_name, *ptr_type_name;
13432 const struct btf_type *t, *ptr_type;
13433 struct bpf_kfunc_call_arg_meta meta;
13434 struct bpf_insn_aux_data *insn_aux;
13435 int err, insn_idx = *insn_idx_p;
13436 const struct btf_param *args;
13437 const struct btf_type *ret_t;
13438 struct btf *desc_btf;
13439
13440 /* skip for now, but return error when we find this in fixup_kfunc_call */
13441 if (!insn->imm)
13442 return 0;
13443
13444 err = fetch_kfunc_meta(env, insn, &meta, &func_name);
13445 if (err == -EACCES && func_name)
13446 verbose(env, "calling kernel function %s is not allowed\n", func_name);
13447 if (err)
13448 return err;
13449 desc_btf = meta.btf;
13450 insn_aux = &env->insn_aux_data[insn_idx];
13451
13452 insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
13453
13454 if (!insn->off &&
13455 (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
13456 insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) {
13457 struct bpf_verifier_state *branch;
13458 struct bpf_reg_state *regs;
13459
13460 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
13461 if (!branch) {
13462 verbose(env, "failed to push state for failed lock acquisition\n");
13463 return -ENOMEM;
13464 }
13465
13466 regs = branch->frame[branch->curframe]->regs;
13467
13468 /* Clear r0-r5 registers in forked state */
13469 for (i = 0; i < CALLER_SAVED_REGS; i++)
13470 mark_reg_not_init(env, regs, caller_saved[i]);
13471
13472 mark_reg_unknown(env, regs, BPF_REG_0);
13473 err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
13474 if (err) {
13475 verbose(env, "failed to mark s32 range for retval in forked state for lock\n");
13476 return err;
13477 }
13478 __mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
13479 }
13480
13481 if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
13482 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
13483 return -EACCES;
13484 }
13485
13486 sleepable = is_kfunc_sleepable(&meta);
13487 if (sleepable && !in_sleepable(env)) {
13488 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
13489 return -EACCES;
13490 }
13491
13492 /* Check the arguments */
13493 err = check_kfunc_args(env, &meta, insn_idx);
13494 if (err < 0)
13495 return err;
13496
13497 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
13498 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
13499 set_rbtree_add_callback_state);
13500 if (err) {
13501 verbose(env, "kfunc %s#%d failed callback verification\n",
13502 func_name, meta.func_id);
13503 return err;
13504 }
13505 }
13506
13507 if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
13508 meta.r0_size = sizeof(u64);
13509 meta.r0_rdonly = false;
13510 }
13511
13512 if (is_bpf_wq_set_callback_impl_kfunc(meta.func_id)) {
13513 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
13514 set_timer_callback_state);
13515 if (err) {
13516 verbose(env, "kfunc %s#%d failed callback verification\n",
13517 func_name, meta.func_id);
13518 return err;
13519 }
13520 }
13521
13522 rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
13523 rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
13524
13525 preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
13526 preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
13527
13528 if (env->cur_state->active_rcu_lock) {
13529 struct bpf_func_state *state;
13530 struct bpf_reg_state *reg;
13531 u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
13532
13533 if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
13534 verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
13535 return -EACCES;
13536 }
13537
13538 if (rcu_lock) {
13539 verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
13540 return -EINVAL;
13541 } else if (rcu_unlock) {
13542 bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
13543 if (reg->type & MEM_RCU) {
13544 reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
13545 reg->type |= PTR_UNTRUSTED;
13546 }
13547 }));
13548 env->cur_state->active_rcu_lock = false;
13549 } else if (sleepable) {
13550 verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
13551 return -EACCES;
13552 }
13553 } else if (rcu_lock) {
13554 env->cur_state->active_rcu_lock = true;
13555 } else if (rcu_unlock) {
13556 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
13557 return -EINVAL;
13558 }
13559
13560 if (env->cur_state->active_preempt_locks) {
13561 if (preempt_disable) {
13562 env->cur_state->active_preempt_locks++;
13563 } else if (preempt_enable) {
13564 env->cur_state->active_preempt_locks--;
13565 } else if (sleepable) {
13566 verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name);
13567 return -EACCES;
13568 }
13569 } else if (preempt_disable) {
13570 env->cur_state->active_preempt_locks++;
13571 } else if (preempt_enable) {
13572 verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
13573 return -EINVAL;
13574 }
13575
13576 if (env->cur_state->active_irq_id && sleepable) {
13577 verbose(env, "kernel func %s is sleepable within IRQ-disabled region\n", func_name);
13578 return -EACCES;
13579 }
13580
13581 /* In case of release function, we get register number of refcounted
13582 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
13583 */
13584 if (meta.release_regno) {
13585 err = release_reference(env, regs[meta.release_regno].ref_obj_id);
13586 if (err) {
13587 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
13588 func_name, meta.func_id);
13589 return err;
13590 }
13591 }
13592
13593 if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
13594 meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
13595 meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
13596 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
13597 insn_aux->insert_off = regs[BPF_REG_2].off;
13598 insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
13599 err = ref_convert_owning_non_owning(env, release_ref_obj_id);
13600 if (err) {
13601 verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
13602 func_name, meta.func_id);
13603 return err;
13604 }
13605
13606 err = release_reference(env, release_ref_obj_id);
13607 if (err) {
13608 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
13609 func_name, meta.func_id);
13610 return err;
13611 }
13612 }
13613
13614 if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
13615 if (!bpf_jit_supports_exceptions()) {
13616 verbose(env, "JIT does not support calling kfunc %s#%d\n",
13617 func_name, meta.func_id);
13618 return -ENOTSUPP;
13619 }
13620 env->seen_exception = true;
13621
13622 /* In the case of the default callback, the cookie value passed
13623 * to bpf_throw becomes the return value of the program.
13624 */
13625 if (!env->exception_callback_subprog) {
13626 err = check_return_code(env, BPF_REG_1, "R1");
13627 if (err < 0)
13628 return err;
13629 }
13630 }
13631
13632 for (i = 0; i < CALLER_SAVED_REGS; i++)
13633 mark_reg_not_init(env, regs, caller_saved[i]);
13634
13635 /* Check return type */
13636 t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
13637
13638 if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
13639 /* Only exception is bpf_obj_new_impl */
13640 if (meta.btf != btf_vmlinux ||
13641 (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
13642 meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
13643 meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
13644 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
13645 return -EINVAL;
13646 }
13647 }
13648
13649 if (btf_type_is_scalar(t)) {
13650 mark_reg_unknown(env, regs, BPF_REG_0);
13651 if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
13652 meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]))
13653 __mark_reg_const_zero(env, &regs[BPF_REG_0]);
13654 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
13655 } else if (btf_type_is_ptr(t)) {
13656 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
13657
13658 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
13659 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
13660 meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
13661 struct btf_struct_meta *struct_meta;
13662 struct btf *ret_btf;
13663 u32 ret_btf_id;
13664
13665 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
13666 return -ENOMEM;
13667
13668 if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
13669 verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
13670 return -EINVAL;
13671 }
13672
13673 ret_btf = env->prog->aux->btf;
13674 ret_btf_id = meta.arg_constant.value;
13675
13676 /* This may be NULL due to user not supplying a BTF */
13677 if (!ret_btf) {
13678 verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
13679 return -EINVAL;
13680 }
13681
13682 ret_t = btf_type_by_id(ret_btf, ret_btf_id);
13683 if (!ret_t || !__btf_type_is_struct(ret_t)) {
13684 verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
13685 return -EINVAL;
13686 }
13687
13688 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
13689 if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
13690 verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
13691 ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
13692 return -EINVAL;
13693 }
13694
13695 if (!bpf_global_percpu_ma_set) {
13696 mutex_lock(&bpf_percpu_ma_lock);
13697 if (!bpf_global_percpu_ma_set) {
13698 /* Charge memory allocated with bpf_global_percpu_ma to
13699 * root memcg. The obj_cgroup for root memcg is NULL.
13700 */
13701 err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
13702 if (!err)
13703 bpf_global_percpu_ma_set = true;
13704 }
13705 mutex_unlock(&bpf_percpu_ma_lock);
13706 if (err)
13707 return err;
13708 }
13709
13710 mutex_lock(&bpf_percpu_ma_lock);
13711 err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
13712 mutex_unlock(&bpf_percpu_ma_lock);
13713 if (err)
13714 return err;
13715 }
13716
13717 struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
13718 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
13719 if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
13720 verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
13721 return -EINVAL;
13722 }
13723
13724 if (struct_meta) {
13725 verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
13726 return -EINVAL;
13727 }
13728 }
13729
13730 mark_reg_known_zero(env, regs, BPF_REG_0);
13731 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
13732 regs[BPF_REG_0].btf = ret_btf;
13733 regs[BPF_REG_0].btf_id = ret_btf_id;
13734 if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
13735 regs[BPF_REG_0].type |= MEM_PERCPU;
13736
13737 insn_aux->obj_new_size = ret_t->size;
13738 insn_aux->kptr_struct_meta = struct_meta;
13739 } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
13740 mark_reg_known_zero(env, regs, BPF_REG_0);
13741 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
13742 regs[BPF_REG_0].btf = meta.arg_btf;
13743 regs[BPF_REG_0].btf_id = meta.arg_btf_id;
13744
13745 insn_aux->kptr_struct_meta =
13746 btf_find_struct_meta(meta.arg_btf,
13747 meta.arg_btf_id);
13748 } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
13749 meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
13750 struct btf_field *field = meta.arg_list_head.field;
13751
13752 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
13753 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
13754 meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
13755 struct btf_field *field = meta.arg_rbtree_root.field;
13756
13757 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
13758 } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
13759 mark_reg_known_zero(env, regs, BPF_REG_0);
13760 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
13761 regs[BPF_REG_0].btf = desc_btf;
13762 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
13763 } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
13764 ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
13765 if (!ret_t || !btf_type_is_struct(ret_t)) {
13766 verbose(env,
13767 "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
13768 return -EINVAL;
13769 }
13770
13771 mark_reg_known_zero(env, regs, BPF_REG_0);
13772 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
13773 regs[BPF_REG_0].btf = desc_btf;
13774 regs[BPF_REG_0].btf_id = meta.arg_constant.value;
13775 } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
13776 meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
13777 enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
13778
13779 mark_reg_known_zero(env, regs, BPF_REG_0);
13780
13781 if (!meta.arg_constant.found) {
13782 verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
13783 return -EFAULT;
13784 }
13785
13786 regs[BPF_REG_0].mem_size = meta.arg_constant.value;
13787
13788 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
13789 regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
13790
13791 if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
13792 regs[BPF_REG_0].type |= MEM_RDONLY;
13793 } else {
13794 /* this will set env->seen_direct_write to true */
13795 if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
13796 verbose(env, "the prog does not allow writes to packet data\n");
13797 return -EINVAL;
13798 }
13799 }
13800
13801 if (!meta.initialized_dynptr.id) {
13802 verbose(env, "verifier internal error: no dynptr id\n");
13803 return -EFAULT;
13804 }
13805 regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
13806
13807 /* we don't need to set BPF_REG_0's ref obj id
13808 * because packet slices are not refcounted (see
13809 * dynptr_type_refcounted)
13810 */
13811 } else {
13812 verbose(env, "kernel function %s unhandled dynamic return type\n",
13813 meta.func_name);
13814 return -EFAULT;
13815 }
13816 } else if (btf_type_is_void(ptr_type)) {
13817 /* kfunc returning 'void *' is equivalent to returning scalar */
13818 mark_reg_unknown(env, regs, BPF_REG_0);
13819 } else if (!__btf_type_is_struct(ptr_type)) {
13820 if (!meta.r0_size) {
13821 __u32 sz;
13822
13823 if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
13824 meta.r0_size = sz;
13825 meta.r0_rdonly = true;
13826 }
13827 }
13828 if (!meta.r0_size) {
13829 ptr_type_name = btf_name_by_offset(desc_btf,
13830 ptr_type->name_off);
13831 verbose(env,
13832 "kernel function %s returns pointer type %s %s is not supported\n",
13833 func_name,
13834 btf_type_str(ptr_type),
13835 ptr_type_name);
13836 return -EINVAL;
13837 }
13838
13839 mark_reg_known_zero(env, regs, BPF_REG_0);
13840 regs[BPF_REG_0].type = PTR_TO_MEM;
13841 regs[BPF_REG_0].mem_size = meta.r0_size;
13842
13843 if (meta.r0_rdonly)
13844 regs[BPF_REG_0].type |= MEM_RDONLY;
13845
13846 /* Ensures we don't access the memory after a release_reference() */
13847 if (meta.ref_obj_id)
13848 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
13849 } else {
13850 mark_reg_known_zero(env, regs, BPF_REG_0);
13851 regs[BPF_REG_0].btf = desc_btf;
13852 regs[BPF_REG_0].type = PTR_TO_BTF_ID;
13853 regs[BPF_REG_0].btf_id = ptr_type_id;
13854
13855 if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
13856 regs[BPF_REG_0].type |= PTR_UNTRUSTED;
13857
13858 if (is_iter_next_kfunc(&meta)) {
13859 struct bpf_reg_state *cur_iter;
13860
13861 cur_iter = get_iter_from_state(env->cur_state, &meta);
13862
13863 if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */
13864 regs[BPF_REG_0].type |= MEM_RCU;
13865 else
13866 regs[BPF_REG_0].type |= PTR_TRUSTED;
13867 }
13868 }
13869
13870 if (is_kfunc_ret_null(&meta)) {
13871 regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
13872 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
13873 regs[BPF_REG_0].id = ++env->id_gen;
13874 }
13875 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
13876 if (is_kfunc_acquire(&meta)) {
13877 int id = acquire_reference(env, insn_idx);
13878
13879 if (id < 0)
13880 return id;
13881 if (is_kfunc_ret_null(&meta))
13882 regs[BPF_REG_0].id = id;
13883 regs[BPF_REG_0].ref_obj_id = id;
13884 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
13885 ref_set_non_owning(env, &regs[BPF_REG_0]);
13886 }
13887
13888 if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
13889 regs[BPF_REG_0].id = ++env->id_gen;
13890 } else if (btf_type_is_void(t)) {
13891 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
13892 if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
13893 meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
13894 insn_aux->kptr_struct_meta =
13895 btf_find_struct_meta(meta.arg_btf,
13896 meta.arg_btf_id);
13897 }
13898 }
13899 }
13900
13901 nargs = btf_type_vlen(meta.func_proto);
13902 args = (const struct btf_param *)(meta.func_proto + 1);
13903 for (i = 0; i < nargs; i++) {
13904 u32 regno = i + 1;
13905
13906 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
13907 if (btf_type_is_ptr(t))
13908 mark_btf_func_reg_size(env, regno, sizeof(void *));
13909 else
13910 /* scalar. ensured by btf_check_kfunc_arg_match() */
13911 mark_btf_func_reg_size(env, regno, t->size);
13912 }
13913
13914 if (is_iter_next_kfunc(&meta)) {
13915 err = process_iter_next_call(env, insn_idx, &meta);
13916 if (err)
13917 return err;
13918 }
13919
13920 return 0;
13921 }
13922
13923 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
13924 const struct bpf_reg_state *reg,
13925 enum bpf_reg_type type)
13926 {
13927 bool known = tnum_is_const(reg->var_off);
13928 s64 val = reg->var_off.value;
13929 s64 smin = reg->smin_value;
13930
13931 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13932 verbose(env, "math between %s pointer and %lld is not allowed\n",
13933 reg_type_str(env, type), val);
13934 return false;
13935 }
13936
13937 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
13938 verbose(env, "%s pointer offset %d is not allowed\n",
13939 reg_type_str(env, type), reg->off);
13940 return false;
13941 }
13942
13943 if (smin == S64_MIN) {
13944 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
13945 reg_type_str(env, type));
13946 return false;
13947 }
13948
13949 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13950 verbose(env, "value %lld makes %s pointer be out of bounds\n",
13951 smin, reg_type_str(env, type));
13952 return false;
13953 }
13954
13955 return true;
13956 }
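/* Editorial sketch: check_reg_sane_offset() above is what rejects e.g.
 *
 *	r2 = *(u64 *)(r1 + 0);	// unknown scalar, smin_value == S64_MIN
 *	r0 += r2;		// r0 has type PTR_TO_MAP_VALUE
 *
 * with "math between map_value pointer and register with unbounded min value
 * is not allowed", while the same addition is accepted once r2 has been
 * bounds-checked so that its bounds fit within +/-BPF_MAX_VAR_OFF.
 */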
13957
13958 enum {
13959 REASON_BOUNDS = -1,
13960 REASON_TYPE = -2,
13961 REASON_PATHS = -3,
13962 REASON_LIMIT = -4,
13963 REASON_STACK = -5,
13964 };
13965
13966 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
13967 u32 *alu_limit, bool mask_to_left)
13968 {
13969 u32 max = 0, ptr_limit = 0;
13970
13971 switch (ptr_reg->type) {
13972 case PTR_TO_STACK:
13973 /* Offset 0 is out-of-bounds, but acceptable start for the
13974 * left direction, see BPF_REG_FP. Also, unknown scalar
13975 * offset where we would need to deal with min/max bounds is
13976 * currently prohibited for unprivileged.
13977 */
13978 max = MAX_BPF_STACK + mask_to_left;
13979 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
13980 break;
13981 case PTR_TO_MAP_VALUE:
13982 max = ptr_reg->map_ptr->value_size;
13983 ptr_limit = (mask_to_left ?
13984 ptr_reg->smin_value :
13985 ptr_reg->umax_value) + ptr_reg->off;
13986 break;
13987 default:
13988 return REASON_TYPE;
13989 }
13990
13991 if (ptr_limit >= max)
13992 return REASON_LIMIT;
13993 *alu_limit = ptr_limit;
13994 return 0;
13995 }
13996
13997 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
13998 const struct bpf_insn *insn)
13999 {
14000 return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
14001 }
14002
14003 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
14004 u32 alu_state, u32 alu_limit)
14005 {
14006 /* If we arrived here from different branches with different
14007 * state or limits to sanitize, then this won't work.
14008 */
14009 if (aux->alu_state &&
14010 (aux->alu_state != alu_state ||
14011 aux->alu_limit != alu_limit))
14012 return REASON_PATHS;
14013
14014 /* Corresponding fixup done in do_misc_fixups(). */
14015 aux->alu_state = alu_state;
14016 aux->alu_limit = alu_limit;
14017 return 0;
14018 }
14019
14020 static int sanitize_val_alu(struct bpf_verifier_env *env,
14021 struct bpf_insn *insn)
14022 {
14023 struct bpf_insn_aux_data *aux = cur_aux(env);
14024
14025 if (can_skip_alu_sanitation(env, insn))
14026 return 0;
14027
14028 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
14029 }
14030
14031 static bool sanitize_needed(u8 opcode)
14032 {
14033 return opcode == BPF_ADD || opcode == BPF_SUB;
14034 }
14035
14036 struct bpf_sanitize_info {
14037 struct bpf_insn_aux_data aux;
14038 bool mask_to_left;
14039 };
14040
14041 static struct bpf_verifier_state *
14042 sanitize_speculative_path(struct bpf_verifier_env *env,
14043 const struct bpf_insn *insn,
14044 u32 next_idx, u32 curr_idx)
14045 {
14046 struct bpf_verifier_state *branch;
14047 struct bpf_reg_state *regs;
14048
14049 branch = push_stack(env, next_idx, curr_idx, true);
14050 if (branch && insn) {
14051 regs = branch->frame[branch->curframe]->regs;
14052 if (BPF_SRC(insn->code) == BPF_K) {
14053 mark_reg_unknown(env, regs, insn->dst_reg);
14054 } else if (BPF_SRC(insn->code) == BPF_X) {
14055 mark_reg_unknown(env, regs, insn->dst_reg);
14056 mark_reg_unknown(env, regs, insn->src_reg);
14057 }
14058 }
14059 return branch;
14060 }
14061
14062 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
14063 struct bpf_insn *insn,
14064 const struct bpf_reg_state *ptr_reg,
14065 const struct bpf_reg_state *off_reg,
14066 struct bpf_reg_state *dst_reg,
14067 struct bpf_sanitize_info *info,
14068 const bool commit_window)
14069 {
14070 struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
14071 struct bpf_verifier_state *vstate = env->cur_state;
14072 bool off_is_imm = tnum_is_const(off_reg->var_off);
14073 bool off_is_neg = off_reg->smin_value < 0;
14074 bool ptr_is_dst_reg = ptr_reg == dst_reg;
14075 u8 opcode = BPF_OP(insn->code);
14076 u32 alu_state, alu_limit;
14077 struct bpf_reg_state tmp;
14078 bool ret;
14079 int err;
14080
14081 if (can_skip_alu_sanitation(env, insn))
14082 return 0;
14083
14084 /* We already marked aux for masking from non-speculative
14085 * paths, thus we got here in the first place. We only care
14086 * to explore bad access from here.
14087 */
14088 if (vstate->speculative)
14089 goto do_sim;
14090
14091 if (!commit_window) {
14092 if (!tnum_is_const(off_reg->var_off) &&
14093 (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
14094 return REASON_BOUNDS;
14095
14096 info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
14097 (opcode == BPF_SUB && !off_is_neg);
14098 }
14099
14100 err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
14101 if (err < 0)
14102 return err;
14103
14104 if (commit_window) {
14105 /* In commit phase we narrow the masking window based on
14106 * the observed pointer move after the simulated operation.
14107 */
14108 alu_state = info->aux.alu_state;
14109 alu_limit = abs(info->aux.alu_limit - alu_limit);
14110 } else {
14111 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
14112 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
14113 alu_state |= ptr_is_dst_reg ?
14114 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
14115
14116 /* Limit pruning on unknown scalars to enable deep search for
14117 * potential masking differences from other program paths.
14118 */
14119 if (!off_is_imm)
14120 env->explore_alu_limits = true;
14121 }
14122
14123 err = update_alu_sanitation_state(aux, alu_state, alu_limit);
14124 if (err < 0)
14125 return err;
14126 do_sim:
14127 /* If we're in commit phase, we're done here given we already
14128 * pushed the truncated dst_reg into the speculative verification
14129 * stack.
14130 *
14131 * Also, when register is a known constant, we rewrite register-based
14132 * operation to immediate-based, and thus do not need masking (and as
14133 * a consequence, do not need to simulate the zero-truncation either).
14134 */
14135 if (commit_window || off_is_imm)
14136 return 0;
14137
14138 /* Simulate and find potential out-of-bounds access under
14139 * speculative execution from truncation as a result of
14140 * masking when off was not within expected range. If off
14141 * sits in dst, then we temporarily need to move ptr there
14142 * to simulate dst (== 0) +/-= ptr. Needed, for example,
14143 * for cases where we use K-based arithmetic in one direction
14144 * and truncated reg-based in the other in order to explore
14145 * bad access.
14146 */
14147 if (!ptr_is_dst_reg) {
14148 tmp = *dst_reg;
14149 copy_register_state(dst_reg, ptr_reg);
14150 }
14151 ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
14152 env->insn_idx);
14153 if (!ptr_is_dst_reg && ret)
14154 *dst_reg = tmp;
14155 return !ret ? REASON_STACK : 0;
14156 }
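/* Illustrative sketch (not in the original source): for an unprivileged
 * program doing
 *
 *   r1 = map_value_ptr
 *   r1 += r2              (r2 is an unknown scalar)
 *
 * the pre-commit call records an alu_limit derived from how far the pointer
 * may move without leaving the object, the speculative path pushed under
 * do_sim checks that a truncated/masked offset cannot lead to a bad access,
 * and the post-commit call (commit_window == true) narrows the limit to the
 * pointer movement actually observed. The recorded alu_state/alu_limit are
 * later used to patch runtime masking of the scalar into the program.
 */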
14157
14158 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
14159 {
14160 struct bpf_verifier_state *vstate = env->cur_state;
14161
14162 /* If we simulate paths under speculation, we don't update the
14163 * insn as 'seen' such that when we verify unreachable paths in
14164 * the non-speculative domain, sanitize_dead_code() can still
14165 * rewrite/sanitize them.
14166 */
14167 if (!vstate->speculative)
14168 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
14169 }
14170
14171 static int sanitize_err(struct bpf_verifier_env *env,
14172 const struct bpf_insn *insn, int reason,
14173 const struct bpf_reg_state *off_reg,
14174 const struct bpf_reg_state *dst_reg)
14175 {
14176 static const char *err = "pointer arithmetic with it prohibited for !root";
14177 const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
14178 u32 dst = insn->dst_reg, src = insn->src_reg;
14179
14180 switch (reason) {
14181 case REASON_BOUNDS:
14182 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
14183 off_reg == dst_reg ? dst : src, err);
14184 break;
14185 case REASON_TYPE:
14186 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
14187 off_reg == dst_reg ? src : dst, err);
14188 break;
14189 case REASON_PATHS:
14190 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
14191 dst, op, err);
14192 break;
14193 case REASON_LIMIT:
14194 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
14195 dst, op, err);
14196 break;
14197 case REASON_STACK:
14198 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
14199 dst, err);
14200 break;
14201 default:
14202 verbose(env, "verifier internal error: unknown reason (%d)\n",
14203 reason);
14204 break;
14205 }
14206
14207 return -EACCES;
14208 }
14209
14210 /* check that stack access falls within stack limits and that 'reg' doesn't
14211 * have a variable offset.
14212 *
14213 * Variable offset is prohibited for unprivileged mode for simplicity since it
14214 * requires corresponding support in Spectre masking for stack ALU. See also
14215 * retrieve_ptr_limit().
14216 *
14217 *
14218 * 'off' includes 'reg->off'.
14219 */
14220 static int check_stack_access_for_ptr_arithmetic(
14221 struct bpf_verifier_env *env,
14222 int regno,
14223 const struct bpf_reg_state *reg,
14224 int off)
14225 {
14226 if (!tnum_is_const(reg->var_off)) {
14227 char tn_buf[48];
14228
14229 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
14230 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
14231 regno, tn_buf, off);
14232 return -EACCES;
14233 }
14234
14235 if (off >= 0 || off < -MAX_BPF_STACK) {
14236 verbose(env, "R%d stack pointer arithmetic goes out of range, "
14237 "prohibited for !root; off=%d\n", regno, off);
14238 return -EACCES;
14239 }
14240
14241 return 0;
14242 }
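/* Example (illustrative, not from the original source): for !root,
 *
 *   r1 = r10
 *   r1 += -8     accepted: resulting off = -8 lies in [-MAX_BPF_STACK, -1]
 *   r1 += 16     rejected: resulting off >= 0 is outside the stack range
 *   r1 += r2     rejected when r2 is not a known constant (variable offset)
 */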
14243
14244 static int sanitize_check_bounds(struct bpf_verifier_env *env,
14245 const struct bpf_insn *insn,
14246 const struct bpf_reg_state *dst_reg)
14247 {
14248 u32 dst = insn->dst_reg;
14249
14250 /* For unprivileged we require that resulting offset must be in bounds
14251 * in order to be able to sanitize access later on.
14252 */
14253 if (env->bypass_spec_v1)
14254 return 0;
14255
14256 switch (dst_reg->type) {
14257 case PTR_TO_STACK:
14258 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
14259 dst_reg->off + dst_reg->var_off.value))
14260 return -EACCES;
14261 break;
14262 case PTR_TO_MAP_VALUE:
14263 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
14264 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
14265 "prohibited for !root\n", dst);
14266 return -EACCES;
14267 }
14268 break;
14269 default:
14270 break;
14271 }
14272
14273 return 0;
14274 }
14275
14276 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
14277 * Caller should also handle BPF_MOV case separately.
14278 * If we return -EACCES, caller may want to try again treating pointer as a
14279 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
14280 */
14281 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
14282 struct bpf_insn *insn,
14283 const struct bpf_reg_state *ptr_reg,
14284 const struct bpf_reg_state *off_reg)
14285 {
14286 struct bpf_verifier_state *vstate = env->cur_state;
14287 struct bpf_func_state *state = vstate->frame[vstate->curframe];
14288 struct bpf_reg_state *regs = state->regs, *dst_reg;
14289 bool known = tnum_is_const(off_reg->var_off);
14290 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
14291 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
14292 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
14293 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
14294 struct bpf_sanitize_info info = {};
14295 u8 opcode = BPF_OP(insn->code);
14296 u32 dst = insn->dst_reg;
14297 int ret;
14298
14299 dst_reg = &regs[dst];
14300
14301 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
14302 smin_val > smax_val || umin_val > umax_val) {
14303 /* Taint dst register if offset had invalid bounds derived from
14304 * e.g. dead branches.
14305 */
14306 __mark_reg_unknown(env, dst_reg);
14307 return 0;
14308 }
14309
14310 if (BPF_CLASS(insn->code) != BPF_ALU64) {
14311 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
14312 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
14313 __mark_reg_unknown(env, dst_reg);
14314 return 0;
14315 }
14316
14317 verbose(env,
14318 "R%d 32-bit pointer arithmetic prohibited\n",
14319 dst);
14320 return -EACCES;
14321 }
14322
14323 if (ptr_reg->type & PTR_MAYBE_NULL) {
14324 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
14325 dst, reg_type_str(env, ptr_reg->type));
14326 return -EACCES;
14327 }
14328
14329 switch (base_type(ptr_reg->type)) {
14330 case PTR_TO_CTX:
14331 case PTR_TO_MAP_VALUE:
14332 case PTR_TO_MAP_KEY:
14333 case PTR_TO_STACK:
14334 case PTR_TO_PACKET_META:
14335 case PTR_TO_PACKET:
14336 case PTR_TO_TP_BUFFER:
14337 case PTR_TO_BTF_ID:
14338 case PTR_TO_MEM:
14339 case PTR_TO_BUF:
14340 case PTR_TO_FUNC:
14341 case CONST_PTR_TO_DYNPTR:
14342 break;
14343 case PTR_TO_FLOW_KEYS:
14344 if (known)
14345 break;
14346 fallthrough;
14347 case CONST_PTR_TO_MAP:
14348 /* smin_val represents the known value */
14349 if (known && smin_val == 0 && opcode == BPF_ADD)
14350 break;
14351 fallthrough;
14352 default:
14353 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
14354 dst, reg_type_str(env, ptr_reg->type));
14355 return -EACCES;
14356 }
14357
14358 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
14359 * The id may be overwritten later if we create a new variable offset.
14360 */
14361 dst_reg->type = ptr_reg->type;
14362 dst_reg->id = ptr_reg->id;
14363
14364 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
14365 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
14366 return -EINVAL;
14367
14368 /* pointer types do not carry 32-bit bounds at the moment. */
14369 __mark_reg32_unbounded(dst_reg);
14370
14371 if (sanitize_needed(opcode)) {
14372 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
14373 &info, false);
14374 if (ret < 0)
14375 return sanitize_err(env, insn, ret, off_reg, dst_reg);
14376 }
14377
14378 switch (opcode) {
14379 case BPF_ADD:
14380 /* We can take a fixed offset as long as it doesn't overflow
14381 * the s32 'off' field
14382 */
14383 if (known && (ptr_reg->off + smin_val ==
14384 (s64)(s32)(ptr_reg->off + smin_val))) {
14385 /* pointer += K. Accumulate it into fixed offset */
14386 dst_reg->smin_value = smin_ptr;
14387 dst_reg->smax_value = smax_ptr;
14388 dst_reg->umin_value = umin_ptr;
14389 dst_reg->umax_value = umax_ptr;
14390 dst_reg->var_off = ptr_reg->var_off;
14391 dst_reg->off = ptr_reg->off + smin_val;
14392 dst_reg->raw = ptr_reg->raw;
14393 break;
14394 }
14395 /* A new variable offset is created. Note that off_reg->off
14396 * == 0, since it's a scalar.
14397 * dst_reg gets the pointer type and since some positive
14398 * integer value was added to the pointer, give it a new 'id'
14399 * if it's a PTR_TO_PACKET.
14400 * this creates a new 'base' pointer, off_reg (variable) gets
14401 * added into the variable offset, and we copy the fixed offset
14402 * from ptr_reg.
14403 */
14404 if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) ||
14405 check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) {
14406 dst_reg->smin_value = S64_MIN;
14407 dst_reg->smax_value = S64_MAX;
14408 }
14409 if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) ||
14410 check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) {
14411 dst_reg->umin_value = 0;
14412 dst_reg->umax_value = U64_MAX;
14413 }
14414 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
14415 dst_reg->off = ptr_reg->off;
14416 dst_reg->raw = ptr_reg->raw;
14417 if (reg_is_pkt_pointer(ptr_reg)) {
14418 dst_reg->id = ++env->id_gen;
14419 /* something was added to pkt_ptr, set range to zero */
14420 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
14421 }
14422 break;
14423 case BPF_SUB:
14424 if (dst_reg == off_reg) {
14425 /* scalar -= pointer. Creates an unknown scalar */
14426 verbose(env, "R%d tried to subtract pointer from scalar\n",
14427 dst);
14428 return -EACCES;
14429 }
14430 /* We don't allow subtraction from FP, because (according to
14431 * test_verifier.c test "invalid fp arithmetic") JITs might not
14432 * be able to deal with it.
14433 */
14434 if (ptr_reg->type == PTR_TO_STACK) {
14435 verbose(env, "R%d subtraction from stack pointer prohibited\n",
14436 dst);
14437 return -EACCES;
14438 }
14439 if (known && (ptr_reg->off - smin_val ==
14440 (s64)(s32)(ptr_reg->off - smin_val))) {
14441 /* pointer -= K. Subtract it from fixed offset */
14442 dst_reg->smin_value = smin_ptr;
14443 dst_reg->smax_value = smax_ptr;
14444 dst_reg->umin_value = umin_ptr;
14445 dst_reg->umax_value = umax_ptr;
14446 dst_reg->var_off = ptr_reg->var_off;
14447 dst_reg->id = ptr_reg->id;
14448 dst_reg->off = ptr_reg->off - smin_val;
14449 dst_reg->raw = ptr_reg->raw;
14450 break;
14451 }
14452 /* A new variable offset is created. If the subtrahend is known
14453 * nonnegative, then any reg->range we had before is still good.
14454 */
14455 if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) ||
14456 check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) {
14457 /* Overflow possible, we know nothing */
14458 dst_reg->smin_value = S64_MIN;
14459 dst_reg->smax_value = S64_MAX;
14460 }
14461 if (umin_ptr < umax_val) {
14462 /* Overflow possible, we know nothing */
14463 dst_reg->umin_value = 0;
14464 dst_reg->umax_value = U64_MAX;
14465 } else {
14466 /* Cannot overflow (as long as bounds are consistent) */
14467 dst_reg->umin_value = umin_ptr - umax_val;
14468 dst_reg->umax_value = umax_ptr - umin_val;
14469 }
14470 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
14471 dst_reg->off = ptr_reg->off;
14472 dst_reg->raw = ptr_reg->raw;
14473 if (reg_is_pkt_pointer(ptr_reg)) {
14474 dst_reg->id = ++env->id_gen;
14475 /* subtracting a possibly negative value may move pkt_ptr forward, set range to zero */
14476 if (smin_val < 0)
14477 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
14478 }
14479 break;
14480 case BPF_AND:
14481 case BPF_OR:
14482 case BPF_XOR:
14483 /* bitwise ops on pointers are troublesome, prohibit. */
14484 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
14485 dst, bpf_alu_string[opcode >> 4]);
14486 return -EACCES;
14487 default:
14488 /* other operators (e.g. MUL,LSH) produce non-pointer results */
14489 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
14490 dst, bpf_alu_string[opcode >> 4]);
14491 return -EACCES;
14492 }
14493
14494 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
14495 return -EINVAL;
14496 reg_bounds_sync(dst_reg);
14497 if (sanitize_check_bounds(env, insn, dst_reg) < 0)
14498 return -EACCES;
14499 if (sanitize_needed(opcode)) {
14500 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
14501 &info, true);
14502 if (ret < 0)
14503 return sanitize_err(env, insn, ret, off_reg, dst_reg);
14504 }
14505
14506 return 0;
14507 }
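/* Worked example (illustrative, not part of the original source):
 *
 *   r1 = map_value_ptr            R1=map_value(off=0)
 *   r1 += 4                       known constant: folded into the fixed
 *                                 offset, R1=map_value(off=4)
 *   r1 += r2   (r2 in [0, 16])    unknown scalar: the fixed off stays 4 and
 *                                 the variable part is tracked in
 *                                 smin/smax/umin/umax/var_off as [0, 16]
 *
 * Packet pointers additionally get a fresh id and their verified range
 * reset, since a previously checked relation against pkt_end no longer
 * covers the moved pointer.
 */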
14508
14509 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
14510 struct bpf_reg_state *src_reg)
14511 {
14512 s32 *dst_smin = &dst_reg->s32_min_value;
14513 s32 *dst_smax = &dst_reg->s32_max_value;
14514 u32 *dst_umin = &dst_reg->u32_min_value;
14515 u32 *dst_umax = &dst_reg->u32_max_value;
14516
14517 if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
14518 check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
14519 *dst_smin = S32_MIN;
14520 *dst_smax = S32_MAX;
14521 }
14522 if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) ||
14523 check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) {
14524 *dst_umin = 0;
14525 *dst_umax = U32_MAX;
14526 }
14527 }
14528
14529 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
14530 struct bpf_reg_state *src_reg)
14531 {
14532 s64 *dst_smin = &dst_reg->smin_value;
14533 s64 *dst_smax = &dst_reg->smax_value;
14534 u64 *dst_umin = &dst_reg->umin_value;
14535 u64 *dst_umax = &dst_reg->umax_value;
14536
14537 if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
14538 check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
14539 *dst_smin = S64_MIN;
14540 *dst_smax = S64_MAX;
14541 }
14542 if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) ||
14543 check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) {
14544 *dst_umin = 0;
14545 *dst_umax = U64_MAX;
14546 }
14547 }
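/* Worked example (illustrative): dst in s64 [10, 20] plus src in s64 [-5, 5]
 * gives [5, 25]; if either boundary addition overflows (check_add_overflow
 * reports it), the affected signed or unsigned bounds collapse to the full
 * range and only var_off keeps any precision.
 */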
14548
14549 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
14550 struct bpf_reg_state *src_reg)
14551 {
14552 s32 *dst_smin = &dst_reg->s32_min_value;
14553 s32 *dst_smax = &dst_reg->s32_max_value;
14554 u32 umin_val = src_reg->u32_min_value;
14555 u32 umax_val = src_reg->u32_max_value;
14556
14557 if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
14558 check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
14559 /* Overflow possible, we know nothing */
14560 *dst_smin = S32_MIN;
14561 *dst_smax = S32_MAX;
14562 }
14563 if (dst_reg->u32_min_value < umax_val) {
14564 /* Overflow possible, we know nothing */
14565 dst_reg->u32_min_value = 0;
14566 dst_reg->u32_max_value = U32_MAX;
14567 } else {
14568 /* Cannot overflow (as long as bounds are consistent) */
14569 dst_reg->u32_min_value -= umax_val;
14570 dst_reg->u32_max_value -= umin_val;
14571 }
14572 }
14573
14574 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
14575 struct bpf_reg_state *src_reg)
14576 {
14577 s64 *dst_smin = &dst_reg->smin_value;
14578 s64 *dst_smax = &dst_reg->smax_value;
14579 u64 umin_val = src_reg->umin_value;
14580 u64 umax_val = src_reg->umax_value;
14581
14582 if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
14583 check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
14584 /* Overflow possible, we know nothing */
14585 *dst_smin = S64_MIN;
14586 *dst_smax = S64_MAX;
14587 }
14588 if (dst_reg->umin_value < umax_val) {
14589 /* Overflow possible, we know nothing */
14590 dst_reg->umin_value = 0;
14591 dst_reg->umax_value = U64_MAX;
14592 } else {
14593 /* Cannot overflow (as long as bounds are consistent) */
14594 dst_reg->umin_value -= umax_val;
14595 dst_reg->umax_value -= umin_val;
14596 }
14597 }
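/* Worked example (illustrative): for the unsigned bounds, dst in [100, 200]
 * minus src in [10, 50] yields [100 - 50, 200 - 10] = [50, 190]; when
 * dst->umin_value < src->umax_value the subtraction could wrap, so the
 * unsigned bounds are reset to [0, U64_MAX].
 */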
14598
14599 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
14600 struct bpf_reg_state *src_reg)
14601 {
14602 s32 *dst_smin = &dst_reg->s32_min_value;
14603 s32 *dst_smax = &dst_reg->s32_max_value;
14604 u32 *dst_umin = &dst_reg->u32_min_value;
14605 u32 *dst_umax = &dst_reg->u32_max_value;
14606 s32 tmp_prod[4];
14607
14608 if (check_mul_overflow(*dst_umax, src_reg->u32_max_value, dst_umax) ||
14609 check_mul_overflow(*dst_umin, src_reg->u32_min_value, dst_umin)) {
14610 /* Overflow possible, we know nothing */
14611 *dst_umin = 0;
14612 *dst_umax = U32_MAX;
14613 }
14614 if (check_mul_overflow(*dst_smin, src_reg->s32_min_value, &tmp_prod[0]) ||
14615 check_mul_overflow(*dst_smin, src_reg->s32_max_value, &tmp_prod[1]) ||
14616 check_mul_overflow(*dst_smax, src_reg->s32_min_value, &tmp_prod[2]) ||
14617 check_mul_overflow(*dst_smax, src_reg->s32_max_value, &tmp_prod[3])) {
14618 /* Overflow possible, we know nothing */
14619 *dst_smin = S32_MIN;
14620 *dst_smax = S32_MAX;
14621 } else {
14622 *dst_smin = min_array(tmp_prod, 4);
14623 *dst_smax = max_array(tmp_prod, 4);
14624 }
14625 }
14626
14627 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
14628 struct bpf_reg_state *src_reg)
14629 {
14630 s64 *dst_smin = &dst_reg->smin_value;
14631 s64 *dst_smax = &dst_reg->smax_value;
14632 u64 *dst_umin = &dst_reg->umin_value;
14633 u64 *dst_umax = &dst_reg->umax_value;
14634 s64 tmp_prod[4];
14635
14636 if (check_mul_overflow(*dst_umax, src_reg->umax_value, dst_umax) ||
14637 check_mul_overflow(*dst_umin, src_reg->umin_value, dst_umin)) {
14638 /* Overflow possible, we know nothing */
14639 *dst_umin = 0;
14640 *dst_umax = U64_MAX;
14641 }
14642 if (check_mul_overflow(*dst_smin, src_reg->smin_value, &tmp_prod[0]) ||
14643 check_mul_overflow(*dst_smin, src_reg->smax_value, &tmp_prod[1]) ||
14644 check_mul_overflow(*dst_smax, src_reg->smin_value, &tmp_prod[2]) ||
14645 check_mul_overflow(*dst_smax, src_reg->smax_value, &tmp_prod[3])) {
14646 /* Overflow possible, we know nothing */
14647 *dst_smin = S64_MIN;
14648 *dst_smax = S64_MAX;
14649 } else {
14650 *dst_smin = min_array(tmp_prod, 4);
14651 *dst_smax = max_array(tmp_prod, 4);
14652 }
14653 }
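/* Worked example (illustrative): the signed bounds come from the four corner
 * products. For dst in [-3, 4] and src in [2, 5] the candidates are
 * {-6, -15, 8, 20}, so the result range is [-15, 20]; any corner product
 * that overflows s64 forces [S64_MIN, S64_MAX] instead.
 */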
14654
14655 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
14656 struct bpf_reg_state *src_reg)
14657 {
14658 bool src_known = tnum_subreg_is_const(src_reg->var_off);
14659 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14660 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14661 u32 umax_val = src_reg->u32_max_value;
14662
14663 if (src_known && dst_known) {
14664 __mark_reg32_known(dst_reg, var32_off.value);
14665 return;
14666 }
14667
14668 /* We get our minimum from the var_off, since that's inherently
14669 * bitwise. Our maximum is the minimum of the operands' maxima.
14670 */
14671 dst_reg->u32_min_value = var32_off.value;
14672 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
14673
14674 /* Safe to set s32 bounds by casting u32 result into s32 when u32
14675 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
14676 */
14677 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
14678 dst_reg->s32_min_value = dst_reg->u32_min_value;
14679 dst_reg->s32_max_value = dst_reg->u32_max_value;
14680 } else {
14681 dst_reg->s32_min_value = S32_MIN;
14682 dst_reg->s32_max_value = S32_MAX;
14683 }
14684 }
14685
14686 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
14687 struct bpf_reg_state *src_reg)
14688 {
14689 bool src_known = tnum_is_const(src_reg->var_off);
14690 bool dst_known = tnum_is_const(dst_reg->var_off);
14691 u64 umax_val = src_reg->umax_value;
14692
14693 if (src_known && dst_known) {
14694 __mark_reg_known(dst_reg, dst_reg->var_off.value);
14695 return;
14696 }
14697
14698 /* We get our minimum from the var_off, since that's inherently
14699 * bitwise. Our maximum is the minimum of the operands' maxima.
14700 */
14701 dst_reg->umin_value = dst_reg->var_off.value;
14702 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
14703
14704 /* Safe to set s64 bounds by casting u64 result into s64 when u64
14705 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
14706 */
14707 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
14708 dst_reg->smin_value = dst_reg->umin_value;
14709 dst_reg->smax_value = dst_reg->umax_value;
14710 } else {
14711 dst_reg->smin_value = S64_MIN;
14712 dst_reg->smax_value = S64_MAX;
14713 }
14714 /* We may learn something more from the var_off */
14715 __update_reg_bounds(dst_reg);
14716 }
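/* Worked example (illustrative): dst completely unknown in [0, 255] ANDed
 * with the constant 0xf0 gives umin = var_off.value = 0 (no bit is known to
 * be set in both operands) and umax = min(255, 0xf0) = 0xf0, i.e. the result
 * is known to lie in [0, 0xf0].
 */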
14717
14718 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
14719 struct bpf_reg_state *src_reg)
14720 {
14721 bool src_known = tnum_subreg_is_const(src_reg->var_off);
14722 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14723 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14724 u32 umin_val = src_reg->u32_min_value;
14725
14726 if (src_known && dst_known) {
14727 __mark_reg32_known(dst_reg, var32_off.value);
14728 return;
14729 }
14730
14731 /* We get our maximum from the var_off, and our minimum is the
14732 * maximum of the operands' minima
14733 */
14734 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
14735 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
14736
14737 /* Safe to set s32 bounds by casting u32 result into s32 when u32
14738 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
14739 */
14740 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
14741 dst_reg->s32_min_value = dst_reg->u32_min_value;
14742 dst_reg->s32_max_value = dst_reg->u32_max_value;
14743 } else {
14744 dst_reg->s32_min_value = S32_MIN;
14745 dst_reg->s32_max_value = S32_MAX;
14746 }
14747 }
14748
14749 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
14750 struct bpf_reg_state *src_reg)
14751 {
14752 bool src_known = tnum_is_const(src_reg->var_off);
14753 bool dst_known = tnum_is_const(dst_reg->var_off);
14754 u64 umin_val = src_reg->umin_value;
14755
14756 if (src_known && dst_known) {
14757 __mark_reg_known(dst_reg, dst_reg->var_off.value);
14758 return;
14759 }
14760
14761 /* We get our maximum from the var_off, and our minimum is the
14762 * maximum of the operands' minima
14763 */
14764 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
14765 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
14766
14767 /* Safe to set s64 bounds by casting u64 result into s64 when u64
14768 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
14769 */
14770 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
14771 dst_reg->smin_value = dst_reg->umin_value;
14772 dst_reg->smax_value = dst_reg->umax_value;
14773 } else {
14774 dst_reg->smin_value = S64_MIN;
14775 dst_reg->smax_value = S64_MAX;
14776 }
14777 /* We may learn something more from the var_off */
14778 __update_reg_bounds(dst_reg);
14779 }
14780
14781 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
14782 struct bpf_reg_state *src_reg)
14783 {
14784 bool src_known = tnum_subreg_is_const(src_reg->var_off);
14785 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14786 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14787
14788 if (src_known && dst_known) {
14789 __mark_reg32_known(dst_reg, var32_off.value);
14790 return;
14791 }
14792
14793 /* We get both minimum and maximum from the var32_off. */
14794 dst_reg->u32_min_value = var32_off.value;
14795 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
14796
14797 /* Safe to set s32 bounds by casting u32 result into s32 when u32
14798 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
14799 */
14800 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
14801 dst_reg->s32_min_value = dst_reg->u32_min_value;
14802 dst_reg->s32_max_value = dst_reg->u32_max_value;
14803 } else {
14804 dst_reg->s32_min_value = S32_MIN;
14805 dst_reg->s32_max_value = S32_MAX;
14806 }
14807 }
14808
14809 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
14810 struct bpf_reg_state *src_reg)
14811 {
14812 bool src_known = tnum_is_const(src_reg->var_off);
14813 bool dst_known = tnum_is_const(dst_reg->var_off);
14814
14815 if (src_known && dst_known) {
14816 /* dst_reg->var_off.value has been updated earlier */
14817 __mark_reg_known(dst_reg, dst_reg->var_off.value);
14818 return;
14819 }
14820
14821 /* We get both minimum and maximum from the var_off. */
14822 dst_reg->umin_value = dst_reg->var_off.value;
14823 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
14824
14825 /* Safe to set s64 bounds by casting u64 result into s64 when u64
14826 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
14827 */
14828 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
14829 dst_reg->smin_value = dst_reg->umin_value;
14830 dst_reg->smax_value = dst_reg->umax_value;
14831 } else {
14832 dst_reg->smin_value = S64_MIN;
14833 dst_reg->smax_value = S64_MAX;
14834 }
14835
14836 __update_reg_bounds(dst_reg);
14837 }
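/* Note on the sign-boundary cast used above (illustrative): if the unsigned
 * result range is e.g. [5, 10], both ends cast to non-negative s64 values,
 * so the signed bounds can be set to [5, 10] directly. If the range is
 * [5, U64_MAX], then (s64)U64_MAX == -1 is smaller than 5, the range crosses
 * the sign boundary, and the signed bounds stay unknown.
 */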
14838
14839 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14840 u64 umin_val, u64 umax_val)
14841 {
14842 /* We lose all sign bit information (except what we can pick
14843 * up from var_off)
14844 */
14845 dst_reg->s32_min_value = S32_MIN;
14846 dst_reg->s32_max_value = S32_MAX;
14847 /* If we might shift our top bit out, then we know nothing */
14848 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
14849 dst_reg->u32_min_value = 0;
14850 dst_reg->u32_max_value = U32_MAX;
14851 } else {
14852 dst_reg->u32_min_value <<= umin_val;
14853 dst_reg->u32_max_value <<= umax_val;
14854 }
14855 }
14856
14857 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14858 struct bpf_reg_state *src_reg)
14859 {
14860 u32 umax_val = src_reg->u32_max_value;
14861 u32 umin_val = src_reg->u32_min_value;
14862 /* u32 alu operation will zext upper bits */
14863 struct tnum subreg = tnum_subreg(dst_reg->var_off);
14864
14865 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14866 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
14867 /* Not required, but to be careful we mark reg64 bounds as unknown so
14868 * that we are forced to pick them up from tnum and zext later and
14869 * if some path skips this step we are still safe.
14870 */
14871 __mark_reg64_unbounded(dst_reg);
14872 __update_reg32_bounds(dst_reg);
14873 }
14874
14875 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
14876 u64 umin_val, u64 umax_val)
14877 {
14878 /* Special case <<32 because it is a common compiler pattern to sign
14879 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
14880 * positive we know this shift will also be positive so we can track
14881 * bounds correctly. Otherwise we lose all sign bit information except
14882 * what we can pick up from var_off. Perhaps we can generalize this
14883 * later to shifts of any length.
14884 */
14885 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
14886 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
14887 else
14888 dst_reg->smax_value = S64_MAX;
14889
14890 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
14891 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
14892 else
14893 dst_reg->smin_value = S64_MIN;
14894
14895 /* If we might shift our top bit out, then we know nothing */
14896 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
14897 dst_reg->umin_value = 0;
14898 dst_reg->umax_value = U64_MAX;
14899 } else {
14900 dst_reg->umin_value <<= umin_val;
14901 dst_reg->umax_value <<= umax_val;
14902 }
14903 }
14904
14905 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
14906 struct bpf_reg_state *src_reg)
14907 {
14908 u64 umax_val = src_reg->umax_value;
14909 u64 umin_val = src_reg->umin_value;
14910
14911 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
14912 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
14913 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14914
14915 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
14916 /* We may learn something more from the var_off */
14917 __update_reg_bounds(dst_reg);
14918 }
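/* Illustrative example of the <<32 special case handled above: compilers
 * commonly sign-extend a 32-bit subregister with
 *
 *   r1 <<= 32
 *   r1 s>>= 32
 *
 * If the 32-bit bounds before the shift are non-negative, say [0, 100], the
 * 64-bit signed bounds after '<<32' can be tracked precisely as
 * [0, 100 << 32] instead of being thrown away.
 */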
14919
14920 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
14921 struct bpf_reg_state *src_reg)
14922 {
14923 struct tnum subreg = tnum_subreg(dst_reg->var_off);
14924 u32 umax_val = src_reg->u32_max_value;
14925 u32 umin_val = src_reg->u32_min_value;
14926
14927 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
14928 * be negative, then either:
14929 * 1) src_reg might be zero, so the sign bit of the result is
14930 * unknown, so we lose our signed bounds
14931 * 2) it's known negative, thus the unsigned bounds capture the
14932 * signed bounds
14933 * 3) the signed bounds cross zero, so they tell us nothing
14934 * about the result
14935 * If the value in dst_reg is known nonnegative, then again the
14936 * unsigned bounds capture the signed bounds.
14937 * Thus, in all cases it suffices to blow away our signed bounds
14938 * and rely on inferring new ones from the unsigned bounds and
14939 * var_off of the result.
14940 */
14941 dst_reg->s32_min_value = S32_MIN;
14942 dst_reg->s32_max_value = S32_MAX;
14943
14944 dst_reg->var_off = tnum_rshift(subreg, umin_val);
14945 dst_reg->u32_min_value >>= umax_val;
14946 dst_reg->u32_max_value >>= umin_val;
14947
14948 __mark_reg64_unbounded(dst_reg);
14949 __update_reg32_bounds(dst_reg);
14950 }
14951
14952 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
14953 struct bpf_reg_state *src_reg)
14954 {
14955 u64 umax_val = src_reg->umax_value;
14956 u64 umin_val = src_reg->umin_value;
14957
14958 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
14959 * be negative, then either:
14960 * 1) src_reg might be zero, so the sign bit of the result is
14961 * unknown, so we lose our signed bounds
14962 * 2) it's known negative, thus the unsigned bounds capture the
14963 * signed bounds
14964 * 3) the signed bounds cross zero, so they tell us nothing
14965 * about the result
14966 * If the value in dst_reg is known nonnegative, then again the
14967 * unsigned bounds capture the signed bounds.
14968 * Thus, in all cases it suffices to blow away our signed bounds
14969 * and rely on inferring new ones from the unsigned bounds and
14970 * var_off of the result.
14971 */
14972 dst_reg->smin_value = S64_MIN;
14973 dst_reg->smax_value = S64_MAX;
14974 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
14975 dst_reg->umin_value >>= umax_val;
14976 dst_reg->umax_value >>= umin_val;
14977
14978 /* It's not easy to operate on alu32 bounds here because it depends
14979 * on bits being shifted in. Take the easy way out and mark unbounded
14980 * so we can recalculate later from tnum.
14981 */
14982 __mark_reg32_unbounded(dst_reg);
14983 __update_reg_bounds(dst_reg);
14984 }
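/* Illustrative example for the comment above: with dst in s64 [-2, 3] the
 * signed range crosses zero, so nothing useful is known about the sign of
 * the value being shifted; after BPF_RSH the signed bounds are therefore
 * dropped and later re-derived from the unsigned bounds and var_off.
 */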
14985
14986 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
14987 struct bpf_reg_state *src_reg)
14988 {
14989 u64 umin_val = src_reg->u32_min_value;
14990
14991 /* Upon reaching here, src_known is true and
14992 * umax_val is equal to umin_val.
14993 */
14994 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
14995 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
14996
14997 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
14998
14999 /* blow away the dst_reg umin_value/umax_value and rely on
15000 * dst_reg var_off to refine the result.
15001 */
15002 dst_reg->u32_min_value = 0;
15003 dst_reg->u32_max_value = U32_MAX;
15004
15005 __mark_reg64_unbounded(dst_reg);
15006 __update_reg32_bounds(dst_reg);
15007 }
15008
15009 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
15010 struct bpf_reg_state *src_reg)
15011 {
15012 u64 umin_val = src_reg->umin_value;
15013
15014 /* Upon reaching here, src_known is true and umax_val is equal
15015 * to umin_val.
15016 */
15017 dst_reg->smin_value >>= umin_val;
15018 dst_reg->smax_value >>= umin_val;
15019
15020 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
15021
15022 /* blow away the dst_reg umin_value/umax_value and rely on
15023 * dst_reg var_off to refine the result.
15024 */
15025 dst_reg->umin_value = 0;
15026 dst_reg->umax_value = U64_MAX;
15027
15028 /* It's not easy to operate on alu32 bounds here because it depends
15029 * on bits being shifted in from the upper 32 bits. Take the easy way out
15030 * and mark unbounded so we can recalculate later from tnum.
15031 */
15032 __mark_reg32_unbounded(dst_reg);
15033 __update_reg_bounds(dst_reg);
15034 }
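/* Worked example (illustrative): BPF_ARSH only reaches here with a known
 * constant shift amount; for dst in s64 [-8, 16] and a shift of 2 the
 * arithmetic shift preserves the sign, giving [-8 >> 2, 16 >> 2] = [-2, 4].
 */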
15035
15036 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
15037 const struct bpf_reg_state *src_reg)
15038 {
15039 bool src_is_const = false;
15040 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
15041
15042 if (insn_bitness == 32) {
15043 if (tnum_subreg_is_const(src_reg->var_off)
15044 && src_reg->s32_min_value == src_reg->s32_max_value
15045 && src_reg->u32_min_value == src_reg->u32_max_value)
15046 src_is_const = true;
15047 } else {
15048 if (tnum_is_const(src_reg->var_off)
15049 && src_reg->smin_value == src_reg->smax_value
15050 && src_reg->umin_value == src_reg->umax_value)
15051 src_is_const = true;
15052 }
15053
15054 switch (BPF_OP(insn->code)) {
15055 case BPF_ADD:
15056 case BPF_SUB:
15057 case BPF_AND:
15058 case BPF_XOR:
15059 case BPF_OR:
15060 case BPF_MUL:
15061 return true;
15062
15063 /* Shift operators range is only computable if shift dimension operand
15064 * is a constant. Shifts greater than 31 or 63 are undefined. This
15065 * includes shifts by a negative number.
15066 */
15067 case BPF_LSH:
15068 case BPF_RSH:
15069 case BPF_ARSH:
15070 return (src_is_const && src_reg->umax_value < insn_bitness);
15071 default:
15072 return false;
15073 }
15074 }
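/* Example (illustrative): 'r1 <<= r2' is only range-tracked when r2 is a
 * known constant smaller than the operation width (32 or 64); a variable or
 * oversized shift count makes the result undefined, so the caller simply
 * marks the destination register as unknown.
 */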
15075
15076 /* WARNING: This function does calculations on 64-bit values, but the actual
15077 * execution may occur on 32-bit values. Therefore, things like bitshifts
15078 * need extra checks in the 32-bit case.
15079 */
15080 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
15081 struct bpf_insn *insn,
15082 struct bpf_reg_state *dst_reg,
15083 struct bpf_reg_state src_reg)
15084 {
15085 u8 opcode = BPF_OP(insn->code);
15086 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
15087 int ret;
15088
15089 if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
15090 __mark_reg_unknown(env, dst_reg);
15091 return 0;
15092 }
15093
15094 if (sanitize_needed(opcode)) {
15095 ret = sanitize_val_alu(env, insn);
15096 if (ret < 0)
15097 return sanitize_err(env, insn, ret, NULL, NULL);
15098 }
15099
15100 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
15101 * There are two classes of instructions: for the first class we track both
15102 * alu32 and alu64 sign/unsigned bounds independently; this provides the
15103 * greatest amount of precision when alu operations are mixed with jmp32
15104 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
15105 * BPF_OR, and BPF_XOR. This is possible because these ops have fairly easy to
15106 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
15107 * See alu32 verifier tests for examples. The second class of
15108 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
15109 * with regards to tracking sign/unsigned bounds because the bits may
15110 * cross subreg boundaries in the alu64 case. When this happens we mark
15111 * the reg unbounded in the subreg bound space and use the resulting
15112 * tnum to calculate an approximation of the sign/unsigned bounds.
15113 */
15114 switch (opcode) {
15115 case BPF_ADD:
15116 scalar32_min_max_add(dst_reg, &src_reg);
15117 scalar_min_max_add(dst_reg, &src_reg);
15118 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
15119 break;
15120 case BPF_SUB:
15121 scalar32_min_max_sub(dst_reg, &src_reg);
15122 scalar_min_max_sub(dst_reg, &src_reg);
15123 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
15124 break;
15125 case BPF_MUL:
15126 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
15127 scalar32_min_max_mul(dst_reg, &src_reg);
15128 scalar_min_max_mul(dst_reg, &src_reg);
15129 break;
15130 case BPF_AND:
15131 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
15132 scalar32_min_max_and(dst_reg, &src_reg);
15133 scalar_min_max_and(dst_reg, &src_reg);
15134 break;
15135 case BPF_OR:
15136 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
15137 scalar32_min_max_or(dst_reg, &src_reg);
15138 scalar_min_max_or(dst_reg, &src_reg);
15139 break;
15140 case BPF_XOR:
15141 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
15142 scalar32_min_max_xor(dst_reg, &src_reg);
15143 scalar_min_max_xor(dst_reg, &src_reg);
15144 break;
15145 case BPF_LSH:
15146 if (alu32)
15147 scalar32_min_max_lsh(dst_reg, &src_reg);
15148 else
15149 scalar_min_max_lsh(dst_reg, &src_reg);
15150 break;
15151 case BPF_RSH:
15152 if (alu32)
15153 scalar32_min_max_rsh(dst_reg, &src_reg);
15154 else
15155 scalar_min_max_rsh(dst_reg, &src_reg);
15156 break;
15157 case BPF_ARSH:
15158 if (alu32)
15159 scalar32_min_max_arsh(dst_reg, &src_reg);
15160 else
15161 scalar_min_max_arsh(dst_reg, &src_reg);
15162 break;
15163 default:
15164 break;
15165 }
15166
15167 /* ALU32 ops are zero extended into 64bit register */
15168 if (alu32)
15169 zext_32_to_64(dst_reg);
15170 reg_bounds_sync(dst_reg);
15171 return 0;
15172 }
15173
15174 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
15175 * and var_off.
15176 */
15177 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
15178 struct bpf_insn *insn)
15179 {
15180 struct bpf_verifier_state *vstate = env->cur_state;
15181 struct bpf_func_state *state = vstate->frame[vstate->curframe];
15182 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
15183 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
15184 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
15185 u8 opcode = BPF_OP(insn->code);
15186 int err;
15187
15188 dst_reg = &regs[insn->dst_reg];
15189 src_reg = NULL;
15190
15191 if (dst_reg->type == PTR_TO_ARENA) {
15192 struct bpf_insn_aux_data *aux = cur_aux(env);
15193
15194 if (BPF_CLASS(insn->code) == BPF_ALU64)
15195 /*
15196 * 32-bit operations zero upper bits automatically.
15197 * 64-bit operations need to be converted to 32.
15198 */
15199 aux->needs_zext = true;
15200
15201 /* Any arithmetic operations are allowed on arena pointers */
15202 return 0;
15203 }
15204
15205 if (dst_reg->type != SCALAR_VALUE)
15206 ptr_reg = dst_reg;
15207
15208 if (BPF_SRC(insn->code) == BPF_X) {
15209 src_reg = &regs[insn->src_reg];
15210 if (src_reg->type != SCALAR_VALUE) {
15211 if (dst_reg->type != SCALAR_VALUE) {
15212 /* Combining two pointers by any ALU op yields
15213 * an arbitrary scalar. Disallow all math except
15214 * pointer subtraction
15215 */
15216 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
15217 mark_reg_unknown(env, regs, insn->dst_reg);
15218 return 0;
15219 }
15220 verbose(env, "R%d pointer %s pointer prohibited\n",
15221 insn->dst_reg,
15222 bpf_alu_string[opcode >> 4]);
15223 return -EACCES;
15224 } else {
15225 /* scalar += pointer
15226 * This is legal, but we have to reverse our
15227 * src/dest handling in computing the range
15228 */
15229 err = mark_chain_precision(env, insn->dst_reg);
15230 if (err)
15231 return err;
15232 return adjust_ptr_min_max_vals(env, insn,
15233 src_reg, dst_reg);
15234 }
15235 } else if (ptr_reg) {
15236 /* pointer += scalar */
15237 err = mark_chain_precision(env, insn->src_reg);
15238 if (err)
15239 return err;
15240 return adjust_ptr_min_max_vals(env, insn,
15241 dst_reg, src_reg);
15242 } else if (dst_reg->precise) {
15243 /* if dst_reg is precise, src_reg should be precise as well */
15244 err = mark_chain_precision(env, insn->src_reg);
15245 if (err)
15246 return err;
15247 }
15248 } else {
15249 /* Pretend the src is a reg with a known value, since we only
15250 * need to be able to read from this state.
15251 */
15252 off_reg.type = SCALAR_VALUE;
15253 __mark_reg_known(&off_reg, insn->imm);
15254 src_reg = &off_reg;
15255 if (ptr_reg) /* pointer += K */
15256 return adjust_ptr_min_max_vals(env, insn,
15257 ptr_reg, src_reg);
15258 }
15259
15260 /* Got here implies adding two SCALAR_VALUEs */
15261 if (WARN_ON_ONCE(ptr_reg)) {
15262 print_verifier_state(env, vstate, vstate->curframe, true);
15263 verbose(env, "verifier internal error: unexpected ptr_reg\n");
15264 return -EINVAL;
15265 }
15266 if (WARN_ON(!src_reg)) {
15267 print_verifier_state(env, vstate, vstate->curframe, true);
15268 verbose(env, "verifier internal error: no src_reg\n");
15269 return -EINVAL;
15270 }
15271 err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
15272 if (err)
15273 return err;
15274 /*
15275 * Compilers can generate the code
15276 * r1 = r2
15277 * r1 += 0x1
15278 * if r2 < 1000 goto ...
15279 * use r1 in memory access
15280 * So for 64-bit alu remember constant delta between r2 and r1 and
15281 * update r1 after 'if' condition.
15282 */
15283 if (env->bpf_capable &&
15284 BPF_OP(insn->code) == BPF_ADD && !alu32 &&
15285 dst_reg->id && is_reg_const(src_reg, false)) {
15286 u64 val = reg_const_value(src_reg, false);
15287
15288 if ((dst_reg->id & BPF_ADD_CONST) ||
15289 /* prevent overflow in sync_linked_regs() later */
15290 val > (u32)S32_MAX) {
15291 /*
15292 * If the register already went through rX += val
15293 * we cannot accumulate another val into rx->off.
15294 */
15295 dst_reg->off = 0;
15296 dst_reg->id = 0;
15297 } else {
15298 dst_reg->id |= BPF_ADD_CONST;
15299 dst_reg->off = val;
15300 }
15301 } else {
15302 /*
15303 * Make sure ID is cleared otherwise dst_reg min/max could be
15304 * incorrectly propagated into other registers by sync_linked_regs()
15305 */
15306 dst_reg->id = 0;
15307 }
15308 return 0;
15309 }
15310
15311 /* check validity of 32-bit and 64-bit arithmetic operations */
15312 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
15313 {
15314 struct bpf_reg_state *regs = cur_regs(env);
15315 u8 opcode = BPF_OP(insn->code);
15316 int err;
15317
15318 if (opcode == BPF_END || opcode == BPF_NEG) {
15319 if (opcode == BPF_NEG) {
15320 if (BPF_SRC(insn->code) != BPF_K ||
15321 insn->src_reg != BPF_REG_0 ||
15322 insn->off != 0 || insn->imm != 0) {
15323 verbose(env, "BPF_NEG uses reserved fields\n");
15324 return -EINVAL;
15325 }
15326 } else {
15327 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
15328 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
15329 (BPF_CLASS(insn->code) == BPF_ALU64 &&
15330 BPF_SRC(insn->code) != BPF_TO_LE)) {
15331 verbose(env, "BPF_END uses reserved fields\n");
15332 return -EINVAL;
15333 }
15334 }
15335
15336 /* check src operand */
15337 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15338 if (err)
15339 return err;
15340
15341 if (is_pointer_value(env, insn->dst_reg)) {
15342 verbose(env, "R%d pointer arithmetic prohibited\n",
15343 insn->dst_reg);
15344 return -EACCES;
15345 }
15346
15347 /* check dest operand */
15348 err = check_reg_arg(env, insn->dst_reg, DST_OP);
15349 if (err)
15350 return err;
15351
15352 } else if (opcode == BPF_MOV) {
15353
15354 if (BPF_SRC(insn->code) == BPF_X) {
15355 if (BPF_CLASS(insn->code) == BPF_ALU) {
15356 if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
15357 insn->imm) {
15358 verbose(env, "BPF_MOV uses reserved fields\n");
15359 return -EINVAL;
15360 }
15361 } else if (insn->off == BPF_ADDR_SPACE_CAST) {
15362 if (insn->imm != 1 && insn->imm != 1u << 16) {
15363 verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
15364 return -EINVAL;
15365 }
15366 if (!env->prog->aux->arena) {
15367 verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
15368 return -EINVAL;
15369 }
15370 } else {
15371 if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
15372 insn->off != 32) || insn->imm) {
15373 verbose(env, "BPF_MOV uses reserved fields\n");
15374 return -EINVAL;
15375 }
15376 }
15377
15378 /* check src operand */
15379 err = check_reg_arg(env, insn->src_reg, SRC_OP);
15380 if (err)
15381 return err;
15382 } else {
15383 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
15384 verbose(env, "BPF_MOV uses reserved fields\n");
15385 return -EINVAL;
15386 }
15387 }
15388
15389 /* check dest operand, mark as required later */
15390 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15391 if (err)
15392 return err;
15393
15394 if (BPF_SRC(insn->code) == BPF_X) {
15395 struct bpf_reg_state *src_reg = regs + insn->src_reg;
15396 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
15397
15398 if (BPF_CLASS(insn->code) == BPF_ALU64) {
15399 if (insn->imm) {
15400 /* off == BPF_ADDR_SPACE_CAST */
15401 mark_reg_unknown(env, regs, insn->dst_reg);
15402 if (insn->imm == 1) { /* cast from as(1) to as(0) */
15403 dst_reg->type = PTR_TO_ARENA;
15404 /* PTR_TO_ARENA is 32-bit */
15405 dst_reg->subreg_def = env->insn_idx + 1;
15406 }
15407 } else if (insn->off == 0) {
15408 /* case: R1 = R2
15409 * copy register state to dest reg
15410 */
15411 assign_scalar_id_before_mov(env, src_reg);
15412 copy_register_state(dst_reg, src_reg);
15413 dst_reg->live |= REG_LIVE_WRITTEN;
15414 dst_reg->subreg_def = DEF_NOT_SUBREG;
15415 } else {
15416 /* case: R1 = (s8, s16, s32) R2 */
15417 if (is_pointer_value(env, insn->src_reg)) {
15418 verbose(env,
15419 "R%d sign-extension part of pointer\n",
15420 insn->src_reg);
15421 return -EACCES;
15422 } else if (src_reg->type == SCALAR_VALUE) {
15423 bool no_sext;
15424
15425 no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
15426 if (no_sext)
15427 assign_scalar_id_before_mov(env, src_reg);
15428 copy_register_state(dst_reg, src_reg);
15429 if (!no_sext)
15430 dst_reg->id = 0;
15431 coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
15432 dst_reg->live |= REG_LIVE_WRITTEN;
15433 dst_reg->subreg_def = DEF_NOT_SUBREG;
15434 } else {
15435 mark_reg_unknown(env, regs, insn->dst_reg);
15436 }
15437 }
15438 } else {
15439 /* R1 = (u32) R2 */
15440 if (is_pointer_value(env, insn->src_reg)) {
15441 verbose(env,
15442 "R%d partial copy of pointer\n",
15443 insn->src_reg);
15444 return -EACCES;
15445 } else if (src_reg->type == SCALAR_VALUE) {
15446 if (insn->off == 0) {
15447 bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
15448
15449 if (is_src_reg_u32)
15450 assign_scalar_id_before_mov(env, src_reg);
15451 copy_register_state(dst_reg, src_reg);
15452 /* Make sure ID is cleared if src_reg is not in u32
15453 * range otherwise dst_reg min/max could be incorrectly
15454 * propagated into src_reg by sync_linked_regs()
15455 */
15456 if (!is_src_reg_u32)
15457 dst_reg->id = 0;
15458 dst_reg->live |= REG_LIVE_WRITTEN;
15459 dst_reg->subreg_def = env->insn_idx + 1;
15460 } else {
15461 /* case: W1 = (s8, s16)W2 */
15462 bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
15463
15464 if (no_sext)
15465 assign_scalar_id_before_mov(env, src_reg);
15466 copy_register_state(dst_reg, src_reg);
15467 if (!no_sext)
15468 dst_reg->id = 0;
15469 dst_reg->live |= REG_LIVE_WRITTEN;
15470 dst_reg->subreg_def = env->insn_idx + 1;
15471 coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
15472 }
15473 } else {
15474 mark_reg_unknown(env, regs,
15475 insn->dst_reg);
15476 }
15477 zext_32_to_64(dst_reg);
15478 reg_bounds_sync(dst_reg);
15479 }
15480 } else {
15481 /* case: R = imm
15482 * remember the value we stored into this reg
15483 */
15484 /* clear any state __mark_reg_known doesn't set */
15485 mark_reg_unknown(env, regs, insn->dst_reg);
15486 regs[insn->dst_reg].type = SCALAR_VALUE;
15487 if (BPF_CLASS(insn->code) == BPF_ALU64) {
15488 __mark_reg_known(regs + insn->dst_reg,
15489 insn->imm);
15490 } else {
15491 __mark_reg_known(regs + insn->dst_reg,
15492 (u32)insn->imm);
15493 }
15494 }
15495
15496 } else if (opcode > BPF_END) {
15497 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
15498 return -EINVAL;
15499
15500 } else { /* all other ALU ops: and, sub, xor, add, ... */
15501
15502 if (BPF_SRC(insn->code) == BPF_X) {
15503 if (insn->imm != 0 || insn->off > 1 ||
15504 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
15505 verbose(env, "BPF_ALU uses reserved fields\n");
15506 return -EINVAL;
15507 }
15508 /* check src1 operand */
15509 err = check_reg_arg(env, insn->src_reg, SRC_OP);
15510 if (err)
15511 return err;
15512 } else {
15513 if (insn->src_reg != BPF_REG_0 || insn->off > 1 ||
15514 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
15515 verbose(env, "BPF_ALU uses reserved fields\n");
15516 return -EINVAL;
15517 }
15518 }
15519
15520 /* check src2 operand */
15521 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15522 if (err)
15523 return err;
15524
15525 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
15526 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
15527 verbose(env, "div by zero\n");
15528 return -EINVAL;
15529 }
15530
15531 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
15532 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
15533 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
15534
15535 if (insn->imm < 0 || insn->imm >= size) {
15536 verbose(env, "invalid shift %d\n", insn->imm);
15537 return -EINVAL;
15538 }
15539 }
15540
15541 /* check dest operand */
15542 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15543 err = err ?: adjust_reg_min_max_vals(env, insn);
15544 if (err)
15545 return err;
15546 }
15547
15548 return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
15549 }
15550
15551 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
15552 struct bpf_reg_state *dst_reg,
15553 enum bpf_reg_type type,
15554 bool range_right_open)
15555 {
15556 struct bpf_func_state *state;
15557 struct bpf_reg_state *reg;
15558 int new_range;
15559
15560 if (dst_reg->off < 0 ||
15561 (dst_reg->off == 0 && range_right_open))
15562 /* This doesn't give us any range */
15563 return;
15564
15565 if (dst_reg->umax_value > MAX_PACKET_OFF ||
15566 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
15567 /* Risk of overflow. For instance, ptr + (1<<63) may be less
15568 * than pkt_end, but that's because it's also less than pkt.
15569 */
15570 return;
15571
15572 new_range = dst_reg->off;
15573 if (range_right_open)
15574 new_range++;
15575
15576 /* Examples for register markings:
15577 *
15578 * pkt_data in dst register:
15579 *
15580 * r2 = r3;
15581 * r2 += 8;
15582 * if (r2 > pkt_end) goto <handle exception>
15583 * <access okay>
15584 *
15585 * r2 = r3;
15586 * r2 += 8;
15587 * if (r2 < pkt_end) goto <access okay>
15588 * <handle exception>
15589 *
15590 * Where:
15591 * r2 == dst_reg, pkt_end == src_reg
15592 * r2=pkt(id=n,off=8,r=0)
15593 * r3=pkt(id=n,off=0,r=0)
15594 *
15595 * pkt_data in src register:
15596 *
15597 * r2 = r3;
15598 * r2 += 8;
15599 * if (pkt_end >= r2) goto <access okay>
15600 * <handle exception>
15601 *
15602 * r2 = r3;
15603 * r2 += 8;
15604 * if (pkt_end <= r2) goto <handle exception>
15605 * <access okay>
15606 *
15607 * Where:
15608 * pkt_end == dst_reg, r2 == src_reg
15609 * r2=pkt(id=n,off=8,r=0)
15610 * r3=pkt(id=n,off=0,r=0)
15611 *
15612 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
15613 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
15614 * and [r3, r3 + 8-1) respectively is safe to access depending on
15615 * the check.
15616 */
15617
15618 /* If our ids match, then we must have the same max_value. And we
15619 * don't care about the other reg's fixed offset, since if it's too big
15620 * the range won't allow anything.
15621 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
15622 */
15623 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15624 if (reg->type == type && reg->id == dst_reg->id)
15625 /* keep the maximum range already checked */
15626 reg->range = max(reg->range, new_range);
15627 }));
15628 }
15629
15630 /*
15631 * <reg1> <op> <reg2>, currently assuming reg2 is a constant
15632 */
15633 static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15634 u8 opcode, bool is_jmp32)
15635 {
15636 struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
15637 struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
15638 u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
15639 u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
15640 s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
15641 s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
15642 u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
15643 u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
15644 s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
15645 s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;
15646
15647 switch (opcode) {
15648 case BPF_JEQ:
15649 /* constants, umin/umax and smin/smax checks would be
15650 * redundant in this case because they all should match
15651 */
15652 if (tnum_is_const(t1) && tnum_is_const(t2))
15653 return t1.value == t2.value;
15654 /* non-overlapping ranges */
15655 if (umin1 > umax2 || umax1 < umin2)
15656 return 0;
15657 if (smin1 > smax2 || smax1 < smin2)
15658 return 0;
15659 if (!is_jmp32) {
15660 /* if 64-bit ranges are inconclusive, see if we can
15661 * utilize 32-bit subrange knowledge to eliminate
15662 * branches that can't be taken a priori
15663 */
15664 if (reg1->u32_min_value > reg2->u32_max_value ||
15665 reg1->u32_max_value < reg2->u32_min_value)
15666 return 0;
15667 if (reg1->s32_min_value > reg2->s32_max_value ||
15668 reg1->s32_max_value < reg2->s32_min_value)
15669 return 0;
15670 }
15671 break;
15672 case BPF_JNE:
15673 /* constants, umin/umax and smin/smax checks would be
15674 * redundant in this case because they all should match
15675 */
15676 if (tnum_is_const(t1) && tnum_is_const(t2))
15677 return t1.value != t2.value;
15678 /* non-overlapping ranges */
15679 if (umin1 > umax2 || umax1 < umin2)
15680 return 1;
15681 if (smin1 > smax2 || smax1 < smin2)
15682 return 1;
15683 if (!is_jmp32) {
15684 /* if 64-bit ranges are inconclusive, see if we can
15685 * utilize 32-bit subrange knowledge to eliminate
15686 * branches that can't be taken a priori
15687 */
15688 if (reg1->u32_min_value > reg2->u32_max_value ||
15689 reg1->u32_max_value < reg2->u32_min_value)
15690 return 1;
15691 if (reg1->s32_min_value > reg2->s32_max_value ||
15692 reg1->s32_max_value < reg2->s32_min_value)
15693 return 1;
15694 }
15695 break;
15696 case BPF_JSET:
15697 if (!is_reg_const(reg2, is_jmp32)) {
15698 swap(reg1, reg2);
15699 swap(t1, t2);
15700 }
15701 if (!is_reg_const(reg2, is_jmp32))
15702 return -1;
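/* A sketch of the tnum checks below: a known-one bit of reg1 overlapping
 * the constant guarantees a non-zero AND (branch taken), while no
 * possibly-set bit overlapping it guarantees a zero AND (branch not
 * taken). E.g. t1 = {value = 0x4, mask = 0x3} and t2.value = 0x4 means
 * bit 2 of reg1 is certainly set, so "reg1 & reg2" is non-zero.
 */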
15703 if ((~t1.mask & t1.value) & t2.value)
15704 return 1;
15705 if (!((t1.mask | t1.value) & t2.value))
15706 return 0;
15707 break;
15708 case BPF_JGT:
15709 if (umin1 > umax2)
15710 return 1;
15711 else if (umax1 <= umin2)
15712 return 0;
15713 break;
15714 case BPF_JSGT:
15715 if (smin1 > smax2)
15716 return 1;
15717 else if (smax1 <= smin2)
15718 return 0;
15719 break;
15720 case BPF_JLT:
15721 if (umax1 < umin2)
15722 return 1;
15723 else if (umin1 >= umax2)
15724 return 0;
15725 break;
15726 case BPF_JSLT:
15727 if (smax1 < smin2)
15728 return 1;
15729 else if (smin1 >= smax2)
15730 return 0;
15731 break;
15732 case BPF_JGE:
15733 if (umin1 >= umax2)
15734 return 1;
15735 else if (umax1 < umin2)
15736 return 0;
15737 break;
15738 case BPF_JSGE:
15739 if (smin1 >= smax2)
15740 return 1;
15741 else if (smax1 < smin2)
15742 return 0;
15743 break;
15744 case BPF_JLE:
15745 if (umax1 <= umin2)
15746 return 1;
15747 else if (umin1 > umax2)
15748 return 0;
15749 break;
15750 case BPF_JSLE:
15751 if (smax1 <= smin2)
15752 return 1;
15753 else if (smin1 > smax2)
15754 return 0;
15755 break;
15756 }
15757
15758 return -1;
15759 }
15760
15761 static int flip_opcode(u32 opcode)
15762 {
15763 /* How can we transform "a <op> b" into "b <op> a"? */
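	/* E.g., "a < b" holds exactly when "b > a" does, so BPF_JLT maps to
	 * BPF_JGT, and likewise for the signed variants.
	 */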
15764 static const u8 opcode_flip[16] = {
15765 /* these stay the same */
15766 [BPF_JEQ >> 4] = BPF_JEQ,
15767 [BPF_JNE >> 4] = BPF_JNE,
15768 [BPF_JSET >> 4] = BPF_JSET,
15769 /* these swap "lesser" and "greater" (L and G in the opcodes) */
15770 [BPF_JGE >> 4] = BPF_JLE,
15771 [BPF_JGT >> 4] = BPF_JLT,
15772 [BPF_JLE >> 4] = BPF_JGE,
15773 [BPF_JLT >> 4] = BPF_JGT,
15774 [BPF_JSGE >> 4] = BPF_JSLE,
15775 [BPF_JSGT >> 4] = BPF_JSLT,
15776 [BPF_JSLE >> 4] = BPF_JSGE,
15777 [BPF_JSLT >> 4] = BPF_JSGT
15778 };
15779 return opcode_flip[opcode >> 4];
15780 }
15781
15782 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
15783 struct bpf_reg_state *src_reg,
15784 u8 opcode)
15785 {
15786 struct bpf_reg_state *pkt;
15787
15788 if (src_reg->type == PTR_TO_PACKET_END) {
15789 pkt = dst_reg;
15790 } else if (dst_reg->type == PTR_TO_PACKET_END) {
15791 pkt = src_reg;
15792 opcode = flip_opcode(opcode);
15793 } else {
15794 return -1;
15795 }
15796
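/* A non-negative range means mark_pkt_end() has not recorded how this pkt
 * pointer compares to pkt_end, so nothing can be concluded here (a sketch
 * of the convention: BEYOND_PKT_END and AT_PKT_END are negative sentinels).
 */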
15797 if (pkt->range >= 0)
15798 return -1;
15799
15800 switch (opcode) {
15801 case BPF_JLE:
15802 /* pkt <= pkt_end */
15803 fallthrough;
15804 case BPF_JGT:
15805 /* pkt > pkt_end */
15806 if (pkt->range == BEYOND_PKT_END)
15807 /* pkt has at least one extra byte beyond pkt_end */
15808 return opcode == BPF_JGT;
15809 break;
15810 case BPF_JLT:
15811 /* pkt < pkt_end */
15812 fallthrough;
15813 case BPF_JGE:
15814 /* pkt >= pkt_end */
15815 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
15816 return opcode == BPF_JGE;
15817 break;
15818 }
15819 return -1;
15820 }
15821
15822 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
15823 * and return:
15824 * 1 - branch will be taken and "goto target" will be executed
15825 * 0 - branch will not be taken and fall-through to next insn
15826 * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
15827 * range [0,10]
15828 */
15829 static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15830 u8 opcode, bool is_jmp32)
15831 {
15832 if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
15833 return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
15834
15835 if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
15836 u64 val;
15837
15838 /* arrange that reg2 is a scalar, and reg1 is a pointer */
15839 if (!is_reg_const(reg2, is_jmp32)) {
15840 opcode = flip_opcode(opcode);
15841 swap(reg1, reg2);
15842 }
15843 /* and ensure that reg2 is a constant */
15844 if (!is_reg_const(reg2, is_jmp32))
15845 return -1;
15846
15847 if (!reg_not_null(reg1))
15848 return -1;
15849
15850 /* If pointer is valid tests against zero will fail so we can
15851 * use this to direct branch taken.
15852 */
15853 val = reg_const_value(reg2, is_jmp32);
15854 if (val != 0)
15855 return -1;
15856
15857 switch (opcode) {
15858 case BPF_JEQ:
15859 return 0;
15860 case BPF_JNE:
15861 return 1;
15862 default:
15863 return -1;
15864 }
15865 }
15866
15867 /* now deal with two scalars, but not necessarily constants */
15868 return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32);
15869 }
15870
15871 /* Opcode that corresponds to a *false* branch condition.
15872 * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
15873 */
15874 static u8 rev_opcode(u8 opcode)
15875 {
15876 switch (opcode) {
15877 case BPF_JEQ: return BPF_JNE;
15878 case BPF_JNE: return BPF_JEQ;
15879 /* JSET doesn't have its reverse opcode in BPF, so add
15880 * BPF_X flag to denote the reverse of that operation
15881 */
15882 case BPF_JSET: return BPF_JSET | BPF_X;
15883 case BPF_JSET | BPF_X: return BPF_JSET;
15884 case BPF_JGE: return BPF_JLT;
15885 case BPF_JGT: return BPF_JLE;
15886 case BPF_JLE: return BPF_JGT;
15887 case BPF_JLT: return BPF_JGE;
15888 case BPF_JSGE: return BPF_JSLT;
15889 case BPF_JSGT: return BPF_JSLE;
15890 case BPF_JSLE: return BPF_JSGT;
15891 case BPF_JSLT: return BPF_JSGE;
15892 default: return 0;
15893 }
15894 }
15895
15896 /* Refine range knowledge for <reg1> <op> <reg2> conditional operation. */
15897 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15898 u8 opcode, bool is_jmp32)
15899 {
15900 struct tnum t;
15901 u64 val;
15902
15903 /* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below */
15904 switch (opcode) {
15905 case BPF_JGE:
15906 case BPF_JGT:
15907 case BPF_JSGE:
15908 case BPF_JSGT:
15909 opcode = flip_opcode(opcode);
15910 swap(reg1, reg2);
15911 break;
15912 default:
15913 break;
15914 }
15915
15916 switch (opcode) {
15917 case BPF_JEQ:
15918 if (is_jmp32) {
15919 reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
15920 reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
15921 reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
15922 reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
15923 reg2->u32_min_value = reg1->u32_min_value;
15924 reg2->u32_max_value = reg1->u32_max_value;
15925 reg2->s32_min_value = reg1->s32_min_value;
15926 reg2->s32_max_value = reg1->s32_max_value;
15927
15928 t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
15929 reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15930 reg2->var_off = tnum_with_subreg(reg2->var_off, t);
15931 } else {
15932 reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
15933 reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
15934 reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
15935 reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
15936 reg2->umin_value = reg1->umin_value;
15937 reg2->umax_value = reg1->umax_value;
15938 reg2->smin_value = reg1->smin_value;
15939 reg2->smax_value = reg1->smax_value;
15940
15941 reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
15942 reg2->var_off = reg1->var_off;
15943 }
15944 break;
15945 case BPF_JNE:
15946 if (!is_reg_const(reg2, is_jmp32))
15947 swap(reg1, reg2);
15948 if (!is_reg_const(reg2, is_jmp32))
15949 break;
15950
15951 /* try to recompute the bound of reg1 if reg2 is a const and
15952 * is exactly the edge of reg1.
15953 */
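/* E.g. (a sketch): with reg1 in [0, 10] and reg2 == 10, knowing
 * reg1 != reg2 trims reg1 to [0, 9]; a constant strictly inside the
 * range teaches nothing.
 */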
15954 val = reg_const_value(reg2, is_jmp32);
15955 if (is_jmp32) {
15956 /* u32_min_value is not equal to 0xffffffff at this point,
15957 * because otherwise u32_max_value is 0xffffffff as well,
15958 * in such a case both reg1 and reg2 would be constants,
15959 * jump would be predicted and reg_set_min_max() won't
15960 * be called.
15961 *
15962 * Same reasoning works for all {u,s}{min,max}{32,64} cases
15963 * below.
15964 */
15965 if (reg1->u32_min_value == (u32)val)
15966 reg1->u32_min_value++;
15967 if (reg1->u32_max_value == (u32)val)
15968 reg1->u32_max_value--;
15969 if (reg1->s32_min_value == (s32)val)
15970 reg1->s32_min_value++;
15971 if (reg1->s32_max_value == (s32)val)
15972 reg1->s32_max_value--;
15973 } else {
15974 if (reg1->umin_value == (u64)val)
15975 reg1->umin_value++;
15976 if (reg1->umax_value == (u64)val)
15977 reg1->umax_value--;
15978 if (reg1->smin_value == (s64)val)
15979 reg1->smin_value++;
15980 if (reg1->smax_value == (s64)val)
15981 reg1->smax_value--;
15982 }
15983 break;
15984 case BPF_JSET:
15985 if (!is_reg_const(reg2, is_jmp32))
15986 swap(reg1, reg2);
15987 if (!is_reg_const(reg2, is_jmp32))
15988 break;
15989 val = reg_const_value(reg2, is_jmp32);
15990 /* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
15991 * requires single bit to learn something useful. E.g., if we
15992 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
15993 * are actually set? We can learn something definite only if
15994 * it's a single-bit value to begin with.
15995 *
15996 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
15997 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
15998 * bit 1 is set, which we can readily use in adjustments.
15999 */
16000 if (!is_power_of_2(val))
16001 break;
16002 if (is_jmp32) {
16003 t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
16004 reg1->var_off = tnum_with_subreg(reg1->var_off, t);
16005 } else {
16006 reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
16007 }
16008 break;
16009 case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
16010 if (!is_reg_const(reg2, is_jmp32))
16011 swap(reg1, reg2);
16012 if (!is_reg_const(reg2, is_jmp32))
16013 break;
16014 val = reg_const_value(reg2, is_jmp32);
16015 if (is_jmp32) {
16016 t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
16017 reg1->var_off = tnum_with_subreg(reg1->var_off, t);
16018 } else {
16019 reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
16020 }
16021 break;
16022 case BPF_JLE:
16023 if (is_jmp32) {
16024 reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
16025 reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
16026 } else {
16027 reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
16028 reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
16029 }
16030 break;
16031 case BPF_JLT:
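/* A sketch of why the +1/-1 below cannot wrap: if reg2's max were 0 or
 * reg1's min were the type's maximum, "reg1 < reg2" would already have
 * been predicted false in is_scalar_branch_taken() and we would not
 * reach this point.
 */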
16032 if (is_jmp32) {
16033 reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1);
16034 reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value);
16035 } else {
16036 reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1);
16037 reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value);
16038 }
16039 break;
16040 case BPF_JSLE:
16041 if (is_jmp32) {
16042 reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
16043 reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
16044 } else {
16045 reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
16046 reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
16047 }
16048 break;
16049 case BPF_JSLT:
16050 if (is_jmp32) {
16051 reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1);
16052 reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value);
16053 } else {
16054 reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1);
16055 reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value);
16056 }
16057 break;
16058 default:
16059 return;
16060 }
16061 }
16062
16063 /* Adjusts the register min/max values in the case that the dst_reg and
16064 * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K
16065 * check, in which case we have a fake SCALAR_VALUE representing insn->imm).
16066 * Technically we can do similar adjustments for pointers to the same object,
16067 * but we don't support that right now.
16068 */
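/* E.g. (a sketch): for "if r1 > r2 goto ..." with r1 in [0, 100] and r2 == 50,
 * the true branch refines r1 to [51, 100] and the false branch to [0, 50].
 */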
16069 static int reg_set_min_max(struct bpf_verifier_env *env,
16070 struct bpf_reg_state *true_reg1,
16071 struct bpf_reg_state *true_reg2,
16072 struct bpf_reg_state *false_reg1,
16073 struct bpf_reg_state *false_reg2,
16074 u8 opcode, bool is_jmp32)
16075 {
16076 int err;
16077
16078 /* If either register is a pointer, we can't learn anything about its
16079 * variable offset from the compare (unless they were a pointer into
16080 * the same object, but we don't bother with that).
16081 */
16082 if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE)
16083 return 0;
16084
16085 /* fallthrough (FALSE) branch */
16086 regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32);
16087 reg_bounds_sync(false_reg1);
16088 reg_bounds_sync(false_reg2);
16089
16090 /* jump (TRUE) branch */
16091 regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32);
16092 reg_bounds_sync(true_reg1);
16093 reg_bounds_sync(true_reg2);
16094
16095 err = reg_bounds_sanity_check(env, true_reg1, "true_reg1");
16096 err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2");
16097 err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1");
16098 err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2");
16099 return err;
16100 }
16101
16102 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
16103 struct bpf_reg_state *reg, u32 id,
16104 bool is_null)
16105 {
16106 if (type_may_be_null(reg->type) && reg->id == id &&
16107 (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
16108 /* Old offset (both fixed and variable parts) should have been
16109 * known-zero, because we don't allow pointer arithmetic on
16110 * pointers that might be NULL. If we see this happening, don't
16111 * convert the register.
16112 *
16113 * But in some cases, some helpers that return local kptrs
16114 * advance offset for the returned pointer. In those cases, it
16115 * is fine to expect to see reg->off.
16116 */
16117 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
16118 return;
16119 if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
16120 WARN_ON_ONCE(reg->off))
16121 return;
16122
16123 if (is_null) {
16124 reg->type = SCALAR_VALUE;
16125 /* We don't need id and ref_obj_id from this point
16126 * onwards anymore, thus we should better reset it,
16127 * so that state pruning has chances to take effect.
16128 */
16129 reg->id = 0;
16130 reg->ref_obj_id = 0;
16131
16132 return;
16133 }
16134
16135 mark_ptr_not_null_reg(reg);
16136
16137 if (!reg_may_point_to_spin_lock(reg)) {
16138 /* For not-NULL ptr, reg->ref_obj_id will be reset
16139 * in release_reference().
16140 *
16141 * reg->id is still used by spin_lock ptr. Other
16142 * than spin_lock ptr type, reg->id can be reset.
16143 */
16144 reg->id = 0;
16145 }
16146 }
16147 }
16148
16149 /* The logic is similar to find_good_pkt_pointers(), both could eventually
16150 * be folded together at some point.
16151 */
16152 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
16153 bool is_null)
16154 {
16155 struct bpf_func_state *state = vstate->frame[vstate->curframe];
16156 struct bpf_reg_state *regs = state->regs, *reg;
16157 u32 ref_obj_id = regs[regno].ref_obj_id;
16158 u32 id = regs[regno].id;
16159
16160 if (ref_obj_id && ref_obj_id == id && is_null)
16161 /* regs[regno] is in the " == NULL" branch.
16162 * No one could have freed the reference state before
16163 * doing the NULL check.
16164 */
16165 WARN_ON_ONCE(release_reference_nomark(vstate, id));
16166
16167 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
16168 mark_ptr_or_null_reg(state, reg, id, is_null);
16169 }));
16170 }
16171
16172 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
16173 struct bpf_reg_state *dst_reg,
16174 struct bpf_reg_state *src_reg,
16175 struct bpf_verifier_state *this_branch,
16176 struct bpf_verifier_state *other_branch)
16177 {
16178 if (BPF_SRC(insn->code) != BPF_X)
16179 return false;
16180
16181 /* Pointers are always 64-bit. */
16182 if (BPF_CLASS(insn->code) == BPF_JMP32)
16183 return false;
16184
16185 switch (BPF_OP(insn->code)) {
16186 case BPF_JGT:
16187 if ((dst_reg->type == PTR_TO_PACKET &&
16188 src_reg->type == PTR_TO_PACKET_END) ||
16189 (dst_reg->type == PTR_TO_PACKET_META &&
16190 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
16191 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
16192 find_good_pkt_pointers(this_branch, dst_reg,
16193 dst_reg->type, false);
16194 mark_pkt_end(other_branch, insn->dst_reg, true);
16195 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
16196 src_reg->type == PTR_TO_PACKET) ||
16197 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
16198 src_reg->type == PTR_TO_PACKET_META)) {
16199 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
16200 find_good_pkt_pointers(other_branch, src_reg,
16201 src_reg->type, true);
16202 mark_pkt_end(this_branch, insn->src_reg, false);
16203 } else {
16204 return false;
16205 }
16206 break;
16207 case BPF_JLT:
16208 if ((dst_reg->type == PTR_TO_PACKET &&
16209 src_reg->type == PTR_TO_PACKET_END) ||
16210 (dst_reg->type == PTR_TO_PACKET_META &&
16211 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
16212 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
16213 find_good_pkt_pointers(other_branch, dst_reg,
16214 dst_reg->type, true);
16215 mark_pkt_end(this_branch, insn->dst_reg, false);
16216 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
16217 src_reg->type == PTR_TO_PACKET) ||
16218 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
16219 src_reg->type == PTR_TO_PACKET_META)) {
16220 /* pkt_end < pkt_data', pkt_data < pkt_meta' */
16221 find_good_pkt_pointers(this_branch, src_reg,
16222 src_reg->type, false);
16223 mark_pkt_end(other_branch, insn->src_reg, true);
16224 } else {
16225 return false;
16226 }
16227 break;
16228 case BPF_JGE:
16229 if ((dst_reg->type == PTR_TO_PACKET &&
16230 src_reg->type == PTR_TO_PACKET_END) ||
16231 (dst_reg->type == PTR_TO_PACKET_META &&
16232 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
16233 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
16234 find_good_pkt_pointers(this_branch, dst_reg,
16235 dst_reg->type, true);
16236 mark_pkt_end(other_branch, insn->dst_reg, false);
16237 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
16238 src_reg->type == PTR_TO_PACKET) ||
16239 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
16240 src_reg->type == PTR_TO_PACKET_META)) {
16241 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
16242 find_good_pkt_pointers(other_branch, src_reg,
16243 src_reg->type, false);
16244 mark_pkt_end(this_branch, insn->src_reg, true);
16245 } else {
16246 return false;
16247 }
16248 break;
16249 case BPF_JLE:
16250 if ((dst_reg->type == PTR_TO_PACKET &&
16251 src_reg->type == PTR_TO_PACKET_END) ||
16252 (dst_reg->type == PTR_TO_PACKET_META &&
16253 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
16254 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
16255 find_good_pkt_pointers(other_branch, dst_reg,
16256 dst_reg->type, false);
16257 mark_pkt_end(this_branch, insn->dst_reg, true);
16258 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
16259 src_reg->type == PTR_TO_PACKET) ||
16260 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
16261 src_reg->type == PTR_TO_PACKET_META)) {
16262 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
16263 find_good_pkt_pointers(this_branch, src_reg,
16264 src_reg->type, true);
16265 mark_pkt_end(other_branch, insn->src_reg, false);
16266 } else {
16267 return false;
16268 }
16269 break;
16270 default:
16271 return false;
16272 }
16273
16274 return true;
16275 }
16276
16277 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
16278 u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
16279 {
16280 struct linked_reg *e;
16281
16282 if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
16283 return;
16284
16285 e = linked_regs_push(reg_set);
16286 if (e) {
16287 e->frameno = frameno;
16288 e->is_reg = is_reg;
16289 e->regno = spi_or_reg;
16290 } else {
16291 reg->id = 0;
16292 }
16293 }
16294
16295 /* For all R being scalar registers or spilled scalar registers
16296 * in verifier state, save R in linked_regs if R->id == id.
16297 * If there are too many Rs sharing same id, reset id for leftover Rs.
16298 */
16299 static void collect_linked_regs(struct bpf_verifier_state *vstate, u32 id,
16300 struct linked_regs *linked_regs)
16301 {
16302 struct bpf_func_state *func;
16303 struct bpf_reg_state *reg;
16304 int i, j;
16305
16306 id = id & ~BPF_ADD_CONST;
16307 for (i = vstate->curframe; i >= 0; i--) {
16308 func = vstate->frame[i];
16309 for (j = 0; j < BPF_REG_FP; j++) {
16310 reg = &func->regs[j];
16311 __collect_linked_regs(linked_regs, reg, id, i, j, true);
16312 }
16313 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
16314 if (!is_spilled_reg(&func->stack[j]))
16315 continue;
16316 reg = &func->stack[j].spilled_ptr;
16317 __collect_linked_regs(linked_regs, reg, id, i, j, false);
16318 }
16319 }
16320 }
16321
16322 /* For all R in linked_regs, copy known_reg range into R
16323 * if R->id == known_reg->id.
16324 */
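/* E.g. (a sketch): after "r2 = r1; r2 += 4;" both registers share an id and
 * r2 carries BPF_ADD_CONST with off == 4, so refining r1 after a branch lets
 * this function derive r2's bounds as r1's bounds shifted by 4.
 */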
16325 static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg,
16326 struct linked_regs *linked_regs)
16327 {
16328 struct bpf_reg_state fake_reg;
16329 struct bpf_reg_state *reg;
16330 struct linked_reg *e;
16331 int i;
16332
16333 for (i = 0; i < linked_regs->cnt; ++i) {
16334 e = &linked_regs->entries[i];
16335 reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
16336 : &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
16337 if (reg->type != SCALAR_VALUE || reg == known_reg)
16338 continue;
16339 if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
16340 continue;
16341 if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
16342 reg->off == known_reg->off) {
16343 s32 saved_subreg_def = reg->subreg_def;
16344
16345 copy_register_state(reg, known_reg);
16346 reg->subreg_def = saved_subreg_def;
16347 } else {
16348 s32 saved_subreg_def = reg->subreg_def;
16349 s32 saved_off = reg->off;
16350
16351 fake_reg.type = SCALAR_VALUE;
16352 __mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off);
16353
16354 /* reg = known_reg; reg += delta */
16355 copy_register_state(reg, known_reg);
16356 /*
16357 * Must preserve off, id and add_const flag,
16358 * otherwise another sync_linked_regs() will be incorrect.
16359 */
16360 reg->off = saved_off;
16361 reg->subreg_def = saved_subreg_def;
16362
16363 scalar32_min_max_add(reg, &fake_reg);
16364 scalar_min_max_add(reg, &fake_reg);
16365 reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
16366 }
16367 }
16368 }
16369
16370 static int check_cond_jmp_op(struct bpf_verifier_env *env,
16371 struct bpf_insn *insn, int *insn_idx)
16372 {
16373 struct bpf_verifier_state *this_branch = env->cur_state;
16374 struct bpf_verifier_state *other_branch;
16375 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
16376 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
16377 struct bpf_reg_state *eq_branch_regs;
16378 struct linked_regs linked_regs = {};
16379 u8 opcode = BPF_OP(insn->code);
16380 bool is_jmp32;
16381 int pred = -1;
16382 int err;
16383
16384 /* Only conditional jumps are expected to reach here. */
16385 if (opcode == BPF_JA || opcode > BPF_JCOND) {
16386 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
16387 return -EINVAL;
16388 }
16389
16390 if (opcode == BPF_JCOND) {
16391 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
16392 int idx = *insn_idx;
16393
16394 if (insn->code != (BPF_JMP | BPF_JCOND) ||
16395 insn->src_reg != BPF_MAY_GOTO ||
16396 insn->dst_reg || insn->imm) {
16397 verbose(env, "invalid may_goto imm %d\n", insn->imm);
16398 return -EINVAL;
16399 }
16400 prev_st = find_prev_entry(env, cur_st->parent, idx);
16401
16402 /* branch out 'fallthrough' insn as a new state to explore */
16403 queued_st = push_stack(env, idx + 1, idx, false);
16404 if (!queued_st)
16405 return -ENOMEM;
16406
16407 queued_st->may_goto_depth++;
16408 if (prev_st)
16409 widen_imprecise_scalars(env, prev_st, queued_st);
16410 *insn_idx += insn->off;
16411 return 0;
16412 }
16413
16414 /* check src2 operand */
16415 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
16416 if (err)
16417 return err;
16418
16419 dst_reg = &regs[insn->dst_reg];
16420 if (BPF_SRC(insn->code) == BPF_X) {
16421 if (insn->imm != 0) {
16422 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
16423 return -EINVAL;
16424 }
16425
16426 /* check src1 operand */
16427 err = check_reg_arg(env, insn->src_reg, SRC_OP);
16428 if (err)
16429 return err;
16430
16431 src_reg = &regs[insn->src_reg];
16432 if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
16433 is_pointer_value(env, insn->src_reg)) {
16434 verbose(env, "R%d pointer comparison prohibited\n",
16435 insn->src_reg);
16436 return -EACCES;
16437 }
16438 } else {
16439 if (insn->src_reg != BPF_REG_0) {
16440 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
16441 return -EINVAL;
16442 }
16443 src_reg = &env->fake_reg[0];
16444 memset(src_reg, 0, sizeof(*src_reg));
16445 src_reg->type = SCALAR_VALUE;
16446 __mark_reg_known(src_reg, insn->imm);
16447 }
16448
16449 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
16450 pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32);
16451 if (pred >= 0) {
16452 /* If we get here with a dst_reg pointer type it is because
16453 * above is_branch_taken() special cased the 0 comparison.
16454 */
16455 if (!__is_pointer_value(false, dst_reg))
16456 err = mark_chain_precision(env, insn->dst_reg);
16457 if (BPF_SRC(insn->code) == BPF_X && !err &&
16458 !__is_pointer_value(false, src_reg))
16459 err = mark_chain_precision(env, insn->src_reg);
16460 if (err)
16461 return err;
16462 }
16463
16464 if (pred == 1) {
16465 /* Only follow the goto, ignore fall-through. If needed, push
16466 * the fall-through branch for simulation under speculative
16467 * execution.
16468 */
16469 if (!env->bypass_spec_v1 &&
16470 !sanitize_speculative_path(env, insn, *insn_idx + 1,
16471 *insn_idx))
16472 return -EFAULT;
16473 if (env->log.level & BPF_LOG_LEVEL)
16474 print_insn_state(env, this_branch, this_branch->curframe);
16475 *insn_idx += insn->off;
16476 return 0;
16477 } else if (pred == 0) {
16478 /* Only follow the fall-through branch, since that's where the
16479 * program will go. If needed, push the goto branch for
16480 * simulation under speculative execution.
16481 */
16482 if (!env->bypass_spec_v1 &&
16483 !sanitize_speculative_path(env, insn,
16484 *insn_idx + insn->off + 1,
16485 *insn_idx))
16486 return -EFAULT;
16487 if (env->log.level & BPF_LOG_LEVEL)
16488 print_insn_state(env, this_branch, this_branch->curframe);
16489 return 0;
16490 }
16491
16492 /* Push scalar registers sharing same ID to jump history,
16493 * do this before creating 'other_branch', so that both
16494 * 'this_branch' and 'other_branch' share this history
16495 * if parent state is created.
16496 */
16497 if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
16498 collect_linked_regs(this_branch, src_reg->id, &linked_regs);
16499 if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
16500 collect_linked_regs(this_branch, dst_reg->id, &linked_regs);
16501 if (linked_regs.cnt > 1) {
16502 err = push_insn_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
16503 if (err)
16504 return err;
16505 }
16506
16507 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
16508 false);
16509 if (!other_branch)
16510 return -EFAULT;
16511 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
16512
16513 if (BPF_SRC(insn->code) == BPF_X) {
16514 err = reg_set_min_max(env,
16515 &other_branch_regs[insn->dst_reg],
16516 &other_branch_regs[insn->src_reg],
16517 dst_reg, src_reg, opcode, is_jmp32);
16518 } else /* BPF_SRC(insn->code) == BPF_K */ {
16519 /* reg_set_min_max() can mangle the fake_reg. Make a copy
16520 * so that these are two different memory locations. The
16521 * src_reg is not used beyond here in context of K.
16522 */
16523 memcpy(&env->fake_reg[1], &env->fake_reg[0],
16524 sizeof(env->fake_reg[0]));
16525 err = reg_set_min_max(env,
16526 &other_branch_regs[insn->dst_reg],
16527 &env->fake_reg[0],
16528 dst_reg, &env->fake_reg[1],
16529 opcode, is_jmp32);
16530 }
16531 if (err)
16532 return err;
16533
16534 if (BPF_SRC(insn->code) == BPF_X &&
16535 src_reg->type == SCALAR_VALUE && src_reg->id &&
16536 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
16537 sync_linked_regs(this_branch, src_reg, &linked_regs);
16538 sync_linked_regs(other_branch, &other_branch_regs[insn->src_reg], &linked_regs);
16539 }
16540 if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
16541 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
16542 sync_linked_regs(this_branch, dst_reg, &linked_regs);
16543 sync_linked_regs(other_branch, &other_branch_regs[insn->dst_reg], &linked_regs);
16544 }
16545
16546 /* if one pointer register is compared to another pointer
16547 * register check if PTR_MAYBE_NULL could be lifted.
16548 * E.g. register A - maybe null
16549 * register B - not null
16550 * for JNE A, B, ... - A is not null in the false branch;
16551 * for JEQ A, B, ... - A is not null in the true branch.
16552 *
16553 * PTR_TO_BTF_ID points to a kernel struct that does not
16554 * need to be null checked by the BPF program, i.e., it
16555 * could be null even without PTR_MAYBE_NULL marking, so
16556 * only propagate nullness when neither reg is that type.
16557 */
16558 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
16559 __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
16560 type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
16561 base_type(src_reg->type) != PTR_TO_BTF_ID &&
16562 base_type(dst_reg->type) != PTR_TO_BTF_ID) {
16563 eq_branch_regs = NULL;
16564 switch (opcode) {
16565 case BPF_JEQ:
16566 eq_branch_regs = other_branch_regs;
16567 break;
16568 case BPF_JNE:
16569 eq_branch_regs = regs;
16570 break;
16571 default:
16572 /* do nothing */
16573 break;
16574 }
16575 if (eq_branch_regs) {
16576 if (type_may_be_null(src_reg->type))
16577 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
16578 else
16579 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
16580 }
16581 }
16582
16583 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
16584 * NOTE: these optimizations below are related with pointer comparison
16585 * which will never be JMP32.
16586 */
16587 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
16588 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
16589 type_may_be_null(dst_reg->type)) {
16590 /* Mark all identical registers in each branch as either
16591 * safe or unknown depending on the R == 0 or R != 0 conditional.
16592 */
16593 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
16594 opcode == BPF_JNE);
16595 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
16596 opcode == BPF_JEQ);
16597 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
16598 this_branch, other_branch) &&
16599 is_pointer_value(env, insn->dst_reg)) {
16600 verbose(env, "R%d pointer comparison prohibited\n",
16601 insn->dst_reg);
16602 return -EACCES;
16603 }
16604 if (env->log.level & BPF_LOG_LEVEL)
16605 print_insn_state(env, this_branch, this_branch->curframe);
16606 return 0;
16607 }
16608
16609 /* verify BPF_LD_IMM64 instruction */
16610 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
16611 {
16612 struct bpf_insn_aux_data *aux = cur_aux(env);
16613 struct bpf_reg_state *regs = cur_regs(env);
16614 struct bpf_reg_state *dst_reg;
16615 struct bpf_map *map;
16616 int err;
16617
16618 if (BPF_SIZE(insn->code) != BPF_DW) {
16619 verbose(env, "invalid BPF_LD_IMM insn\n");
16620 return -EINVAL;
16621 }
16622 if (insn->off != 0) {
16623 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
16624 return -EINVAL;
16625 }
16626
16627 err = check_reg_arg(env, insn->dst_reg, DST_OP);
16628 if (err)
16629 return err;
16630
16631 dst_reg = &regs[insn->dst_reg];
16632 if (insn->src_reg == 0) {
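/* Plain BPF_LD_IMM64: the 64-bit constant is split across the two
 * 8-byte halves of the instruction, low 32 bits in insn->imm and
 * high 32 bits in (insn + 1)->imm.
 */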
16633 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
16634
16635 dst_reg->type = SCALAR_VALUE;
16636 __mark_reg_known(&regs[insn->dst_reg], imm);
16637 return 0;
16638 }
16639
16640 /* All special src_reg cases are listed below. From this point onwards
16641 * we either succeed and assign a corresponding dst_reg->type after
16642 * zeroing the offset, or fail and reject the program.
16643 */
16644 mark_reg_known_zero(env, regs, insn->dst_reg);
16645
16646 if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
16647 dst_reg->type = aux->btf_var.reg_type;
16648 switch (base_type(dst_reg->type)) {
16649 case PTR_TO_MEM:
16650 dst_reg->mem_size = aux->btf_var.mem_size;
16651 break;
16652 case PTR_TO_BTF_ID:
16653 dst_reg->btf = aux->btf_var.btf;
16654 dst_reg->btf_id = aux->btf_var.btf_id;
16655 break;
16656 default:
16657 verbose(env, "bpf verifier is misconfigured\n");
16658 return -EFAULT;
16659 }
16660 return 0;
16661 }
16662
16663 if (insn->src_reg == BPF_PSEUDO_FUNC) {
16664 struct bpf_prog_aux *aux = env->prog->aux;
16665 u32 subprogno = find_subprog(env,
16666 env->insn_idx + insn->imm + 1);
16667
16668 if (!aux->func_info) {
16669 verbose(env, "missing btf func_info\n");
16670 return -EINVAL;
16671 }
16672 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
16673 verbose(env, "callback function not static\n");
16674 return -EINVAL;
16675 }
16676
16677 dst_reg->type = PTR_TO_FUNC;
16678 dst_reg->subprogno = subprogno;
16679 return 0;
16680 }
16681
16682 map = env->used_maps[aux->map_index];
16683 dst_reg->map_ptr = map;
16684
16685 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
16686 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
16687 if (map->map_type == BPF_MAP_TYPE_ARENA) {
16688 __mark_reg_unknown(env, dst_reg);
16689 return 0;
16690 }
16691 dst_reg->type = PTR_TO_MAP_VALUE;
16692 dst_reg->off = aux->map_off;
16693 WARN_ON_ONCE(map->max_entries != 1);
16694 /* We want reg->id to be same (0) as map_value is not distinct */
16695 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
16696 insn->src_reg == BPF_PSEUDO_MAP_IDX) {
16697 dst_reg->type = CONST_PTR_TO_MAP;
16698 } else {
16699 verbose(env, "bpf verifier is misconfigured\n");
16700 return -EINVAL;
16701 }
16702
16703 return 0;
16704 }
16705
16706 static bool may_access_skb(enum bpf_prog_type type)
16707 {
16708 switch (type) {
16709 case BPF_PROG_TYPE_SOCKET_FILTER:
16710 case BPF_PROG_TYPE_SCHED_CLS:
16711 case BPF_PROG_TYPE_SCHED_ACT:
16712 return true;
16713 default:
16714 return false;
16715 }
16716 }
16717
16718 /* verify safety of LD_ABS|LD_IND instructions:
16719 * - they can only appear in the programs where ctx == skb
16720 * - since they are wrappers of function calls, they scratch R1-R5 registers,
16721 * preserve R6-R9, and store return value into R0
16722 *
16723 * Implicit input:
16724 * ctx == skb == R6 == CTX
16725 *
16726 * Explicit input:
16727 * SRC == any register
16728 * IMM == 32-bit immediate
16729 *
16730 * Output:
16731 * R0 - 8/16/32-bit skb data converted to cpu endianness
16732 */
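/* E.g. (a sketch): BPF_LD | BPF_ABS | BPF_H with imm == 12 loads the
 * 16-bit EtherType from skb data into r0 in host byte order, scratching
 * r1-r5 as described above.
 */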
16733 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
16734 {
16735 struct bpf_reg_state *regs = cur_regs(env);
16736 static const int ctx_reg = BPF_REG_6;
16737 u8 mode = BPF_MODE(insn->code);
16738 int i, err;
16739
16740 if (!may_access_skb(resolve_prog_type(env->prog))) {
16741 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
16742 return -EINVAL;
16743 }
16744
16745 if (!env->ops->gen_ld_abs) {
16746 verbose(env, "bpf verifier is misconfigured\n");
16747 return -EINVAL;
16748 }
16749
16750 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
16751 BPF_SIZE(insn->code) == BPF_DW ||
16752 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
16753 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
16754 return -EINVAL;
16755 }
16756
16757 /* check whether implicit source operand (register R6) is readable */
16758 err = check_reg_arg(env, ctx_reg, SRC_OP);
16759 if (err)
16760 return err;
16761
16762 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
16763 * gen_ld_abs() may terminate the program at runtime, leading to
16764 * reference leak.
16765 */
16766 err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
16767 if (err)
16768 return err;
16769
16770 if (regs[ctx_reg].type != PTR_TO_CTX) {
16771 verbose(env,
16772 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
16773 return -EINVAL;
16774 }
16775
16776 if (mode == BPF_IND) {
16777 /* check explicit source operand */
16778 err = check_reg_arg(env, insn->src_reg, SRC_OP);
16779 if (err)
16780 return err;
16781 }
16782
16783 err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
16784 if (err < 0)
16785 return err;
16786
16787 /* reset caller saved regs to unreadable */
16788 for (i = 0; i < CALLER_SAVED_REGS; i++) {
16789 mark_reg_not_init(env, regs, caller_saved[i]);
16790 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
16791 }
16792
16793 /* mark destination R0 register as readable, since it contains
16794 * the value fetched from the packet.
16795 * Already marked as written above.
16796 */
16797 mark_reg_unknown(env, regs, BPF_REG_0);
16798 /* ld_abs load up to 32-bit skb data. */
16799 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
16800 return 0;
16801 }
16802
16803 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
16804 {
16805 const char *exit_ctx = "At program exit";
16806 struct tnum enforce_attach_type_range = tnum_unknown;
16807 const struct bpf_prog *prog = env->prog;
16808 struct bpf_reg_state *reg = reg_state(env, regno);
16809 struct bpf_retval_range range = retval_range(0, 1);
16810 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16811 int err;
16812 struct bpf_func_state *frame = env->cur_state->frame[0];
16813 const bool is_subprog = frame->subprogno;
16814 bool return_32bit = false;
16815 const struct btf_type *reg_type, *ret_type = NULL;
16816
16817 /* LSM and struct_ops func-ptr's return type could be "void" */
16818 if (!is_subprog || frame->in_exception_callback_fn) {
16819 switch (prog_type) {
16820 case BPF_PROG_TYPE_LSM:
16821 if (prog->expected_attach_type == BPF_LSM_CGROUP)
16822 /* See below, can be 0 or 0-1 depending on hook. */
16823 break;
16824 if (!prog->aux->attach_func_proto->type)
16825 return 0;
16826 break;
16827 case BPF_PROG_TYPE_STRUCT_OPS:
16828 if (!prog->aux->attach_func_proto->type)
16829 return 0;
16830
16831 if (frame->in_exception_callback_fn)
16832 break;
16833
16834 /* Allow a struct_ops program to return a referenced kptr if it
16835 * matches the operator's return type and is in its unmodified
16836 * form. A scalar zero (i.e., a null pointer) is also allowed.
16837 */
16838 reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
16839 ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
16840 prog->aux->attach_func_proto->type,
16841 NULL);
16842 if (ret_type && ret_type == reg_type && reg->ref_obj_id)
16843 return __check_ptr_off_reg(env, reg, regno, false);
16844 break;
16845 default:
16846 break;
16847 }
16848 }
16849
16850 /* eBPF calling convention is such that R0 is used
16851 * to return the value from eBPF program.
16852 * Make sure that it's readable at this time
16853 * of bpf_exit, which means that program wrote
16854 * something into it earlier
16855 */
16856 err = check_reg_arg(env, regno, SRC_OP);
16857 if (err)
16858 return err;
16859
16860 if (is_pointer_value(env, regno)) {
16861 verbose(env, "R%d leaks addr as return value\n", regno);
16862 return -EACCES;
16863 }
16864
16865 if (frame->in_async_callback_fn) {
16866 /* enforce return zero from async callbacks like timer */
16867 exit_ctx = "At async callback return";
16868 range = retval_range(0, 0);
16869 goto enforce_retval;
16870 }
16871
16872 if (is_subprog && !frame->in_exception_callback_fn) {
16873 if (reg->type != SCALAR_VALUE) {
16874 verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n",
16875 regno, reg_type_str(env, reg->type));
16876 return -EINVAL;
16877 }
16878 return 0;
16879 }
16880
16881 switch (prog_type) {
16882 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
16883 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
16884 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
16885 env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
16886 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
16887 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
16888 env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
16889 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
16890 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
16891 env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
16892 range = retval_range(1, 1);
16893 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
16894 env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
16895 range = retval_range(0, 3);
16896 break;
16897 case BPF_PROG_TYPE_CGROUP_SKB:
16898 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
16899 range = retval_range(0, 3);
16900 enforce_attach_type_range = tnum_range(2, 3);
16901 }
16902 break;
16903 case BPF_PROG_TYPE_CGROUP_SOCK:
16904 case BPF_PROG_TYPE_SOCK_OPS:
16905 case BPF_PROG_TYPE_CGROUP_DEVICE:
16906 case BPF_PROG_TYPE_CGROUP_SYSCTL:
16907 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
16908 break;
16909 case BPF_PROG_TYPE_RAW_TRACEPOINT:
16910 if (!env->prog->aux->attach_btf_id)
16911 return 0;
16912 range = retval_range(0, 0);
16913 break;
16914 case BPF_PROG_TYPE_TRACING:
16915 switch (env->prog->expected_attach_type) {
16916 case BPF_TRACE_FENTRY:
16917 case BPF_TRACE_FEXIT:
16918 range = retval_range(0, 0);
16919 break;
16920 case BPF_TRACE_RAW_TP:
16921 case BPF_MODIFY_RETURN:
16922 return 0;
16923 case BPF_TRACE_ITER:
16924 break;
16925 default:
16926 return -ENOTSUPP;
16927 }
16928 break;
16929 case BPF_PROG_TYPE_KPROBE:
16930 switch (env->prog->expected_attach_type) {
16931 case BPF_TRACE_KPROBE_SESSION:
16932 case BPF_TRACE_UPROBE_SESSION:
16933 range = retval_range(0, 1);
16934 break;
16935 default:
16936 return 0;
16937 }
16938 break;
16939 case BPF_PROG_TYPE_SK_LOOKUP:
16940 range = retval_range(SK_DROP, SK_PASS);
16941 break;
16942
16943 case BPF_PROG_TYPE_LSM:
16944 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
16945 /* no range found, any return value is allowed */
16946 if (!get_func_retval_range(env->prog, &range))
16947 return 0;
16948 /* no restricted range, any return value is allowed */
16949 if (range.minval == S32_MIN && range.maxval == S32_MAX)
16950 return 0;
16951 return_32bit = true;
16952 } else if (!env->prog->aux->attach_func_proto->type) {
16953 /* Make sure programs that attach to void
16954 * hooks don't try to modify return value.
16955 */
16956 range = retval_range(1, 1);
16957 }
16958 break;
16959
16960 case BPF_PROG_TYPE_NETFILTER:
16961 range = retval_range(NF_DROP, NF_ACCEPT);
16962 break;
16963 case BPF_PROG_TYPE_STRUCT_OPS:
16964 if (!ret_type)
16965 return 0;
16966 range = retval_range(0, 0);
16967 break;
16968 case BPF_PROG_TYPE_EXT:
16969 /* freplace program can return anything as its return value
16970 * depends on the to-be-replaced kernel func or bpf program.
16971 */
16972 default:
16973 return 0;
16974 }
16975
16976 enforce_retval:
16977 if (reg->type != SCALAR_VALUE) {
16978 verbose(env, "%s the register R%d is not a known value (%s)\n",
16979 exit_ctx, regno, reg_type_str(env, reg->type));
16980 return -EINVAL;
16981 }
16982
16983 err = mark_chain_precision(env, regno);
16984 if (err)
16985 return err;
16986
16987 if (!retval_range_within(range, reg, return_32bit)) {
16988 verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
16989 if (!is_subprog &&
16990 prog->expected_attach_type == BPF_LSM_CGROUP &&
16991 prog_type == BPF_PROG_TYPE_LSM &&
16992 !prog->aux->attach_func_proto->type)
16993 verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
16994 return -EINVAL;
16995 }
16996
16997 if (!tnum_is_unknown(enforce_attach_type_range) &&
16998 tnum_in(enforce_attach_type_range, reg->var_off))
16999 env->prog->enforce_expected_attach_type = 1;
17000 return 0;
17001 }
17002
17003 static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
17004 {
17005 struct bpf_subprog_info *subprog;
17006
17007 subprog = find_containing_subprog(env, off);
17008 subprog->changes_pkt_data = true;
17009 }
17010
17011 static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
17012 {
17013 struct bpf_subprog_info *subprog;
17014
17015 subprog = find_containing_subprog(env, off);
17016 subprog->might_sleep = true;
17017 }
17018
17019 /* 't' is an index of a call-site.
17020 * 'w' is a callee entry point.
17021 * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED.
17022 * Rely on DFS traversal order and absence of recursive calls to guarantee that
17023 * callee's changes_pkt_data marks would be correct at that moment.
17024 */
17025 static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
17026 {
17027 struct bpf_subprog_info *caller, *callee;
17028
17029 caller = find_containing_subprog(env, t);
17030 callee = find_containing_subprog(env, w);
17031 caller->changes_pkt_data |= callee->changes_pkt_data;
17032 caller->might_sleep |= callee->might_sleep;
17033 }
17034
17035 /* non-recursive DFS pseudo code
17036 * 1 procedure DFS-iterative(G,v):
17037 * 2 label v as discovered
17038 * 3 let S be a stack
17039 * 4 S.push(v)
17040 * 5 while S is not empty
17041 * 6 t <- S.peek()
17042 * 7 if t is what we're looking for:
17043 * 8 return t
17044 * 9 for all edges e in G.adjacentEdges(t) do
17045 * 10 if edge e is already labelled
17046 * 11 continue with the next edge
17047 * 12 w <- G.adjacentVertex(t,e)
17048 * 13 if vertex w is not discovered and not explored
17049 * 14 label e as tree-edge
17050 * 15 label w as discovered
17051 * 16 S.push(w)
17052 * 17 continue at 5
17053 * 18 else if vertex w is discovered
17054 * 19 label e as back-edge
17055 * 20 else
17056 * 21 // vertex w is explored
17057 * 22 label e as forward- or cross-edge
17058 * 23 label t as explored
17059 * 24 S.pop()
17060 *
17061 * convention:
17062 * 0x10 - discovered
17063 * 0x11 - discovered and fall-through edge labelled
17064 * 0x12 - discovered and fall-through and branch edges labelled
17065 * 0x20 - explored
17066 */
17067
17068 enum {
17069 DISCOVERED = 0x10,
17070 EXPLORED = 0x20,
17071 FALLTHROUGH = 1,
17072 BRANCH = 2,
17073 };
17074
17075 static void mark_prune_point(struct bpf_verifier_env *env, int idx)
17076 {
17077 env->insn_aux_data[idx].prune_point = true;
17078 }
17079
17080 static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
17081 {
17082 return env->insn_aux_data[insn_idx].prune_point;
17083 }
17084
17085 static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
17086 {
17087 env->insn_aux_data[idx].force_checkpoint = true;
17088 }
17089
17090 static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
17091 {
17092 return env->insn_aux_data[insn_idx].force_checkpoint;
17093 }
17094
17095 static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
17096 {
17097 env->insn_aux_data[idx].calls_callback = true;
17098 }
17099
17100 static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
17101 {
17102 return env->insn_aux_data[insn_idx].calls_callback;
17103 }
17104
17105 enum {
17106 DONE_EXPLORING = 0,
17107 KEEP_EXPLORING = 1,
17108 };
17109
17110 /* t, w, e - match pseudo-code above:
17111 * t - index of current instruction
17112 * w - next instruction
17113 * e - edge
17114 */
17115 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
17116 {
17117 int *insn_stack = env->cfg.insn_stack;
17118 int *insn_state = env->cfg.insn_state;
17119
17120 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
17121 return DONE_EXPLORING;
17122
17123 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
17124 return DONE_EXPLORING;
17125
17126 if (w < 0 || w >= env->prog->len) {
17127 verbose_linfo(env, t, "%d: ", t);
17128 verbose(env, "jump out of range from insn %d to %d\n", t, w);
17129 return -EINVAL;
17130 }
17131
17132 if (e == BRANCH) {
17133 /* mark branch target for state pruning */
17134 mark_prune_point(env, w);
17135 mark_jmp_point(env, w);
17136 }
17137
17138 if (insn_state[w] == 0) {
17139 /* tree-edge */
17140 insn_state[t] = DISCOVERED | e;
17141 insn_state[w] = DISCOVERED;
17142 if (env->cfg.cur_stack >= env->prog->len)
17143 return -E2BIG;
17144 insn_stack[env->cfg.cur_stack++] = w;
17145 return KEEP_EXPLORING;
17146 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
17147 if (env->bpf_capable)
17148 return DONE_EXPLORING;
17149 verbose_linfo(env, t, "%d: ", t);
17150 verbose_linfo(env, w, "%d: ", w);
17151 verbose(env, "back-edge from insn %d to %d\n", t, w);
17152 return -EINVAL;
17153 } else if (insn_state[w] == EXPLORED) {
17154 /* forward- or cross-edge */
17155 insn_state[t] = DISCOVERED | e;
17156 } else {
17157 verbose(env, "insn state internal bug\n");
17158 return -EFAULT;
17159 }
17160 return DONE_EXPLORING;
17161 }
17162
17163 static int visit_func_call_insn(int t, struct bpf_insn *insns,
17164 struct bpf_verifier_env *env,
17165 bool visit_callee)
17166 {
17167 int ret, insn_sz;
17168 int w;
17169
17170 insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
17171 ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
17172 if (ret)
17173 return ret;
17174
17175 mark_prune_point(env, t + insn_sz);
17176 /* when we exit from subprog, we need to record non-linear history */
17177 mark_jmp_point(env, t + insn_sz);
17178
17179 if (visit_callee) {
17180 w = t + insns[t].imm + 1;
17181 mark_prune_point(env, t);
17182 merge_callee_effects(env, t, w);
17183 ret = push_insn(t, w, BRANCH, env);
17184 }
17185 return ret;
17186 }
17187
17188 /* Bitmask with 1s for all caller saved registers */
17189 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
17190
17191 /* True if do_misc_fixups() replaces calls to helper number 'imm';
17192 * the replacement patch is presumed to follow the bpf_fastcall contract
17193 * (see mark_fastcall_pattern_for_call() below).
17194 */
17195 static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
17196 {
17197 switch (imm) {
17198 #ifdef CONFIG_X86_64
17199 case BPF_FUNC_get_smp_processor_id:
17200 return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
17201 #endif
17202 default:
17203 return false;
17204 }
17205 }
17206
17207 struct call_summary {
17208 u8 num_params;
17209 bool is_void;
17210 bool fastcall;
17211 };
17212
17213 /* If @call is a kfunc or helper call, fills @cs and returns true,
17214 * otherwise returns false.
17215 */
17216 static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
17217 struct call_summary *cs)
17218 {
17219 struct bpf_kfunc_call_arg_meta meta;
17220 const struct bpf_func_proto *fn;
17221 int i;
17222
17223 if (bpf_helper_call(call)) {
17224
17225 if (get_helper_proto(env, call->imm, &fn) < 0)
17226 /* error would be reported later */
17227 return false;
17228 cs->fastcall = fn->allow_fastcall &&
17229 (verifier_inlines_helper_call(env, call->imm) ||
17230 bpf_jit_inlines_helper_call(call->imm));
17231 cs->is_void = fn->ret_type == RET_VOID;
17232 cs->num_params = 0;
17233 for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
17234 if (fn->arg_type[i] == ARG_DONTCARE)
17235 break;
17236 cs->num_params++;
17237 }
17238 return true;
17239 }
17240
17241 if (bpf_pseudo_kfunc_call(call)) {
17242 int err;
17243
17244 err = fetch_kfunc_meta(env, call, &meta, NULL);
17245 if (err < 0)
17246 /* error would be reported later */
17247 return false;
17248 cs->num_params = btf_type_vlen(meta.func_proto);
17249 cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
17250 cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
17251 return true;
17252 }
17253
17254 return false;
17255 }
17256
17257 /* LLVM defines a bpf_fastcall function attribute.
17258 * This attribute means that the function scratches only some of
17259 * the caller saved registers defined by ABI.
17260 * For BPF the set of such registers could be defined as follows:
17261 * - R0 is scratched only if function is non-void;
17262 * - R1-R5 are scratched only if corresponding parameter type is defined
17263 * in the function prototype.
17264 *
17265 * The contract between the kernel and clang allows such functions to be
17266 * used while maintaining backwards compatibility with old
17267 * kernels that don't understand bpf_fastcall calls:
17268 *
17269 * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
17270 * registers are not scratched by the call;
17271 *
17272 * - as a post-processing step, clang visits each bpf_fastcall call and adds
17273 * spill/fill for every live r0-r5;
17274 *
17275 * - stack offsets used for the spill/fill are allocated as lowest
17276 * stack offsets in whole function and are not used for any other
17277 * purposes;
17278 *
17279 * - when kernel loads a program, it looks for such patterns
17280 * (bpf_fastcall function surrounded by spills/fills) and checks if
17281 * spill/fill stack offsets are used exclusively in fastcall patterns;
17282 *
17283 * - if so, and if verifier or current JIT inlines the call to the
17284 * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
17285 * spill/fill pairs;
17286 *
17287 * - when old kernel loads a program, presence of spill/fill pairs
17288 * keeps BPF program valid, albeit slightly less efficient.
17289 *
17290 * For example:
17291 *
17292 * r1 = 1;
17293 * r2 = 2;
17294 * *(u64 *)(r10 - 8) = r1; r1 = 1;
17295 * *(u64 *)(r10 - 16) = r2; r2 = 2;
17296 * call %[to_be_inlined] --> call %[to_be_inlined]
17297 * r2 = *(u64 *)(r10 - 16); r0 = r1;
17298 * r1 = *(u64 *)(r10 - 8); r0 += r2;
17299 * r0 = r1; exit;
17300 * r0 += r2;
17301 * exit;
17302 *
17303 * The purpose of mark_fastcall_pattern_for_call is to:
17304 * - look for such patterns;
17305 * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
17306 * - set env->insn_aux_data[*].fastcall_spills_num for the call instruction;
17307 * - update env->subprog_info[*]->fastcall_stack_off to find an offset
17308 * at which bpf_fastcall spill/fill stack slots start;
17309 * - update env->subprog_info[*]->keep_fastcall_stack.
17310 *
17311 * The .fastcall_pattern and .fastcall_stack_off are used by
17312 * check_fastcall_stack_contract() to check if every stack access to
17313 * fastcall spill/fill stack slot originates from spill/fill
17314 * instructions, members of fastcall patterns.
17315 *
17316 * If such condition holds true for a subprogram, fastcall patterns could
17317 * be rewritten by remove_fastcall_spills_fills().
17318 * Otherwise bpf_fastcall patterns are not changed in the subprogram
17319 * (code, presumably, generated by an older clang version).
17320 *
17321 * For example, it is *not* safe to remove spill/fill below:
17322 *
17323 * r1 = 1;
17324 * *(u64 *)(r10 - 8) = r1; r1 = 1;
17325 * call %[to_be_inlined] --> call %[to_be_inlined]
17326 * r1 = *(u64 *)(r10 - 8); r0 = *(u64 *)(r10 - 8); <---- wrong !!!
17327 * r0 = *(u64 *)(r10 - 8); r0 += r1;
17328 * r0 += r1; exit;
17329 * exit;
17330 */
17331 static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
17332 struct bpf_subprog_info *subprog,
17333 int insn_idx, s16 lowest_off)
17334 {
17335 struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
17336 struct bpf_insn *call = &env->prog->insnsi[insn_idx];
17337 u32 clobbered_regs_mask;
17338 struct call_summary cs;
17339 u32 expected_regs_mask;
17340 s16 off;
17341 int i;
17342
17343 if (!get_call_summary(env, call, &cs))
17344 return;
17345
17346 /* A bitmask specifying which caller saved registers are clobbered
17347 * by a call to a helper/kfunc *as if* this helper/kfunc follows
17348 * bpf_fastcall contract:
17349 * - includes R0 if function is non-void;
17350 * - includes R1-R5 if the corresponding parameter is described
17351 * in the function prototype.
17352 */
17353 clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
17354 /* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
17355 expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;
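	/* A sketch with hypothetical numbers: a non-void helper with two
	 * parameters yields cs.num_params == 2 and cs.is_void == false, so
	 * clobbered_regs_mask == GENMASK(2, 0) == 0b000111 (r0-r2) and
	 * expected_regs_mask == 0b111000 (r3-r5 may appear in the pattern).
	 */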
17356
17357 /* match pairs of form:
17358 *
17359 * *(u64 *)(r10 - Y) = rX (where Y % 8 == 0)
17360 * ...
17361 * call %[to_be_inlined]
17362 * ...
17363 * rX = *(u64 *)(r10 - Y)
17364 */
17365 for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
17366 if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
17367 break;
17368 stx = &insns[insn_idx - i];
17369 ldx = &insns[insn_idx + i];
17370 /* must be a stack spill/fill pair */
17371 if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
17372 ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
17373 stx->dst_reg != BPF_REG_10 ||
17374 ldx->src_reg != BPF_REG_10)
17375 break;
17376 /* must be a spill/fill for the same reg */
17377 if (stx->src_reg != ldx->dst_reg)
17378 break;
17379 /* must be one of the previously unseen registers */
17380 if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
17381 break;
17382 /* must be a spill/fill for the same expected offset,
17383 * no need to check offset alignment, BPF_DW stack access
17384 * is always 8-byte aligned.
17385 */
17386 if (stx->off != off || ldx->off != off)
17387 break;
17388 expected_regs_mask &= ~BIT(stx->src_reg);
17389 env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
17390 env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
17391 }
17392 if (i == 1)
17393 return;
17394
17395 /* Conditionally set 'fastcall_spills_num' to allow forward
17396 * compatibility when more helper functions are marked as
17397 * bpf_fastcall at compile time than current kernel supports, e.g:
17398 *
17399 * 1: *(u64 *)(r10 - 8) = r1
17400 * 2: call A ;; assume A is bpf_fastcall for current kernel
17401 * 3: r1 = *(u64 *)(r10 - 8)
17402 * 4: *(u64 *)(r10 - 8) = r1
17403 * 5: call B ;; assume B is not bpf_fastcall for current kernel
17404 * 6: r1 = *(u64 *)(r10 - 8)
17405 *
17406 * There is no need to block bpf_fastcall rewrite for such program.
17407 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
17408 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
17409 * does not remove spill/fill pair {4,6}.
17410 */
17411 if (cs.fastcall)
17412 env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
17413 else
17414 subprog->keep_fastcall_stack = 1;
17415 subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
17416 }
17417
17418 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
17419 {
17420 struct bpf_subprog_info *subprog = env->subprog_info;
17421 struct bpf_insn *insn;
17422 s16 lowest_off;
17423 int s, i;
17424
17425 for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
17426 /* find lowest stack spill offset used in this subprog */
17427 lowest_off = 0;
17428 for (i = subprog->start; i < (subprog + 1)->start; ++i) {
17429 insn = env->prog->insnsi + i;
17430 if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
17431 insn->dst_reg != BPF_REG_10)
17432 continue;
17433 lowest_off = min(lowest_off, insn->off);
17434 }
17435 /* use this offset to find fastcall patterns */
17436 for (i = subprog->start; i < (subprog + 1)->start; ++i) {
17437 insn = env->prog->insnsi + i;
17438 if (insn->code != (BPF_JMP | BPF_CALL))
17439 continue;
17440 mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
17441 }
17442 }
17443 return 0;
17444 }
17445
17446 /* Visits the instruction at index t and returns one of the following:
17447 * < 0 - an error occurred
17448 * DONE_EXPLORING - the instruction was fully explored
17449 * KEEP_EXPLORING - there is still work to be done before it is fully explored
17450 */
17451 static int visit_insn(int t, struct bpf_verifier_env *env)
17452 {
17453 struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
17454 int ret, off, insn_sz;
17455
17456 if (bpf_pseudo_func(insn))
17457 return visit_func_call_insn(t, insns, env, true);
17458
17459 /* All non-branch instructions have a single fall-through edge. */
17460 if (BPF_CLASS(insn->code) != BPF_JMP &&
17461 BPF_CLASS(insn->code) != BPF_JMP32) {
17462 insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
17463 return push_insn(t, t + insn_sz, FALLTHROUGH, env);
17464 }
17465
17466 switch (BPF_OP(insn->code)) {
17467 case BPF_EXIT:
17468 return DONE_EXPLORING;
17469
17470 case BPF_CALL:
17471 if (is_async_callback_calling_insn(insn))
17472 /* Mark this call insn as a prune point to trigger
17473 * is_state_visited() check before call itself is
17474 * processed by __check_func_call(). Otherwise new
17475 * async state will be pushed for further exploration.
17476 */
17477 mark_prune_point(env, t);
17478 /* For functions that invoke callbacks it is not known how many times
17479 * the callback will be called. The verifier models callback-calling
17480 * functions by repeatedly visiting the callback body and returning to
17481 * the origin call instruction.
17482 * In order to stop such iteration the verifier needs to identify when a
17483 * state identical to some state from a previous iteration is reached.
17484 * Check below forces creation of checkpoint before callback calling
17485 * instruction to allow search for such identical states.
17486 */
17487 if (is_sync_callback_calling_insn(insn)) {
17488 mark_calls_callback(env, t);
17489 mark_force_checkpoint(env, t);
17490 mark_prune_point(env, t);
17491 mark_jmp_point(env, t);
17492 }
17493 if (bpf_helper_call(insn)) {
17494 const struct bpf_func_proto *fp;
17495
17496 ret = get_helper_proto(env, insn->imm, &fp);
17497 /* If called in a non-sleepable context the program will be
17498 * rejected anyway, so we should end up with precise
17499 * sleepable marks on subprogs, except for dead code
17500 * elimination.
17501 */
17502 if (ret == 0 && fp->might_sleep)
17503 mark_subprog_might_sleep(env, t);
17504 if (bpf_helper_changes_pkt_data(insn->imm))
17505 mark_subprog_changes_pkt_data(env, t);
17506 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
17507 struct bpf_kfunc_call_arg_meta meta;
17508
17509 ret = fetch_kfunc_meta(env, insn, &meta, NULL);
17510 if (ret == 0 && is_iter_next_kfunc(&meta)) {
17511 mark_prune_point(env, t);
17512 /* Checking and saving state checkpoints at iter_next() call
17513 * is crucial for fast convergence of open-coded iterator loop
17514 * logic, so we need to force it. If we don't do that,
17515 * is_state_visited() might skip saving a checkpoint, causing
17516 * unnecessarily long sequence of not checkpointed
17517 * instructions and jumps, leading to exhaustion of jump
17518 * history buffer, and potentially other undesired outcomes.
17519 * It is expected that with correct open-coded iterators
17520 * convergence will happen quickly, so we don't run a risk of
17521 * exhausting memory.
17522 */
17523 mark_force_checkpoint(env, t);
17524 }
17525 /* Same as with helpers: if called in a non-sleepable context the
17526 * program will be rejected anyway, so we should end up
17527 * with precise sleepable marks on subprogs, except for
17528 * dead code elimination.
17529 */
17530 if (ret == 0 && is_kfunc_sleepable(&meta))
17531 mark_subprog_might_sleep(env, t);
17532 }
17533 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
17534
17535 case BPF_JA:
17536 if (BPF_SRC(insn->code) != BPF_K)
17537 return -EINVAL;
17538
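		/* BPF_JMP|BPF_JA carries a 16-bit offset in insn->off, while
		 * BPF_JMP32|BPF_JA (gotol) carries a 32-bit offset in insn->imm.
		 */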
17539 if (BPF_CLASS(insn->code) == BPF_JMP)
17540 off = insn->off;
17541 else
17542 off = insn->imm;
17543
17544 /* unconditional jump with single edge */
17545 ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
17546 if (ret)
17547 return ret;
17548
17549 mark_prune_point(env, t + off + 1);
17550 mark_jmp_point(env, t + off + 1);
17551
17552 return ret;
17553
17554 default:
17555 /* conditional jump with two edges */
17556 mark_prune_point(env, t);
17557 if (is_may_goto_insn(insn))
17558 mark_force_checkpoint(env, t);
17559
17560 ret = push_insn(t, t + 1, FALLTHROUGH, env);
17561 if (ret)
17562 return ret;
17563
17564 return push_insn(t, t + insn->off + 1, BRANCH, env);
17565 }
17566 }
17567
17568 /* non-recursive depth-first-search to detect loops in BPF program
17569 * loop == back-edge in directed graph
17570 */
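/* Roughly, each instruction's DFS status is tracked in insn_state[]:
 * 0 means not yet visited, DISCOVERED (with the pending FALLTHROUGH or
 * BRANCH edge OR-ed in) means the insn is on the DFS stack, and EXPLORED
 * means all outgoing edges were processed. Reaching a DISCOVERED insn
 * again means a back-edge, i.e. a loop; see the back-edge check in
 * push_insn() above.
 */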
17571 static int check_cfg(struct bpf_verifier_env *env)
17572 {
17573 int insn_cnt = env->prog->len;
17574 int *insn_stack, *insn_state, *insn_postorder;
17575 int ex_insn_beg, i, ret = 0;
17576
17577 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
17578 if (!insn_state)
17579 return -ENOMEM;
17580
17581 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
17582 if (!insn_stack) {
17583 kvfree(insn_state);
17584 return -ENOMEM;
17585 }
17586
17587 insn_postorder = env->cfg.insn_postorder = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
17588 if (!insn_postorder) {
17589 kvfree(insn_state);
17590 kvfree(insn_stack);
17591 return -ENOMEM;
17592 }
17593
17594 ex_insn_beg = env->exception_callback_subprog
17595 ? env->subprog_info[env->exception_callback_subprog].start
17596 : 0;
17597
17598 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
17599 insn_stack[0] = 0; /* 0 is the first instruction */
17600 env->cfg.cur_stack = 1;
17601
17602 walk_cfg:
17603 while (env->cfg.cur_stack > 0) {
17604 int t = insn_stack[env->cfg.cur_stack - 1];
17605
17606 ret = visit_insn(t, env);
17607 switch (ret) {
17608 case DONE_EXPLORING:
17609 insn_state[t] = EXPLORED;
17610 env->cfg.cur_stack--;
17611 insn_postorder[env->cfg.cur_postorder++] = t;
17612 break;
17613 case KEEP_EXPLORING:
17614 break;
17615 default:
17616 if (ret > 0) {
17617 verbose(env, "visit_insn internal bug\n");
17618 ret = -EFAULT;
17619 }
17620 goto err_free;
17621 }
17622 }
17623
17624 if (env->cfg.cur_stack < 0) {
17625 verbose(env, "pop stack internal bug\n");
17626 ret = -EFAULT;
17627 goto err_free;
17628 }
17629
17630 if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) {
17631 insn_state[ex_insn_beg] = DISCOVERED;
17632 insn_stack[0] = ex_insn_beg;
17633 env->cfg.cur_stack = 1;
17634 goto walk_cfg;
17635 }
17636
17637 for (i = 0; i < insn_cnt; i++) {
17638 struct bpf_insn *insn = &env->prog->insnsi[i];
17639
17640 if (insn_state[i] != EXPLORED) {
17641 verbose(env, "unreachable insn %d\n", i);
17642 ret = -EINVAL;
17643 goto err_free;
17644 }
17645 if (bpf_is_ldimm64(insn)) {
17646 if (insn_state[i + 1] != 0) {
17647 verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
17648 ret = -EINVAL;
17649 goto err_free;
17650 }
17651 i++; /* skip second half of ldimm64 */
17652 }
17653 }
17654 ret = 0; /* cfg looks good */
17655 env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data;
17656 env->prog->aux->might_sleep = env->subprog_info[0].might_sleep;
17657
17658 err_free:
17659 kvfree(insn_state);
17660 kvfree(insn_stack);
17661 env->cfg.insn_state = env->cfg.insn_stack = NULL;
17662 return ret;
17663 }
17664
17665 static int check_abnormal_return(struct bpf_verifier_env *env)
17666 {
17667 int i;
17668
17669 for (i = 1; i < env->subprog_cnt; i++) {
17670 if (env->subprog_info[i].has_ld_abs) {
17671 verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
17672 return -EINVAL;
17673 }
17674 if (env->subprog_info[i].has_tail_call) {
17675 verbose(env, "tail_call is not allowed in subprogs without BTF\n");
17676 return -EINVAL;
17677 }
17678 }
17679 return 0;
17680 }
17681
17682 /* The minimum supported BTF func info size */
17683 #define MIN_BPF_FUNCINFO_SIZE 8
17684 #define MAX_FUNCINFO_REC_SIZE 252
17685
17686 static int check_btf_func_early(struct bpf_verifier_env *env,
17687 const union bpf_attr *attr,
17688 bpfptr_t uattr)
17689 {
17690 u32 krec_size = sizeof(struct bpf_func_info);
17691 const struct btf_type *type, *func_proto;
17692 u32 i, nfuncs, urec_size, min_size;
17693 struct bpf_func_info *krecord;
17694 struct bpf_prog *prog;
17695 const struct btf *btf;
17696 u32 prev_offset = 0;
17697 bpfptr_t urecord;
17698 int ret = -ENOMEM;
17699
17700 nfuncs = attr->func_info_cnt;
17701 if (!nfuncs) {
17702 if (check_abnormal_return(env))
17703 return -EINVAL;
17704 return 0;
17705 }
17706
17707 urec_size = attr->func_info_rec_size;
17708 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
17709 urec_size > MAX_FUNCINFO_REC_SIZE ||
17710 urec_size % sizeof(u32)) {
17711 verbose(env, "invalid func info rec size %u\n", urec_size);
17712 return -EINVAL;
17713 }
17714
17715 prog = env->prog;
17716 btf = prog->aux->btf;
17717
17718 urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
17719 min_size = min_t(u32, krec_size, urec_size);
17720
17721 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
17722 if (!krecord)
17723 return -ENOMEM;
17724
17725 for (i = 0; i < nfuncs; i++) {
17726 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
17727 if (ret) {
17728 if (ret == -E2BIG) {
17729 verbose(env, "nonzero tailing record in func info");
17730 /* set the size kernel expects so loader can zero
17731 * out the rest of the record.
17732 */
17733 if (copy_to_bpfptr_offset(uattr,
17734 offsetof(union bpf_attr, func_info_rec_size),
17735 &min_size, sizeof(min_size)))
17736 ret = -EFAULT;
17737 }
17738 goto err_free;
17739 }
17740
17741 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
17742 ret = -EFAULT;
17743 goto err_free;
17744 }
17745
17746 /* check insn_off */
17747 ret = -EINVAL;
17748 if (i == 0) {
17749 if (krecord[i].insn_off) {
17750 verbose(env,
17751 "nonzero insn_off %u for the first func info record",
17752 krecord[i].insn_off);
17753 goto err_free;
17754 }
17755 } else if (krecord[i].insn_off <= prev_offset) {
17756 verbose(env,
17757 "same or smaller insn offset (%u) than previous func info record (%u)",
17758 krecord[i].insn_off, prev_offset);
17759 goto err_free;
17760 }
17761
17762 /* check type_id */
17763 type = btf_type_by_id(btf, krecord[i].type_id);
17764 if (!type || !btf_type_is_func(type)) {
17765 verbose(env, "invalid type id %d in func info",
17766 krecord[i].type_id);
17767 goto err_free;
17768 }
17769
17770 func_proto = btf_type_by_id(btf, type->type);
17771 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
17772 /* btf_func_check() already verified it during BTF load */
17773 goto err_free;
17774
17775 prev_offset = krecord[i].insn_off;
17776 bpfptr_add(&urecord, urec_size);
17777 }
17778
17779 prog->aux->func_info = krecord;
17780 prog->aux->func_info_cnt = nfuncs;
17781 return 0;
17782
17783 err_free:
17784 kvfree(krecord);
17785 return ret;
17786 }
17787
17788 static int check_btf_func(struct bpf_verifier_env *env,
17789 const union bpf_attr *attr,
17790 bpfptr_t uattr)
17791 {
17792 const struct btf_type *type, *func_proto, *ret_type;
17793 u32 i, nfuncs, urec_size;
17794 struct bpf_func_info *krecord;
17795 struct bpf_func_info_aux *info_aux = NULL;
17796 struct bpf_prog *prog;
17797 const struct btf *btf;
17798 bpfptr_t urecord;
17799 bool scalar_return;
17800 int ret = -ENOMEM;
17801
17802 nfuncs = attr->func_info_cnt;
17803 if (!nfuncs) {
17804 if (check_abnormal_return(env))
17805 return -EINVAL;
17806 return 0;
17807 }
17808 if (nfuncs != env->subprog_cnt) {
17809 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
17810 return -EINVAL;
17811 }
17812
17813 urec_size = attr->func_info_rec_size;
17814
17815 prog = env->prog;
17816 btf = prog->aux->btf;
17817
17818 urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
17819
17820 krecord = prog->aux->func_info;
17821 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
17822 if (!info_aux)
17823 return -ENOMEM;
17824
17825 for (i = 0; i < nfuncs; i++) {
17826 /* check insn_off */
17827 ret = -EINVAL;
17828
17829 if (env->subprog_info[i].start != krecord[i].insn_off) {
17830 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
17831 goto err_free;
17832 }
17833
17834 /* Already checked type_id */
17835 type = btf_type_by_id(btf, krecord[i].type_id);
17836 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
17837 /* Already checked func_proto */
17838 func_proto = btf_type_by_id(btf, type->type);
17839
17840 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
17841 scalar_return =
17842 btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
17843 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
17844 verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
17845 goto err_free;
17846 }
17847 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
17848 verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
17849 goto err_free;
17850 }
17851
17852 bpfptr_add(&urecord, urec_size);
17853 }
17854
17855 prog->aux->func_info_aux = info_aux;
17856 return 0;
17857
17858 err_free:
17859 kfree(info_aux);
17860 return ret;
17861 }
17862
17863 static void adjust_btf_func(struct bpf_verifier_env *env)
17864 {
17865 struct bpf_prog_aux *aux = env->prog->aux;
17866 int i;
17867
17868 if (!aux->func_info)
17869 return;
17870
17871 /* func_info is not available for hidden subprogs */
17872 for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
17873 aux->func_info[i].insn_off = env->subprog_info[i].start;
17874 }
17875
17876 #define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
17877 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
17878
17879 static int check_btf_line(struct bpf_verifier_env *env,
17880 const union bpf_attr *attr,
17881 bpfptr_t uattr)
17882 {
17883 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
17884 struct bpf_subprog_info *sub;
17885 struct bpf_line_info *linfo;
17886 struct bpf_prog *prog;
17887 const struct btf *btf;
17888 bpfptr_t ulinfo;
17889 int err;
17890
17891 nr_linfo = attr->line_info_cnt;
17892 if (!nr_linfo)
17893 return 0;
17894 if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
17895 return -EINVAL;
17896
17897 rec_size = attr->line_info_rec_size;
17898 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
17899 rec_size > MAX_LINEINFO_REC_SIZE ||
17900 rec_size & (sizeof(u32) - 1))
17901 return -EINVAL;
17902
17903 /* Need to zero it in case userspace passes
17904 * in a smaller bpf_line_info object.
17905 */
17906 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
17907 GFP_KERNEL | __GFP_NOWARN);
17908 if (!linfo)
17909 return -ENOMEM;
17910
17911 prog = env->prog;
17912 btf = prog->aux->btf;
17913
17914 s = 0;
17915 sub = env->subprog_info;
17916 ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
17917 expected_size = sizeof(struct bpf_line_info);
17918 ncopy = min_t(u32, expected_size, rec_size);
17919 for (i = 0; i < nr_linfo; i++) {
17920 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
17921 if (err) {
17922 if (err == -E2BIG) {
17923 verbose(env, "nonzero tailing record in line_info");
17924 if (copy_to_bpfptr_offset(uattr,
17925 offsetof(union bpf_attr, line_info_rec_size),
17926 &expected_size, sizeof(expected_size)))
17927 err = -EFAULT;
17928 }
17929 goto err_free;
17930 }
17931
17932 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
17933 err = -EFAULT;
17934 goto err_free;
17935 }
17936
17937 /*
17938 * Check insn_off to ensure
17939 * 1) strictly increasing AND
17940 * 2) bounded by prog->len
17941 *
17942 * The linfo[0].insn_off == 0 check logically falls into
17943 * the later "missing bpf_line_info for func..." case
17944 * because the first linfo[0].insn_off must be the
17945 * first sub also and the first sub must have
17946 * subprog_info[0].start == 0.
17947 */
17948 if ((i && linfo[i].insn_off <= prev_offset) ||
17949 linfo[i].insn_off >= prog->len) {
17950 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
17951 i, linfo[i].insn_off, prev_offset,
17952 prog->len);
17953 err = -EINVAL;
17954 goto err_free;
17955 }
17956
17957 if (!prog->insnsi[linfo[i].insn_off].code) {
17958 verbose(env,
17959 "Invalid insn code at line_info[%u].insn_off\n",
17960 i);
17961 err = -EINVAL;
17962 goto err_free;
17963 }
17964
17965 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
17966 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
17967 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
17968 err = -EINVAL;
17969 goto err_free;
17970 }
17971
17972 if (s != env->subprog_cnt) {
17973 if (linfo[i].insn_off == sub[s].start) {
17974 sub[s].linfo_idx = i;
17975 s++;
17976 } else if (sub[s].start < linfo[i].insn_off) {
17977 verbose(env, "missing bpf_line_info for func#%u\n", s);
17978 err = -EINVAL;
17979 goto err_free;
17980 }
17981 }
17982
17983 prev_offset = linfo[i].insn_off;
17984 bpfptr_add(&ulinfo, rec_size);
17985 }
17986
17987 if (s != env->subprog_cnt) {
17988 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
17989 env->subprog_cnt - s, s);
17990 err = -EINVAL;
17991 goto err_free;
17992 }
17993
17994 prog->aux->linfo = linfo;
17995 prog->aux->nr_linfo = nr_linfo;
17996
17997 return 0;
17998
17999 err_free:
18000 kvfree(linfo);
18001 return err;
18002 }
18003
18004 #define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
18005 #define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
18006
18007 static int check_core_relo(struct bpf_verifier_env *env,
18008 const union bpf_attr *attr,
18009 bpfptr_t uattr)
18010 {
18011 u32 i, nr_core_relo, ncopy, expected_size, rec_size;
18012 struct bpf_core_relo core_relo = {};
18013 struct bpf_prog *prog = env->prog;
18014 const struct btf *btf = prog->aux->btf;
18015 struct bpf_core_ctx ctx = {
18016 .log = &env->log,
18017 .btf = btf,
18018 };
18019 bpfptr_t u_core_relo;
18020 int err;
18021
18022 nr_core_relo = attr->core_relo_cnt;
18023 if (!nr_core_relo)
18024 return 0;
18025 if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
18026 return -EINVAL;
18027
18028 rec_size = attr->core_relo_rec_size;
18029 if (rec_size < MIN_CORE_RELO_SIZE ||
18030 rec_size > MAX_CORE_RELO_SIZE ||
18031 rec_size % sizeof(u32))
18032 return -EINVAL;
18033
18034 u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
18035 expected_size = sizeof(struct bpf_core_relo);
18036 ncopy = min_t(u32, expected_size, rec_size);
18037
18038 /* Unlike func_info and line_info, copy and apply each CO-RE
18039 * relocation record one at a time.
18040 */
18041 for (i = 0; i < nr_core_relo; i++) {
18042 /* future proofing when sizeof(bpf_core_relo) changes */
18043 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
18044 if (err) {
18045 if (err == -E2BIG) {
18046 verbose(env, "nonzero tailing record in core_relo");
18047 if (copy_to_bpfptr_offset(uattr,
18048 offsetof(union bpf_attr, core_relo_rec_size),
18049 &expected_size, sizeof(expected_size)))
18050 err = -EFAULT;
18051 }
18052 break;
18053 }
18054
18055 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
18056 err = -EFAULT;
18057 break;
18058 }
18059
18060 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
18061 verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
18062 i, core_relo.insn_off, prog->len);
18063 err = -EINVAL;
18064 break;
18065 }
18066
18067 err = bpf_core_apply(&ctx, &core_relo, i,
18068 &prog->insnsi[core_relo.insn_off / 8]);
18069 if (err)
18070 break;
18071 bpfptr_add(&u_core_relo, rec_size);
18072 }
18073 return err;
18074 }
18075
18076 static int check_btf_info_early(struct bpf_verifier_env *env,
18077 const union bpf_attr *attr,
18078 bpfptr_t uattr)
18079 {
18080 struct btf *btf;
18081 int err;
18082
18083 if (!attr->func_info_cnt && !attr->line_info_cnt) {
18084 if (check_abnormal_return(env))
18085 return -EINVAL;
18086 return 0;
18087 }
18088
18089 btf = btf_get_by_fd(attr->prog_btf_fd);
18090 if (IS_ERR(btf))
18091 return PTR_ERR(btf);
18092 if (btf_is_kernel(btf)) {
18093 btf_put(btf);
18094 return -EACCES;
18095 }
18096 env->prog->aux->btf = btf;
18097
18098 err = check_btf_func_early(env, attr, uattr);
18099 if (err)
18100 return err;
18101 return 0;
18102 }
18103
18104 static int check_btf_info(struct bpf_verifier_env *env,
18105 const union bpf_attr *attr,
18106 bpfptr_t uattr)
18107 {
18108 int err;
18109
18110 if (!attr->func_info_cnt && !attr->line_info_cnt) {
18111 if (check_abnormal_return(env))
18112 return -EINVAL;
18113 return 0;
18114 }
18115
18116 err = check_btf_func(env, attr, uattr);
18117 if (err)
18118 return err;
18119
18120 err = check_btf_line(env, attr, uattr);
18121 if (err)
18122 return err;
18123
18124 err = check_core_relo(env, attr, uattr);
18125 if (err)
18126 return err;
18127
18128 return 0;
18129 }
18130
18131 /* check %cur's range satisfies %old's */
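/* E.g. (hypothetical bounds): old umin/umax == [0, 100] and
 * cur umin/umax == [10, 50] satisfies the check, while cur == [10, 200]
 * does not; the signed and 32-bit bounds are checked the same way.
 */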
18132 static bool range_within(const struct bpf_reg_state *old,
18133 const struct bpf_reg_state *cur)
18134 {
18135 return old->umin_value <= cur->umin_value &&
18136 old->umax_value >= cur->umax_value &&
18137 old->smin_value <= cur->smin_value &&
18138 old->smax_value >= cur->smax_value &&
18139 old->u32_min_value <= cur->u32_min_value &&
18140 old->u32_max_value >= cur->u32_max_value &&
18141 old->s32_min_value <= cur->s32_min_value &&
18142 old->s32_max_value >= cur->s32_max_value;
18143 }
18144
18145 /* If in the old state two registers had the same id, then they need to have
18146 * the same id in the new state as well. But that id could be different from
18147 * the old state, so we need to track the mapping from old to new ids.
18148 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
18149 * regs with old id 5 must also have new id 9 for the new state to be safe. But
18150 * regs with a different old id could still have new id 9, we don't care about
18151 * that.
18152 * So we look through our idmap to see if this old id has been seen before. If
18153 * so, we require the new id to match; otherwise, we add the id pair to the map.
18154 */
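/* A minimal sketch with hypothetical ids:
 *   old: r1.id == 5, r2.id == 5
 *   cur: r1.id == 9, r2.id == 9  -> first pair records 5 -> 9, second
 *                                   pair matches the mapping, safe;
 *   cur: r1.id == 9, r2.id == 7  -> 5 is already mapped to 9, so the
 *                                   5 vs 7 pair fails, not safe.
 */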
18155 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
18156 {
18157 struct bpf_id_pair *map = idmap->map;
18158 unsigned int i;
18159
18160 /* either both IDs should be set or both should be zero */
18161 if (!!old_id != !!cur_id)
18162 return false;
18163
18164 if (old_id == 0) /* cur_id == 0 as well */
18165 return true;
18166
18167 for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
18168 if (!map[i].old) {
18169 /* Reached an empty slot; haven't seen this id before */
18170 map[i].old = old_id;
18171 map[i].cur = cur_id;
18172 return true;
18173 }
18174 if (map[i].old == old_id)
18175 return map[i].cur == cur_id;
18176 if (map[i].cur == cur_id)
18177 return false;
18178 }
18179 /* We ran out of idmap slots, which should be impossible */
18180 WARN_ON_ONCE(1);
18181 return false;
18182 }
18183
18184 /* Similar to check_ids(), but allocate a unique temporary ID
18185 * for 'old_id' or 'cur_id' of zero.
18186 * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
18187 */
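/* E.g. (hypothetical ids): old_id == 0 vs cur_id == 42 is compared as
 * 'fresh temporary id' vs 42, which succeeds on first use but pins 42
 * to that temporary id for the rest of the state comparison.
 */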
18188 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
18189 {
18190 old_id = old_id ? old_id : ++idmap->tmp_id_gen;
18191 cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
18192
18193 return check_ids(old_id, cur_id, idmap);
18194 }
18195
18196 static void clean_func_state(struct bpf_verifier_env *env,
18197 struct bpf_func_state *st)
18198 {
18199 enum bpf_reg_liveness live;
18200 int i, j;
18201
18202 for (i = 0; i < BPF_REG_FP; i++) {
18203 live = st->regs[i].live;
18204 /* liveness must not touch this register anymore */
18205 st->regs[i].live |= REG_LIVE_DONE;
18206 if (!(live & REG_LIVE_READ))
18207 /* since the register is unused, clear its state
18208 * to make further comparison simpler
18209 */
18210 __mark_reg_not_init(env, &st->regs[i]);
18211 }
18212
18213 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
18214 live = st->stack[i].spilled_ptr.live;
18215 /* liveness must not touch this stack slot anymore */
18216 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
18217 if (!(live & REG_LIVE_READ)) {
18218 __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
18219 for (j = 0; j < BPF_REG_SIZE; j++)
18220 st->stack[i].slot_type[j] = STACK_INVALID;
18221 }
18222 }
18223 }
18224
18225 static void clean_verifier_state(struct bpf_verifier_env *env,
18226 struct bpf_verifier_state *st)
18227 {
18228 int i;
18229
18230 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
18231 /* all regs in this state in all frames were already marked */
18232 return;
18233
18234 for (i = 0; i <= st->curframe; i++)
18235 clean_func_state(env, st->frame[i]);
18236 }
18237
18238 /* the parentage chains form a tree.
18239 * the verifier states are added to state lists at given insn and
18240 * pushed into state stack for future exploration.
18241 * when the verifier reaches bpf_exit insn some of the verifier states
18242 * stored in the state lists have their final liveness state already,
18243 * but a lot of states will get revised from liveness point of view when
18244 * the verifier explores other branches.
18245 * Example:
18246 * 1: r0 = 1
18247 * 2: if r1 == 100 goto pc+1
18248 * 3: r0 = 2
18249 * 4: exit
18250 * when the verifier reaches exit insn the register r0 in the state list of
18251 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
18252 * of insn 2 and goes exploring further. At the insn 4 it will walk the
18253 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
18254 *
18255 * Since the verifier pushes the branch states as it sees them while exploring
18256 * the program the condition of walking the branch instruction for the second
18257 * time means that all states below this branch were already explored and
18258 * their final liveness marks are already propagated.
18259 * Hence when the verifier completes the search of state list in is_state_visited()
18260 * we can call this clean_live_states() function to mark all liveness states
18261 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
18262 * will not be used.
18263 * This function also clears the registers and stack for states that !READ
18264 * to simplify state merging.
18265 *
18266 * Important note: walking the same branch instruction in the callee
18267 * doesn't mean that the states are DONE. The verifier has to compare
18268 * the callsites.
18269 */
18270 static void clean_live_states(struct bpf_verifier_env *env, int insn,
18271 struct bpf_verifier_state *cur)
18272 {
18273 struct bpf_verifier_state *loop_entry;
18274 struct bpf_verifier_state_list *sl;
18275 struct list_head *pos, *head;
18276
18277 head = explored_state(env, insn);
18278 list_for_each(pos, head) {
18279 sl = container_of(pos, struct bpf_verifier_state_list, node);
18280 if (sl->state.branches)
18281 continue;
18282 loop_entry = get_loop_entry(env, &sl->state);
18283 if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches)
18284 continue;
18285 if (sl->state.insn_idx != insn ||
18286 !same_callsites(&sl->state, cur))
18287 continue;
18288 clean_verifier_state(env, &sl->state);
18289 }
18290 }
18291
18292 static bool regs_exact(const struct bpf_reg_state *rold,
18293 const struct bpf_reg_state *rcur,
18294 struct bpf_idmap *idmap)
18295 {
18296 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
18297 check_ids(rold->id, rcur->id, idmap) &&
18298 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
18299 }
18300
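/* How strictly regsafe()/stacksafe() compare old and current states:
 * NOT_EXACT allows liveness and precision marks to relax the comparison,
 * EXACT requires registers to match field by field, RANGE_WITHIN keeps
 * the type checks strict but compares scalars by range inclusion.
 * (A summary of how the values are used below, not a formal definition.)
 */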
18301 enum exact_level {
18302 NOT_EXACT,
18303 EXACT,
18304 RANGE_WITHIN
18305 };
18306
18307 /* Returns true if (rold safe implies rcur safe) */
18308 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
18309 struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
18310 enum exact_level exact)
18311 {
18312 if (exact == EXACT)
18313 return regs_exact(rold, rcur, idmap);
18314
18315 if (!(rold->live & REG_LIVE_READ) && exact == NOT_EXACT)
18316 /* explored state didn't use this */
18317 return true;
18318 if (rold->type == NOT_INIT) {
18319 if (exact == NOT_EXACT || rcur->type == NOT_INIT)
18320 /* explored state can't have used this */
18321 return true;
18322 }
18323
18324 /* Enforce that register types have to match exactly, including their
18325 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
18326 * rule.
18327 *
18328 * One can make a point that using a pointer register as unbounded
18329 * SCALAR would be technically acceptable, but this could lead to
18330 * pointer leaks because scalars are allowed to leak while pointers
18331 * are not. We could make this safe in special cases if root is
18332 * calling us, but it's probably not worth the hassle.
18333 *
18334 * Also, register types that are *not* MAYBE_NULL could technically be
18335 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
18336 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
18337 * to the same map).
18338 * However, if the old MAYBE_NULL register then got NULL checked,
18339 * doing so could have affected others with the same id, and we can't
18340 * check for that because we lost the id when we converted to
18341 * a non-MAYBE_NULL variant.
18342 * So, as a general rule we don't allow mixing MAYBE_NULL and
18343 * non-MAYBE_NULL registers as well.
18344 */
18345 if (rold->type != rcur->type)
18346 return false;
18347
18348 switch (base_type(rold->type)) {
18349 case SCALAR_VALUE:
18350 if (env->explore_alu_limits) {
18351 /* explore_alu_limits disables tnum_in() and range_within()
18352 * logic and requires everything to be strict
18353 */
18354 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
18355 check_scalar_ids(rold->id, rcur->id, idmap);
18356 }
18357 if (!rold->precise && exact == NOT_EXACT)
18358 return true;
18359 if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
18360 return false;
18361 if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off))
18362 return false;
18363 /* Why check_ids() for scalar registers?
18364 *
18365 * Consider the following BPF code:
18366 * 1: r6 = ... unbound scalar, ID=a ...
18367 * 2: r7 = ... unbound scalar, ID=b ...
18368 * 3: if (r6 > r7) goto +1
18369 * 4: r6 = r7
18370 * 5: if (r6 > X) goto ...
18371 * 6: ... memory operation using r7 ...
18372 *
18373 * First verification path is [1-6]:
18374 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
18375 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
18376 * r7 <= X, because r6 and r7 share same id.
18377 * Next verification path is [1-4, 6].
18378 *
18379 * Instruction (6) would be reached in two states:
18380 * I. r6{.id=b}, r7{.id=b} via path 1-6;
18381 * II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
18382 *
18383 * Use check_ids() to distinguish these states.
18384 * ---
18385 * Also verify that new value satisfies old value range knowledge.
18386 */
18387 return range_within(rold, rcur) &&
18388 tnum_in(rold->var_off, rcur->var_off) &&
18389 check_scalar_ids(rold->id, rcur->id, idmap);
18390 case PTR_TO_MAP_KEY:
18391 case PTR_TO_MAP_VALUE:
18392 case PTR_TO_MEM:
18393 case PTR_TO_BUF:
18394 case PTR_TO_TP_BUFFER:
18395 /* If the new min/max/var_off satisfy the old ones and
18396 * everything else matches, we are OK.
18397 */
18398 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
18399 range_within(rold, rcur) &&
18400 tnum_in(rold->var_off, rcur->var_off) &&
18401 check_ids(rold->id, rcur->id, idmap) &&
18402 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
18403 case PTR_TO_PACKET_META:
18404 case PTR_TO_PACKET:
18405 /* We must have at least as much range as the old ptr
18406 * did, so that any accesses which were safe before are
18407 * still safe. This is true even if old range < old off,
18408 * since someone could have accessed through (ptr - k), or
18409 * even done ptr -= k in a register, to get a safe access.
18410 */
18411 if (rold->range > rcur->range)
18412 return false;
18413 /* If the offsets don't match, we can't trust our alignment;
18414 * nor can we be sure that we won't fall out of range.
18415 */
18416 if (rold->off != rcur->off)
18417 return false;
18418 /* id relations must be preserved */
18419 if (!check_ids(rold->id, rcur->id, idmap))
18420 return false;
18421 /* new val must satisfy old val knowledge */
18422 return range_within(rold, rcur) &&
18423 tnum_in(rold->var_off, rcur->var_off);
18424 case PTR_TO_STACK:
18425 /* two stack pointers are equal only if they're pointing to
18426 * the same stack frame, since fp-8 in foo != fp-8 in bar
18427 */
18428 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
18429 case PTR_TO_ARENA:
18430 return true;
18431 default:
18432 return regs_exact(rold, rcur, idmap);
18433 }
18434 }
18435
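/* Template register representing a fully unknown, imprecise scalar that
 * has been read. scalar_reg_for_stack() below returns it for stack slots
 * that are all STACK_MISC, so such slots can be compared against spilled
 * scalars via regsafe().
 */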
18436 static struct bpf_reg_state unbound_reg;
18437
18438 static __init int unbound_reg_init(void)
18439 {
18440 __mark_reg_unknown_imprecise(&unbound_reg);
18441 unbound_reg.live |= REG_LIVE_READ;
18442 return 0;
18443 }
18444 late_initcall(unbound_reg_init);
18445
18446 static bool is_stack_all_misc(struct bpf_verifier_env *env,
18447 struct bpf_stack_state *stack)
18448 {
18449 u32 i;
18450
18451 for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) {
18452 if ((stack->slot_type[i] == STACK_MISC) ||
18453 (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
18454 continue;
18455 return false;
18456 }
18457
18458 return true;
18459 }
18460
18461 static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
18462 struct bpf_stack_state *stack)
18463 {
18464 if (is_spilled_scalar_reg64(stack))
18465 return &stack->spilled_ptr;
18466
18467 if (is_stack_all_misc(env, stack))
18468 return &unbound_reg;
18469
18470 return NULL;
18471 }
18472
18473 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
18474 struct bpf_func_state *cur, struct bpf_idmap *idmap,
18475 enum exact_level exact)
18476 {
18477 int i, spi;
18478
18479 /* walk slots of the explored stack and ignore any additional
18480 * slots in the current stack, since explored(safe) state
18481 * didn't use them
18482 */
18483 for (i = 0; i < old->allocated_stack; i++) {
18484 struct bpf_reg_state *old_reg, *cur_reg;
18485
18486 spi = i / BPF_REG_SIZE;
18487
18488 if (exact != NOT_EXACT &&
18489 (i >= cur->allocated_stack ||
18490 old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
18491 cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
18492 return false;
18493
18494 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
18495 && exact == NOT_EXACT) {
18496 i += BPF_REG_SIZE - 1;
18497 /* explored state didn't use this */
18498 continue;
18499 }
18500
18501 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
18502 continue;
18503
18504 if (env->allow_uninit_stack &&
18505 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
18506 continue;
18507
18508 /* explored stack has more populated slots than current stack
18509 * and these slots were used
18510 */
18511 if (i >= cur->allocated_stack)
18512 return false;
18513
18514 /* 64-bit scalar spill vs all slots MISC and vice versa.
18515 * Load from all slots MISC produces unbound scalar.
18516 * Construct a fake register for such stack and call
18517 * regsafe() to ensure scalar ids are compared.
18518 */
18519 old_reg = scalar_reg_for_stack(env, &old->stack[spi]);
18520 cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]);
18521 if (old_reg && cur_reg) {
18522 if (!regsafe(env, old_reg, cur_reg, idmap, exact))
18523 return false;
18524 i += BPF_REG_SIZE - 1;
18525 continue;
18526 }
18527
18528 /* if old state was safe with misc data in the stack
18529 * it will be safe with zero-initialized stack.
18530 * The opposite is not true
18531 */
18532 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
18533 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
18534 continue;
18535 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
18536 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
18537 /* Ex: old explored (safe) state has STACK_SPILL in
18538 * this stack slot, but current has STACK_MISC ->
18539 * these verifier states are not equivalent,
18540 * return false to continue verification of this path
18541 */
18542 return false;
18543 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
18544 continue;
18545 /* Both old and cur are having same slot_type */
18546 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
18547 case STACK_SPILL:
18548 /* when explored and current stack slot are both storing
18549 * spilled registers, check that stored pointers types
18550 * are the same as well.
18551 * Ex: explored safe path could have stored
18552 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
18553 * but current path has stored:
18554 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
18555 * such verifier states are not equivalent.
18556 * return false to continue verification of this path
18557 */
18558 if (!regsafe(env, &old->stack[spi].spilled_ptr,
18559 &cur->stack[spi].spilled_ptr, idmap, exact))
18560 return false;
18561 break;
18562 case STACK_DYNPTR:
18563 old_reg = &old->stack[spi].spilled_ptr;
18564 cur_reg = &cur->stack[spi].spilled_ptr;
18565 if (old_reg->dynptr.type != cur_reg->dynptr.type ||
18566 old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
18567 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
18568 return false;
18569 break;
18570 case STACK_ITER:
18571 old_reg = &old->stack[spi].spilled_ptr;
18572 cur_reg = &cur->stack[spi].spilled_ptr;
18573 /* iter.depth is not compared between states as it
18574 * doesn't matter for correctness and would otherwise
18575 * prevent convergence; we maintain it only to prevent
18576 * infinite loop check triggering, see
18577 * iter_active_depths_differ()
18578 */
18579 if (old_reg->iter.btf != cur_reg->iter.btf ||
18580 old_reg->iter.btf_id != cur_reg->iter.btf_id ||
18581 old_reg->iter.state != cur_reg->iter.state ||
18582 /* ignore {old_reg,cur_reg}->iter.depth, see above */
18583 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
18584 return false;
18585 break;
18586 case STACK_IRQ_FLAG:
18587 old_reg = &old->stack[spi].spilled_ptr;
18588 cur_reg = &cur->stack[spi].spilled_ptr;
18589 if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
18590 old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
18591 return false;
18592 break;
18593 case STACK_MISC:
18594 case STACK_ZERO:
18595 case STACK_INVALID:
18596 continue;
18597 /* Ensure that new unhandled slot types return false by default */
18598 default:
18599 return false;
18600 }
18601 }
18602 return true;
18603 }
18604
18605 static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
18606 struct bpf_idmap *idmap)
18607 {
18608 int i;
18609
18610 if (old->acquired_refs != cur->acquired_refs)
18611 return false;
18612
18613 if (old->active_locks != cur->active_locks)
18614 return false;
18615
18616 if (old->active_preempt_locks != cur->active_preempt_locks)
18617 return false;
18618
18619 if (old->active_rcu_lock != cur->active_rcu_lock)
18620 return false;
18621
18622 if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
18623 return false;
18624
18625 if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
18626 old->active_lock_ptr != cur->active_lock_ptr)
18627 return false;
18628
18629 for (i = 0; i < old->acquired_refs; i++) {
18630 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
18631 old->refs[i].type != cur->refs[i].type)
18632 return false;
18633 switch (old->refs[i].type) {
18634 case REF_TYPE_PTR:
18635 case REF_TYPE_IRQ:
18636 break;
18637 case REF_TYPE_LOCK:
18638 case REF_TYPE_RES_LOCK:
18639 case REF_TYPE_RES_LOCK_IRQ:
18640 if (old->refs[i].ptr != cur->refs[i].ptr)
18641 return false;
18642 break;
18643 default:
18644 WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
18645 return false;
18646 }
18647 }
18648
18649 return true;
18650 }
18651
18652 /* compare two verifier states
18653 *
18654 * all states stored in state_list are known to be valid, since
18655 * verifier reached 'bpf_exit' instruction through them
18656 *
18657 * this function is called when verifier exploring different branches of
18658 * execution popped from the state stack. If it sees an old state that has
18659 * more strict register state and more strict stack state then this execution
18660 * branch doesn't need to be explored further, since verifier already
18661 * concluded that more strict state leads to valid finish.
18662 *
18663 * Therefore two states are equivalent if register state is more conservative
18664 * and explored stack state is more conservative than the current one.
18665 * Example:
18666 * explored current
18667 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
18668 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
18669 *
18670 * In other words if current stack state (one being explored) has more
18671 * valid slots than old one that already passed validation, it means
18672 * the verifier can stop exploring and conclude that current state is valid too
18673 *
18674 * Similarly with registers. If explored state has register type as invalid
18675 * whereas register type in current state is meaningful, it means that
18676 * the current state will reach 'bpf_exit' instruction safely
18677 */
18678 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
18679 struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
18680 {
18681 u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
18682 u16 i;
18683
18684 if (old->callback_depth > cur->callback_depth)
18685 return false;
18686
18687 for (i = 0; i < MAX_BPF_REG; i++)
18688 if (((1 << i) & live_regs) &&
18689 !regsafe(env, &old->regs[i], &cur->regs[i],
18690 &env->idmap_scratch, exact))
18691 return false;
18692
18693 if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
18694 return false;
18695
18696 return true;
18697 }
18698
18699 static void reset_idmap_scratch(struct bpf_verifier_env *env)
18700 {
18701 env->idmap_scratch.tmp_id_gen = env->id_gen;
18702 memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
18703 }
18704
18705 static bool states_equal(struct bpf_verifier_env *env,
18706 struct bpf_verifier_state *old,
18707 struct bpf_verifier_state *cur,
18708 enum exact_level exact)
18709 {
18710 u32 insn_idx;
18711 int i;
18712
18713 if (old->curframe != cur->curframe)
18714 return false;
18715
18716 reset_idmap_scratch(env);
18717
18718 /* Verification state from speculative execution simulation
18719 * must never prune a non-speculative execution one.
18720 */
18721 if (old->speculative && !cur->speculative)
18722 return false;
18723
18724 if (old->in_sleepable != cur->in_sleepable)
18725 return false;
18726
18727 if (!refsafe(old, cur, &env->idmap_scratch))
18728 return false;
18729
18730 /* for states to be equal callsites have to be the same
18731 * and all frame states need to be equivalent
18732 */
18733 for (i = 0; i <= old->curframe; i++) {
18734 insn_idx = i == old->curframe
18735 ? env->insn_idx
18736 : old->frame[i + 1]->callsite;
18737 if (old->frame[i]->callsite != cur->frame[i]->callsite)
18738 return false;
18739 if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
18740 return false;
18741 }
18742 return true;
18743 }
18744
18745 /* Return 0 if no propagation happened. Return negative error code if error
18746 * happened. Otherwise, return the propagated bit.
18747 */
18748 static int propagate_liveness_reg(struct bpf_verifier_env *env,
18749 struct bpf_reg_state *reg,
18750 struct bpf_reg_state *parent_reg)
18751 {
18752 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
18753 u8 flag = reg->live & REG_LIVE_READ;
18754 int err;
18755
18756 /* When comes here, read flags of PARENT_REG or REG could be any of
18757 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
18758 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
18759 */
18760 if (parent_flag == REG_LIVE_READ64 ||
18761 /* Or if there is no read flag from REG. */
18762 !flag ||
18763 /* Or if the read flag from REG is the same as PARENT_REG. */
18764 parent_flag == flag)
18765 return 0;
18766
18767 err = mark_reg_read(env, reg, parent_reg, flag);
18768 if (err)
18769 return err;
18770
18771 return flag;
18772 }
18773
18774 /* A write screens off any subsequent reads; but write marks come from the
18775 * straight-line code between a state and its parent. When we arrive at an
18776 * equivalent state (jump target or such) we didn't arrive by the straight-line
18777 * code, so read marks in the state must propagate to the parent regardless
18778 * of the state's write marks. That's what 'parent == state->parent' comparison
18779 * in mark_reg_read() is for.
18780 */
18781 static int propagate_liveness(struct bpf_verifier_env *env,
18782 const struct bpf_verifier_state *vstate,
18783 struct bpf_verifier_state *vparent)
18784 {
18785 struct bpf_reg_state *state_reg, *parent_reg;
18786 struct bpf_func_state *state, *parent;
18787 int i, frame, err = 0;
18788
18789 if (vparent->curframe != vstate->curframe) {
18790 WARN(1, "propagate_live: parent frame %d current frame %d\n",
18791 vparent->curframe, vstate->curframe);
18792 return -EFAULT;
18793 }
18794 /* Propagate read liveness of registers... */
18795 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
18796 for (frame = 0; frame <= vstate->curframe; frame++) {
18797 parent = vparent->frame[frame];
18798 state = vstate->frame[frame];
18799 parent_reg = parent->regs;
18800 state_reg = state->regs;
18801 /* We don't need to worry about FP liveness, it's read-only */
18802 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
18803 err = propagate_liveness_reg(env, &state_reg[i],
18804 &parent_reg[i]);
18805 if (err < 0)
18806 return err;
18807 if (err == REG_LIVE_READ64)
18808 mark_insn_zext(env, &parent_reg[i]);
18809 }
18810
18811 /* Propagate stack slots. */
18812 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
18813 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
18814 parent_reg = &parent->stack[i].spilled_ptr;
18815 state_reg = &state->stack[i].spilled_ptr;
18816 err = propagate_liveness_reg(env, state_reg,
18817 parent_reg);
18818 if (err < 0)
18819 return err;
18820 }
18821 }
18822 return 0;
18823 }
18824
18825 /* find precise scalars in the previous equivalent state and
18826 * propagate them into the current state
18827 */
18828	static int propagate_precision(struct bpf_verifier_env *env,
18829 const struct bpf_verifier_state *old)
18830 {
18831 struct bpf_reg_state *state_reg;
18832 struct bpf_func_state *state;
18833 int i, err = 0, fr;
18834 bool first;
18835
18836 for (fr = old->curframe; fr >= 0; fr--) {
18837 state = old->frame[fr];
18838 state_reg = state->regs;
18839 first = true;
18840 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
18841 if (state_reg->type != SCALAR_VALUE ||
18842 !state_reg->precise ||
18843 !(state_reg->live & REG_LIVE_READ))
18844 continue;
18845 if (env->log.level & BPF_LOG_LEVEL2) {
18846 if (first)
18847 verbose(env, "frame %d: propagating r%d", fr, i);
18848 else
18849 verbose(env, ",r%d", i);
18850 }
18851 bt_set_frame_reg(&env->bt, fr, i);
18852 first = false;
18853 }
18854
18855 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
18856 if (!is_spilled_reg(&state->stack[i]))
18857 continue;
18858 state_reg = &state->stack[i].spilled_ptr;
18859 if (state_reg->type != SCALAR_VALUE ||
18860 !state_reg->precise ||
18861 !(state_reg->live & REG_LIVE_READ))
18862 continue;
18863 if (env->log.level & BPF_LOG_LEVEL2) {
18864 if (first)
18865 verbose(env, "frame %d: propagating fp%d",
18866 fr, (-i - 1) * BPF_REG_SIZE);
18867 else
18868 verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
18869 }
18870 bt_set_frame_slot(&env->bt, fr, i);
18871 first = false;
18872 }
18873 if (!first)
18874 verbose(env, "\n");
18875 }
18876
18877 err = mark_chain_precision_batch(env);
18878 if (err < 0)
18879 return err;
18880
18881 return 0;
18882 }
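/* For illustration, with BPF_LOG_LEVEL2 the loops above emit one line per
 * frame listing the precise scalars queued into env->bt, e.g. (a sketch,
 * register and slot numbers are made up):
 *
 *   frame 0: propagating r6,r7,fp-8
 *
 * mark_chain_precision_batch() then back-propagates precision for everything
 * queued in env->bt.
 */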
18883
18884	static bool states_maybe_looping(struct bpf_verifier_state *old,
18885 struct bpf_verifier_state *cur)
18886 {
18887 struct bpf_func_state *fold, *fcur;
18888 int i, fr = cur->curframe;
18889
18890 if (old->curframe != fr)
18891 return false;
18892
18893 fold = old->frame[fr];
18894 fcur = cur->frame[fr];
18895 for (i = 0; i < MAX_BPF_REG; i++)
18896 if (memcmp(&fold->regs[i], &fcur->regs[i],
18897 offsetof(struct bpf_reg_state, parent)))
18898 return false;
18899 return true;
18900 }
18901
18902	static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
18903 {
18904 return env->insn_aux_data[insn_idx].is_iter_next;
18905 }
18906
18907 /* is_state_visited() handles iter_next() (see process_iter_next_call() for
18908 * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
18909 * states to match, which otherwise would look like an infinite loop. So while
18910 * iter_next() calls are taken care of, we still need to be careful and
18911	 * prevent an erroneous and overly eager declaration of "infinite loop", when
18912 * iterators are involved.
18913 *
18914 * Here's a situation in pseudo-BPF assembly form:
18915 *
18916 * 0: again: ; set up iter_next() call args
18917 * 1: r1 = &it ; <CHECKPOINT HERE>
18918 * 2: call bpf_iter_num_next ; this is iter_next() call
18919 * 3: if r0 == 0 goto done
18920 * 4: ... something useful here ...
18921 * 5: goto again ; another iteration
18922 * 6: done:
18923 * 7: r1 = &it
18924 * 8: call bpf_iter_num_destroy ; clean up iter state
18925 * 9: exit
18926 *
18927 * This is a typical loop. Let's assume that we have a prune point at 1:,
18928 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
18929 * again`, assuming other heuristics don't get in a way).
18930 *
18931 * When we first time come to 1:, let's say we have some state X. We proceed
18932 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
18933 * Now we come back to validate that forked ACTIVE state. We proceed through
18934 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
18935 * are converging. But the problem is that we don't know that yet, as this
18936 * convergence has to happen at iter_next() call site only. So if nothing is
18937 * done, at 1: verifier will use bounded loop logic and declare infinite
18938 * looping (and would be *technically* correct, if not for iterator's
18939 * "eventual sticky NULL" contract, see process_iter_next_call()). But we
18940 * don't want that. So what we do in process_iter_next_call() when we go on
18941 * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
18942 * a different iteration. So when we suspect an infinite loop, we additionally
18943 * check if any of the *ACTIVE* iterator states depths differ. If yes, we
18944 * pretend we are not looping and wait for next iter_next() call.
18945 *
18946 * This only applies to ACTIVE state. In DRAINED state we don't expect to
18947 * loop, because that would actually mean infinite loop, as DRAINED state is
18948 * "sticky", and so we'll keep returning into the same instruction with the
18949 * same state (at least in one of possible code paths).
18950 *
18951	 * This approach allows us to keep the infinite loop heuristic even in the
18952	 * face of an active iterator. E.g., the C snippet below is (and will be
18953	 * detected as) infinitely looping:
18954 *
18955 * struct bpf_iter_num it;
18956 * int *p, x;
18957 *
18958 * bpf_iter_num_new(&it, 0, 10);
18959	 *   while ((p = bpf_iter_num_next(&it))) {
18960	 *       x = *p;
18961 * while (x--) {} // <<-- infinite loop here
18962 * }
18963 *
18964 */
18965	static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
18966 {
18967 struct bpf_reg_state *slot, *cur_slot;
18968 struct bpf_func_state *state;
18969 int i, fr;
18970
18971 for (fr = old->curframe; fr >= 0; fr--) {
18972 state = old->frame[fr];
18973 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
18974 if (state->stack[i].slot_type[0] != STACK_ITER)
18975 continue;
18976
18977 slot = &state->stack[i].spilled_ptr;
18978 if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
18979 continue;
18980
18981 cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
18982 if (cur_slot->iter.depth != slot->iter.depth)
18983 return true;
18984 }
18985 }
18986 return false;
18987 }
18988
18989	static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
18990 {
18991 struct bpf_verifier_state_list *new_sl;
18992 struct bpf_verifier_state_list *sl;
18993 struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
18994 int i, j, n, err, states_cnt = 0;
18995 bool force_new_state, add_new_state, force_exact;
18996 struct list_head *pos, *tmp, *head;
18997
18998 force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
18999 /* Avoid accumulating infinitely long jmp history */
19000 cur->insn_hist_end - cur->insn_hist_start > 40;
19001
19002		/* bpf progs typically have a pruning point every 4 instructions
19003 * http://vger.kernel.org/bpfconf2019.html#session-1
19004 * Do not add new state for future pruning if the verifier hasn't seen
19005 * at least 2 jumps and at least 8 instructions.
19006		 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
19007		 * In tests this amounts to up to a 50% reduction in total verifier
19008		 * memory consumption and a 20% verifier time speedup.
19009 */
19010 add_new_state = force_new_state;
19011 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
19012 env->insn_processed - env->prev_insn_processed >= 8)
19013 add_new_state = true;
19014
19015 clean_live_states(env, insn_idx, cur);
19016
19017 head = explored_state(env, insn_idx);
19018 list_for_each_safe(pos, tmp, head) {
19019 sl = container_of(pos, struct bpf_verifier_state_list, node);
19020 states_cnt++;
19021 if (sl->state.insn_idx != insn_idx)
19022 continue;
19023
19024 if (sl->state.branches) {
19025 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
19026
19027 if (frame->in_async_callback_fn &&
19028 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
19029 /* Different async_entry_cnt means that the verifier is
19030 * processing another entry into async callback.
19031 * Seeing the same state is not an indication of infinite
19032 * loop or infinite recursion.
19033 * But finding the same state doesn't mean that it's safe
19034 * to stop processing the current state. The previous state
19035 * hasn't yet reached bpf_exit, since state.branches > 0.
19036				 * Checking in_async_callback_fn alone is not enough either,
19037				 * since the verifier still needs to catch infinite loops
19038				 * inside async callbacks.
19039 */
19040 goto skip_inf_loop_check;
19041 }
19042 /* BPF open-coded iterators loop detection is special.
19043 * states_maybe_looping() logic is too simplistic in detecting
19044 * states that *might* be equivalent, because it doesn't know
19045 * about ID remapping, so don't even perform it.
19046 * See process_iter_next_call() and iter_active_depths_differ()
19047 * for overview of the logic. When current and one of parent
19048 * states are detected as equivalent, it's a good thing: we prove
19049 * convergence and can stop simulating further iterations.
19050 * It's safe to assume that iterator loop will finish, taking into
19051 * account iter_next() contract of eventually returning
19052 * sticky NULL result.
19053 *
19054 * Note, that states have to be compared exactly in this case because
19055 * read and precision marks might not be finalized inside the loop.
19056 * E.g. as in the program below:
19057 *
19058 * 1. r7 = -16
19059 * 2. r6 = bpf_get_prandom_u32()
19060 * 3. while (bpf_iter_num_next(&fp[-8])) {
19061 * 4. if (r6 != 42) {
19062 * 5. r7 = -32
19063 * 6. r6 = bpf_get_prandom_u32()
19064 * 7. continue
19065 * 8. }
19066 * 9. r0 = r10
19067 * 10. r0 += r7
19068 * 11. r8 = *(u64 *)(r0 + 0)
19069 * 12. r6 = bpf_get_prandom_u32()
19070 * 13. }
19071 *
19072 * Here verifier would first visit path 1-3, create a checkpoint at 3
19073 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
19074 * not have read or precision mark for r7 yet, thus inexact states
19075 * comparison would discard current state with r7=-32
19076 * => unsafe memory access at 11 would not be caught.
19077 */
19078 if (is_iter_next_insn(env, insn_idx)) {
19079 if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
19080 struct bpf_func_state *cur_frame;
19081 struct bpf_reg_state *iter_state, *iter_reg;
19082 int spi;
19083
19084 cur_frame = cur->frame[cur->curframe];
19085 /* btf_check_iter_kfuncs() enforces that
19086 * iter state pointer is always the first arg
19087 */
19088 iter_reg = &cur_frame->regs[BPF_REG_1];
19089 /* current state is valid due to states_equal(),
19090 * so we can assume valid iter and reg state,
19091 * no need for extra (re-)validations
19092 */
19093 spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
19094 iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
19095 if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
19096 update_loop_entry(env, cur, &sl->state);
19097 goto hit;
19098 }
19099 }
19100 goto skip_inf_loop_check;
19101 }
19102 if (is_may_goto_insn_at(env, insn_idx)) {
19103 if (sl->state.may_goto_depth != cur->may_goto_depth &&
19104 states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
19105 update_loop_entry(env, cur, &sl->state);
19106 goto hit;
19107 }
19108 }
19109 if (calls_callback(env, insn_idx)) {
19110 if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
19111 goto hit;
19112 goto skip_inf_loop_check;
19113 }
19114 /* attempt to detect infinite loop to avoid unnecessary doomed work */
19115 if (states_maybe_looping(&sl->state, cur) &&
19116 states_equal(env, &sl->state, cur, EXACT) &&
19117 !iter_active_depths_differ(&sl->state, cur) &&
19118 sl->state.may_goto_depth == cur->may_goto_depth &&
19119 sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
19120 verbose_linfo(env, insn_idx, "; ");
19121 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
19122 verbose(env, "cur state:");
19123 print_verifier_state(env, cur, cur->curframe, true);
19124 verbose(env, "old state:");
19125 print_verifier_state(env, &sl->state, cur->curframe, true);
19126 return -EINVAL;
19127 }
19128 /* if the verifier is processing a loop, avoid adding new state
19129 * too often, since different loop iterations have distinct
19130 * states and may not help future pruning.
19131				 * This threshold shouldn't be too low, to make sure that
19132				 * a loop with a large bound will be rejected quickly.
19133 * The most abusive loop will be:
19134 * r1 += 1
19135 * if r1 < 1000000 goto pc-2
19136				 * 1M insn_processed limit / 100 == 10k peak states.
19137 * This threshold shouldn't be too high either, since states
19138 * at the end of the loop are likely to be useful in pruning.
19139 */
19140 skip_inf_loop_check:
19141 if (!force_new_state &&
19142 env->jmps_processed - env->prev_jmps_processed < 20 &&
19143 env->insn_processed - env->prev_insn_processed < 100)
19144 add_new_state = false;
19145 goto miss;
19146 }
19147 /* If sl->state is a part of a loop and this loop's entry is a part of
19148 * current verification path then states have to be compared exactly.
19149 * 'force_exact' is needed to catch the following case:
19150 *
19151 * initial Here state 'succ' was processed first,
19152 * | it was eventually tracked to produce a
19153 * V state identical to 'hdr'.
19154 * .---------> hdr All branches from 'succ' had been explored
19155 * | | and thus 'succ' has its .branches == 0.
19156 * | V
19157 * | .------... Suppose states 'cur' and 'succ' correspond
19158 * | | | to the same instruction + callsites.
19159 * | V V In such case it is necessary to check
19160 * | ... ... if 'succ' and 'cur' are states_equal().
19161 * | | | If 'succ' and 'cur' are a part of the
19162 * | V V same loop exact flag has to be set.
19163 * | succ <- cur To check if that is the case, verify
19164 * | | if loop entry of 'succ' is in current
19165 * | V DFS path.
19166 * | ...
19167 * | |
19168 * '----'
19169 *
19170 * Additional details are in the comment before get_loop_entry().
19171 */
19172 loop_entry = get_loop_entry(env, &sl->state);
19173 if (IS_ERR(loop_entry))
19174 return PTR_ERR(loop_entry);
19175 force_exact = loop_entry && loop_entry->branches > 0;
19176 if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
19177 if (force_exact)
19178 update_loop_entry(env, cur, loop_entry);
19179 hit:
19180 sl->hit_cnt++;
19181 /* reached equivalent register/stack state,
19182 * prune the search.
19183 * Registers read by the continuation are read by us.
19184 * If we have any write marks in env->cur_state, they
19185 * will prevent corresponding reads in the continuation
19186 * from reaching our parent (an explored_state). Our
19187 * own state will get the read marks recorded, but
19188 * they'll be immediately forgotten as we're pruning
19189 * this state and will pop a new one.
19190 */
19191 err = propagate_liveness(env, &sl->state, cur);
19192
19193 /* if previous state reached the exit with precision and
19194 * current state is equivalent to it (except precision marks)
19195 * the precision needs to be propagated back in
19196 * the current state.
19197 */
19198 if (is_jmp_point(env, env->insn_idx))
19199 err = err ? : push_insn_history(env, cur, 0, 0);
19200 err = err ? : propagate_precision(env, &sl->state);
19201 if (err)
19202 return err;
19203 return 1;
19204 }
19205 miss:
19206			/* when a new state is not going to be added, do not increase the miss count.
19207 * Otherwise several loop iterations will remove the state
19208 * recorded earlier. The goal of these heuristics is to have
19209 * states from some iterations of the loop (some in the beginning
19210 * and some at the end) to help pruning.
19211 */
19212 if (add_new_state)
19213 sl->miss_cnt++;
19214 /* heuristic to determine whether this state is beneficial
19215 * to keep checking from state equivalence point of view.
19216 * Higher numbers increase max_states_per_insn and verification time,
19217 * but do not meaningfully decrease insn_processed.
19218 * 'n' controls how many times state could miss before eviction.
19219 * Use bigger 'n' for checkpoints because evicting checkpoint states
19220 * too early would hinder iterator convergence.
19221 */
19222 n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
19223 if (sl->miss_cnt > sl->hit_cnt * n + n) {
19224 /* the state is unlikely to be useful. Remove it to
19225 * speed up verification
19226 */
19227 sl->in_free_list = true;
19228 list_del(&sl->node);
19229 list_add(&sl->node, &env->free_list);
19230 env->free_list_size++;
19231 env->explored_states_size--;
19232 maybe_free_verifier_state(env, sl);
19233 }
19234 }
19235
19236 if (env->max_states_per_insn < states_cnt)
19237 env->max_states_per_insn = states_cnt;
19238
19239 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
19240 return 0;
19241
19242 if (!add_new_state)
19243 return 0;
19244
19245 /* There were no equivalent states, remember the current one.
19246 * Technically the current state is not proven to be safe yet,
19247 * but it will either reach outer most bpf_exit (which means it's safe)
19248 * or it will be rejected. When there are no loops the verifier won't be
19249 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
19250 * again on the way to bpf_exit.
19251 * When looping the sl->state.branches will be > 0 and this state
19252 * will not be considered for equivalence until branches == 0.
19253 */
19254 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
19255 if (!new_sl)
19256 return -ENOMEM;
19257 env->total_states++;
19258 env->explored_states_size++;
19259 update_peak_states(env);
19260 env->prev_jmps_processed = env->jmps_processed;
19261 env->prev_insn_processed = env->insn_processed;
19262
19263 /* forget precise markings we inherited, see __mark_chain_precision */
19264 if (env->bpf_capable)
19265 mark_all_scalars_imprecise(env, cur);
19266
19267 /* add new state to the head of linked list */
19268 new = &new_sl->state;
19269 err = copy_verifier_state(new, cur);
19270 if (err) {
19271 free_verifier_state(new, false);
19272 kfree(new_sl);
19273 return err;
19274 }
19275 new->insn_idx = insn_idx;
19276 WARN_ONCE(new->branches != 1,
19277 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
19278
19279 cur->parent = new;
19280 cur->first_insn_idx = insn_idx;
19281 cur->insn_hist_start = cur->insn_hist_end;
19282 cur->dfs_depth = new->dfs_depth + 1;
19283 list_add(&new_sl->node, head);
19284
19285 /* connect new state to parentage chain. Current frame needs all
19286 * registers connected. Only r6 - r9 of the callers are alive (pushed
19287 * to the stack implicitly by JITs) so in callers' frames connect just
19288 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
19289 * the state of the call instruction (with WRITTEN set), and r0 comes
19290 * from callee with its full parentage chain, anyway.
19291 */
19292 /* clear write marks in current state: the writes we did are not writes
19293 * our child did, so they don't screen off its reads from us.
19294 * (There are no read marks in current state, because reads always mark
19295 * their parent and current state never has children yet. Only
19296 * explored_states can get read marks.)
19297 */
19298 for (j = 0; j <= cur->curframe; j++) {
19299 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
19300 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
19301 for (i = 0; i < BPF_REG_FP; i++)
19302 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
19303 }
19304
19305 /* all stack frames are accessible from callee, clear them all */
19306 for (j = 0; j <= cur->curframe; j++) {
19307 struct bpf_func_state *frame = cur->frame[j];
19308 struct bpf_func_state *newframe = new->frame[j];
19309
19310 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
19311 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
19312 frame->stack[i].spilled_ptr.parent =
19313 &newframe->stack[i].spilled_ptr;
19314 }
19315 }
19316 return 0;
19317 }
19318
19319 /* Return true if it's OK to have the same insn return a different type. */
19320	static bool reg_type_mismatch_ok(enum bpf_reg_type type)
19321 {
19322 switch (base_type(type)) {
19323 case PTR_TO_CTX:
19324 case PTR_TO_SOCKET:
19325 case PTR_TO_SOCK_COMMON:
19326 case PTR_TO_TCP_SOCK:
19327 case PTR_TO_XDP_SOCK:
19328 case PTR_TO_BTF_ID:
19329 case PTR_TO_ARENA:
19330 return false;
19331 default:
19332 return true;
19333 }
19334 }
19335
19336 /* If an instruction was previously used with particular pointer types, then we
19337 * need to be careful to avoid cases such as the below, where it may be ok
19338 * for one branch accessing the pointer, but not ok for the other branch:
19339 *
19340 * R1 = sock_ptr
19341 * goto X;
19342 * ...
19343 * R1 = some_other_valid_ptr;
19344 * goto X;
19345 * ...
19346 * R2 = *(u32 *)(R1 + 0);
19347 */
19348	static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
19349 {
19350 return src != prev && (!reg_type_mismatch_ok(src) ||
19351 !reg_type_mismatch_ok(prev));
19352 }
19353
19354	static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
19355 bool allow_trust_mismatch)
19356 {
19357 enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
19358
19359 if (*prev_type == NOT_INIT) {
19360 /* Saw a valid insn
19361 * dst_reg = *(u32 *)(src_reg + off)
19362 * save type to validate intersecting paths
19363 */
19364 *prev_type = type;
19365 } else if (reg_type_mismatch(type, *prev_type)) {
19366 /* Abuser program is trying to use the same insn
19367 * dst_reg = *(u32*) (src_reg + off)
19368 * with different pointer types:
19369 * src_reg == ctx in one branch and
19370 * src_reg == stack|map in some other branch.
19371 * Reject it.
19372 */
19373 if (allow_trust_mismatch &&
19374 base_type(type) == PTR_TO_BTF_ID &&
19375 base_type(*prev_type) == PTR_TO_BTF_ID) {
19376 /*
19377 * Have to support a use case when one path through
19378 * the program yields TRUSTED pointer while another
19379 * is UNTRUSTED. Fallback to UNTRUSTED to generate
19380 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
19381 */
19382 *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
19383 } else {
19384 verbose(env, "same insn cannot be used with different pointers\n");
19385 return -EINVAL;
19386 }
19387 }
19388
19389 return 0;
19390 }
19391
19392	static int do_check(struct bpf_verifier_env *env)
19393 {
19394 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
19395 struct bpf_verifier_state *state = env->cur_state;
19396 struct bpf_insn *insns = env->prog->insnsi;
19397 struct bpf_reg_state *regs;
19398 int insn_cnt = env->prog->len;
19399 bool do_print_state = false;
19400 int prev_insn_idx = -1;
19401
19402 for (;;) {
19403 bool exception_exit = false;
19404 struct bpf_insn *insn;
19405 u8 class;
19406 int err;
19407
19408 /* reset current history entry on each new instruction */
19409 env->cur_hist_ent = NULL;
19410
19411 env->prev_insn_idx = prev_insn_idx;
19412 if (env->insn_idx >= insn_cnt) {
19413 verbose(env, "invalid insn idx %d insn_cnt %d\n",
19414 env->insn_idx, insn_cnt);
19415 return -EFAULT;
19416 }
19417
19418 insn = &insns[env->insn_idx];
19419 class = BPF_CLASS(insn->code);
19420
19421 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
19422 verbose(env,
19423 "BPF program is too large. Processed %d insn\n",
19424 env->insn_processed);
19425 return -E2BIG;
19426 }
19427
19428 state->last_insn_idx = env->prev_insn_idx;
19429
19430 if (is_prune_point(env, env->insn_idx)) {
19431 err = is_state_visited(env, env->insn_idx);
19432 if (err < 0)
19433 return err;
19434 if (err == 1) {
19435 /* found equivalent state, can prune the search */
19436 if (env->log.level & BPF_LOG_LEVEL) {
19437 if (do_print_state)
19438 verbose(env, "\nfrom %d to %d%s: safe\n",
19439 env->prev_insn_idx, env->insn_idx,
19440 env->cur_state->speculative ?
19441 " (speculative execution)" : "");
19442 else
19443 verbose(env, "%d: safe\n", env->insn_idx);
19444 }
19445 goto process_bpf_exit;
19446 }
19447 }
19448
19449 if (is_jmp_point(env, env->insn_idx)) {
19450 err = push_insn_history(env, state, 0, 0);
19451 if (err)
19452 return err;
19453 }
19454
19455 if (signal_pending(current))
19456 return -EAGAIN;
19457
19458 if (need_resched())
19459 cond_resched();
19460
19461 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
19462 verbose(env, "\nfrom %d to %d%s:",
19463 env->prev_insn_idx, env->insn_idx,
19464 env->cur_state->speculative ?
19465 " (speculative execution)" : "");
19466 print_verifier_state(env, state, state->curframe, true);
19467 do_print_state = false;
19468 }
19469
19470 if (env->log.level & BPF_LOG_LEVEL) {
19471 if (verifier_state_scratched(env))
19472 print_insn_state(env, state, state->curframe);
19473
19474 verbose_linfo(env, env->insn_idx, "; ");
19475 env->prev_log_pos = env->log.end_pos;
19476 verbose(env, "%d: ", env->insn_idx);
19477 verbose_insn(env, insn);
19478 env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
19479 env->prev_log_pos = env->log.end_pos;
19480 }
19481
19482 if (bpf_prog_is_offloaded(env->prog->aux)) {
19483 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
19484 env->prev_insn_idx);
19485 if (err)
19486 return err;
19487 }
19488
19489 regs = cur_regs(env);
19490 sanitize_mark_insn_seen(env);
19491 prev_insn_idx = env->insn_idx;
19492
19493 if (class == BPF_ALU || class == BPF_ALU64) {
19494 err = check_alu_op(env, insn);
19495 if (err)
19496 return err;
19497
19498 } else if (class == BPF_LDX) {
19499 bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX;
19500
19501 /* Check for reserved fields is already done in
19502 * resolve_pseudo_ldimm64().
19503 */
19504 err = check_load_mem(env, insn, false, is_ldsx, true,
19505 "ldx");
19506 if (err)
19507 return err;
19508 } else if (class == BPF_STX) {
19509 if (BPF_MODE(insn->code) == BPF_ATOMIC) {
19510 err = check_atomic(env, insn);
19511 if (err)
19512 return err;
19513 env->insn_idx++;
19514 continue;
19515 }
19516
19517 if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
19518 verbose(env, "BPF_STX uses reserved fields\n");
19519 return -EINVAL;
19520 }
19521
19522 err = check_store_reg(env, insn, false);
19523 if (err)
19524 return err;
19525 } else if (class == BPF_ST) {
19526 enum bpf_reg_type dst_reg_type;
19527
19528 if (BPF_MODE(insn->code) != BPF_MEM ||
19529 insn->src_reg != BPF_REG_0) {
19530 verbose(env, "BPF_ST uses reserved fields\n");
19531 return -EINVAL;
19532 }
19533 /* check src operand */
19534 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
19535 if (err)
19536 return err;
19537
19538 dst_reg_type = regs[insn->dst_reg].type;
19539
19540 /* check that memory (dst_reg + off) is writeable */
19541 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
19542 insn->off, BPF_SIZE(insn->code),
19543 BPF_WRITE, -1, false, false);
19544 if (err)
19545 return err;
19546
19547 err = save_aux_ptr_type(env, dst_reg_type, false);
19548 if (err)
19549 return err;
19550 } else if (class == BPF_JMP || class == BPF_JMP32) {
19551 u8 opcode = BPF_OP(insn->code);
19552
19553 env->jmps_processed++;
19554 if (opcode == BPF_CALL) {
19555 if (BPF_SRC(insn->code) != BPF_K ||
19556 (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
19557 && insn->off != 0) ||
19558 (insn->src_reg != BPF_REG_0 &&
19559 insn->src_reg != BPF_PSEUDO_CALL &&
19560 insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
19561 insn->dst_reg != BPF_REG_0 ||
19562 class == BPF_JMP32) {
19563 verbose(env, "BPF_CALL uses reserved fields\n");
19564 return -EINVAL;
19565 }
19566
19567 if (env->cur_state->active_locks) {
19568 if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
19569 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
19570 (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
19571 verbose(env, "function calls are not allowed while holding a lock\n");
19572 return -EINVAL;
19573 }
19574 }
19575 if (insn->src_reg == BPF_PSEUDO_CALL) {
19576 err = check_func_call(env, insn, &env->insn_idx);
19577 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
19578 err = check_kfunc_call(env, insn, &env->insn_idx);
19579 if (!err && is_bpf_throw_kfunc(insn)) {
19580 exception_exit = true;
19581 goto process_bpf_exit_full;
19582 }
19583 } else {
19584 err = check_helper_call(env, insn, &env->insn_idx);
19585 }
19586 if (err)
19587 return err;
19588
19589 mark_reg_scratched(env, BPF_REG_0);
19590 } else if (opcode == BPF_JA) {
19591 if (BPF_SRC(insn->code) != BPF_K ||
19592 insn->src_reg != BPF_REG_0 ||
19593 insn->dst_reg != BPF_REG_0 ||
19594 (class == BPF_JMP && insn->imm != 0) ||
19595 (class == BPF_JMP32 && insn->off != 0)) {
19596 verbose(env, "BPF_JA uses reserved fields\n");
19597 return -EINVAL;
19598 }
19599
19600 if (class == BPF_JMP)
19601 env->insn_idx += insn->off + 1;
19602 else
19603 env->insn_idx += insn->imm + 1;
19604 continue;
19605
19606 } else if (opcode == BPF_EXIT) {
19607 if (BPF_SRC(insn->code) != BPF_K ||
19608 insn->imm != 0 ||
19609 insn->src_reg != BPF_REG_0 ||
19610 insn->dst_reg != BPF_REG_0 ||
19611 class == BPF_JMP32) {
19612 verbose(env, "BPF_EXIT uses reserved fields\n");
19613 return -EINVAL;
19614 }
19615 process_bpf_exit_full:
19616 /* We must do check_reference_leak here before
19617 * prepare_func_exit to handle the case when
19618 * state->curframe > 0, it may be a callback
19619 * function, for which reference_state must
19620 * match caller reference state when it exits.
19621 */
19622 err = check_resource_leak(env, exception_exit, !env->cur_state->curframe,
19623 "BPF_EXIT instruction in main prog");
19624 if (err)
19625 return err;
19626
19627 /* The side effect of the prepare_func_exit
19628 * which is being skipped is that it frees
19629 * bpf_func_state. Typically, process_bpf_exit
19630 * will only be hit with outermost exit.
19631 * copy_verifier_state in pop_stack will handle
19632 * freeing of any extra bpf_func_state left over
19633 * from not processing all nested function
19634 * exits. We also skip return code checks as
19635 * they are not needed for exceptional exits.
19636 */
19637 if (exception_exit)
19638 goto process_bpf_exit;
19639
19640 if (state->curframe) {
19641 /* exit from nested function */
19642 err = prepare_func_exit(env, &env->insn_idx);
19643 if (err)
19644 return err;
19645 do_print_state = true;
19646 continue;
19647 }
19648
19649 err = check_return_code(env, BPF_REG_0, "R0");
19650 if (err)
19651 return err;
19652 process_bpf_exit:
19653 mark_verifier_state_scratched(env);
19654 update_branch_counts(env, env->cur_state);
19655 err = pop_stack(env, &prev_insn_idx,
19656 &env->insn_idx, pop_log);
19657 if (err < 0) {
19658 if (err != -ENOENT)
19659 return err;
19660 break;
19661 } else {
19662 if (WARN_ON_ONCE(env->cur_state->loop_entry)) {
19663 verbose(env, "verifier bug: env->cur_state->loop_entry != NULL\n");
19664 return -EFAULT;
19665 }
19666 do_print_state = true;
19667 continue;
19668 }
19669 } else {
19670 err = check_cond_jmp_op(env, insn, &env->insn_idx);
19671 if (err)
19672 return err;
19673 }
19674 } else if (class == BPF_LD) {
19675 u8 mode = BPF_MODE(insn->code);
19676
19677 if (mode == BPF_ABS || mode == BPF_IND) {
19678 err = check_ld_abs(env, insn);
19679 if (err)
19680 return err;
19681
19682 } else if (mode == BPF_IMM) {
19683 err = check_ld_imm(env, insn);
19684 if (err)
19685 return err;
19686
19687 env->insn_idx++;
19688 sanitize_mark_insn_seen(env);
19689 } else {
19690 verbose(env, "invalid BPF_LD mode\n");
19691 return -EINVAL;
19692 }
19693 } else {
19694 verbose(env, "unknown insn class %d\n", class);
19695 return -EINVAL;
19696 }
19697
19698 env->insn_idx++;
19699 }
19700
19701 return 0;
19702 }
19703
19704	static int find_btf_percpu_datasec(struct btf *btf)
19705 {
19706 const struct btf_type *t;
19707 const char *tname;
19708 int i, n;
19709
19710 /*
19711		 * vmlinux and each module have their own ".data..percpu"
19712		 * DATASECs in BTF. So in the module case, we need to skip vmlinux BTF
19713		 * types and look only at the module's own BTF types.
19714 */
19715 n = btf_nr_types(btf);
19716 if (btf_is_module(btf))
19717 i = btf_nr_types(btf_vmlinux);
19718 else
19719 i = 1;
19720
19721 for(; i < n; i++) {
19722 t = btf_type_by_id(btf, i);
19723 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
19724 continue;
19725
19726 tname = btf_name_by_offset(btf, t->name_off);
19727 if (!strcmp(tname, ".data..percpu"))
19728 return i;
19729 }
19730
19731 return -ENOENT;
19732 }
19733
19734 /*
19735 * Add btf to the used_btfs array and return the index. (If the btf was
19736 * already added, then just return the index.) Upon successful insertion
19737 * increase btf refcnt, and, if present, also refcount the corresponding
19738 * kernel module.
19739 */
19740	static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
19741 {
19742 struct btf_mod_pair *btf_mod;
19743 int i;
19744
19745 /* check whether we recorded this BTF (and maybe module) already */
19746 for (i = 0; i < env->used_btf_cnt; i++)
19747 if (env->used_btfs[i].btf == btf)
19748 return i;
19749
19750 if (env->used_btf_cnt >= MAX_USED_BTFS)
19751 return -E2BIG;
19752
19753 btf_get(btf);
19754
19755 btf_mod = &env->used_btfs[env->used_btf_cnt];
19756 btf_mod->btf = btf;
19757 btf_mod->module = NULL;
19758
19759 /* if we reference variables from kernel module, bump its refcount */
19760 if (btf_is_module(btf)) {
19761 btf_mod->module = btf_try_get_module(btf);
19762 if (!btf_mod->module) {
19763 btf_put(btf);
19764 return -ENXIO;
19765 }
19766 }
19767
19768 return env->used_btf_cnt++;
19769 }
19770
19771 /* replace pseudo btf_id with kernel symbol address */
19772	static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
19773 struct bpf_insn *insn,
19774 struct bpf_insn_aux_data *aux,
19775 struct btf *btf)
19776 {
19777 const struct btf_var_secinfo *vsi;
19778 const struct btf_type *datasec;
19779 const struct btf_type *t;
19780 const char *sym_name;
19781 bool percpu = false;
19782 u32 type, id = insn->imm;
19783 s32 datasec_id;
19784 u64 addr;
19785 int i;
19786
19787 t = btf_type_by_id(btf, id);
19788 if (!t) {
19789 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
19790 return -ENOENT;
19791 }
19792
19793 if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
19794 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
19795 return -EINVAL;
19796 }
19797
19798 sym_name = btf_name_by_offset(btf, t->name_off);
19799 addr = kallsyms_lookup_name(sym_name);
19800 if (!addr) {
19801 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
19802 sym_name);
19803 return -ENOENT;
19804 }
19805 insn[0].imm = (u32)addr;
19806 insn[1].imm = addr >> 32;
19807
19808 if (btf_type_is_func(t)) {
19809 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
19810 aux->btf_var.mem_size = 0;
19811 return 0;
19812 }
19813
19814 datasec_id = find_btf_percpu_datasec(btf);
19815 if (datasec_id > 0) {
19816 datasec = btf_type_by_id(btf, datasec_id);
19817 for_each_vsi(i, datasec, vsi) {
19818 if (vsi->type == id) {
19819 percpu = true;
19820 break;
19821 }
19822 }
19823 }
19824
19825 type = t->type;
19826 t = btf_type_skip_modifiers(btf, type, NULL);
19827 if (percpu) {
19828 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
19829 aux->btf_var.btf = btf;
19830 aux->btf_var.btf_id = type;
19831 } else if (!btf_type_is_struct(t)) {
19832 const struct btf_type *ret;
19833 const char *tname;
19834 u32 tsize;
19835
19836 /* resolve the type size of ksym. */
19837 ret = btf_resolve_size(btf, t, &tsize);
19838 if (IS_ERR(ret)) {
19839 tname = btf_name_by_offset(btf, t->name_off);
19840 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
19841 tname, PTR_ERR(ret));
19842 return -EINVAL;
19843 }
19844 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
19845 aux->btf_var.mem_size = tsize;
19846 } else {
19847 aux->btf_var.reg_type = PTR_TO_BTF_ID;
19848 aux->btf_var.btf = btf;
19849 aux->btf_var.btf_id = type;
19850 }
19851
19852 return 0;
19853 }
19854
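/* For illustration, the ldimm64 form handled here, as described by the code
 * above and below (a sketch; 'var_btf_id' names a KIND_VAR or KIND_FUNC in
 * the given BTF):
 *
 *   insn[0]: BPF_LD | BPF_IMM | BPF_DW, src_reg = BPF_PSEUDO_BTF_ID,
 *            imm = var_btf_id
 *   insn[1]: imm = BTF object FD (0 means btf_vmlinux)
 *
 * On success insn[0].imm and insn[1].imm are rewritten with the low and high
 * 32 bits of the kernel symbol address found via kallsyms_lookup_name().
 */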
19855	static int check_pseudo_btf_id(struct bpf_verifier_env *env,
19856 struct bpf_insn *insn,
19857 struct bpf_insn_aux_data *aux)
19858 {
19859 struct btf *btf;
19860 int btf_fd;
19861 int err;
19862
19863 btf_fd = insn[1].imm;
19864 if (btf_fd) {
19865 CLASS(fd, f)(btf_fd);
19866
19867 btf = __btf_get_by_fd(f);
19868 if (IS_ERR(btf)) {
19869 verbose(env, "invalid module BTF object FD specified.\n");
19870 return -EINVAL;
19871 }
19872 } else {
19873 if (!btf_vmlinux) {
19874 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
19875 return -EINVAL;
19876 }
19877 btf = btf_vmlinux;
19878 }
19879
19880 err = __check_pseudo_btf_id(env, insn, aux, btf);
19881 if (err)
19882 return err;
19883
19884 err = __add_used_btf(env, btf);
19885 if (err < 0)
19886 return err;
19887 return 0;
19888 }
19889
19890	static bool is_tracing_prog_type(enum bpf_prog_type type)
19891 {
19892 switch (type) {
19893 case BPF_PROG_TYPE_KPROBE:
19894 case BPF_PROG_TYPE_TRACEPOINT:
19895 case BPF_PROG_TYPE_PERF_EVENT:
19896 case BPF_PROG_TYPE_RAW_TRACEPOINT:
19897 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
19898 return true;
19899 default:
19900 return false;
19901 }
19902 }
19903
19904	static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
19905 {
19906 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
19907 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
19908 }
19909
19910	static int check_map_prog_compatibility(struct bpf_verifier_env *env,
19911 struct bpf_map *map,
19912 struct bpf_prog *prog)
19913
19914 {
19915 enum bpf_prog_type prog_type = resolve_prog_type(prog);
19916
19917 if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
19918 btf_record_has_field(map->record, BPF_RB_ROOT)) {
19919 if (is_tracing_prog_type(prog_type)) {
19920 verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
19921 return -EINVAL;
19922 }
19923 }
19924
19925 if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
19926 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
19927 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
19928 return -EINVAL;
19929 }
19930
19931 if (is_tracing_prog_type(prog_type)) {
19932 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
19933 return -EINVAL;
19934 }
19935 }
19936
19937 if (btf_record_has_field(map->record, BPF_TIMER)) {
19938 if (is_tracing_prog_type(prog_type)) {
19939 verbose(env, "tracing progs cannot use bpf_timer yet\n");
19940 return -EINVAL;
19941 }
19942 }
19943
19944 if (btf_record_has_field(map->record, BPF_WORKQUEUE)) {
19945 if (is_tracing_prog_type(prog_type)) {
19946 verbose(env, "tracing progs cannot use bpf_wq yet\n");
19947 return -EINVAL;
19948 }
19949 }
19950
19951 if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
19952 !bpf_offload_prog_map_match(prog, map)) {
19953 verbose(env, "offload device mismatch between prog and map\n");
19954 return -EINVAL;
19955 }
19956
19957 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
19958 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
19959 return -EINVAL;
19960 }
19961
19962 if (prog->sleepable)
19963 switch (map->map_type) {
19964 case BPF_MAP_TYPE_HASH:
19965 case BPF_MAP_TYPE_LRU_HASH:
19966 case BPF_MAP_TYPE_ARRAY:
19967 case BPF_MAP_TYPE_PERCPU_HASH:
19968 case BPF_MAP_TYPE_PERCPU_ARRAY:
19969 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
19970 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
19971 case BPF_MAP_TYPE_HASH_OF_MAPS:
19972 case BPF_MAP_TYPE_RINGBUF:
19973 case BPF_MAP_TYPE_USER_RINGBUF:
19974 case BPF_MAP_TYPE_INODE_STORAGE:
19975 case BPF_MAP_TYPE_SK_STORAGE:
19976 case BPF_MAP_TYPE_TASK_STORAGE:
19977 case BPF_MAP_TYPE_CGRP_STORAGE:
19978 case BPF_MAP_TYPE_QUEUE:
19979 case BPF_MAP_TYPE_STACK:
19980 case BPF_MAP_TYPE_ARENA:
19981 break;
19982 default:
19983 verbose(env,
19984 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
19985 return -EINVAL;
19986 }
19987
19988 if (bpf_map_is_cgroup_storage(map) &&
19989 bpf_cgroup_storage_assign(env->prog->aux, map)) {
19990 verbose(env, "only one cgroup storage of each type is allowed\n");
19991 return -EBUSY;
19992 }
19993
19994 if (map->map_type == BPF_MAP_TYPE_ARENA) {
19995 if (env->prog->aux->arena) {
19996 verbose(env, "Only one arena per program\n");
19997 return -EBUSY;
19998 }
19999 if (!env->allow_ptr_leaks || !env->bpf_capable) {
20000 verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
20001 return -EPERM;
20002 }
20003 if (!env->prog->jit_requested) {
20004 verbose(env, "JIT is required to use arena\n");
20005 return -EOPNOTSUPP;
20006 }
20007 if (!bpf_jit_supports_arena()) {
20008 verbose(env, "JIT doesn't support arena\n");
20009 return -EOPNOTSUPP;
20010 }
20011 env->prog->aux->arena = (void *)map;
20012 if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
20013 verbose(env, "arena's user address must be set via map_extra or mmap()\n");
20014 return -EINVAL;
20015 }
20016 }
20017
20018 return 0;
20019 }
20020
20021	static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
20022 {
20023 int i, err;
20024
20025 /* check whether we recorded this map already */
20026 for (i = 0; i < env->used_map_cnt; i++)
20027 if (env->used_maps[i] == map)
20028 return i;
20029
20030 if (env->used_map_cnt >= MAX_USED_MAPS) {
20031 verbose(env, "The total number of maps per program has reached the limit of %u\n",
20032 MAX_USED_MAPS);
20033 return -E2BIG;
20034 }
20035
20036 err = check_map_prog_compatibility(env, map, env->prog);
20037 if (err)
20038 return err;
20039
20040 if (env->prog->sleepable)
20041 atomic64_inc(&map->sleepable_refcnt);
20042
20043 /* hold the map. If the program is rejected by verifier,
20044 * the map will be released by release_maps() or it
20045 * will be used by the valid program until it's unloaded
20046 * and all maps are released in bpf_free_used_maps()
20047 */
20048 bpf_map_inc(map);
20049
20050 env->used_maps[env->used_map_cnt++] = map;
20051
20052 return env->used_map_cnt - 1;
20053 }
20054
20055 /* Add map behind fd to used maps list, if it's not already there, and return
20056 * its index.
20057 * Returns <0 on error, or >= 0 index, on success.
20058 */
20059	static int add_used_map(struct bpf_verifier_env *env, int fd)
20060 {
20061 struct bpf_map *map;
20062 CLASS(fd, f)(fd);
20063
20064 map = __bpf_map_get(f);
20065 if (IS_ERR(map)) {
20066 verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
20067 return PTR_ERR(map);
20068 }
20069
20070 return __add_used_map(env, map);
20071 }
20072
20073 /* find and rewrite pseudo imm in ld_imm64 instructions:
20074 *
20075 * 1. if it accesses map FD, replace it with actual map pointer.
20076 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
20077 *
20078 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
20079 */
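/* For illustration, a map-fd pseudo load as a loader would emit it (a sketch
 * using the insn macros from linux/filter.h; 'map_fd' is a userspace FD):
 *
 *   BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_FD, map_fd)
 *
 * i.e. two insns with insn[0].src_reg = BPF_PSEUDO_MAP_FD and
 * insn[0].imm = map_fd. After this pass insn[0].imm/insn[1].imm instead hold
 * the low/high 32 bits of the resolved 'struct bpf_map *'.
 */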
20080	static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
20081 {
20082 struct bpf_insn *insn = env->prog->insnsi;
20083 int insn_cnt = env->prog->len;
20084 int i, err;
20085
20086 err = bpf_prog_calc_tag(env->prog);
20087 if (err)
20088 return err;
20089
20090 for (i = 0; i < insn_cnt; i++, insn++) {
20091 if (BPF_CLASS(insn->code) == BPF_LDX &&
20092 ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
20093 insn->imm != 0)) {
20094 verbose(env, "BPF_LDX uses reserved fields\n");
20095 return -EINVAL;
20096 }
20097
20098 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
20099 struct bpf_insn_aux_data *aux;
20100 struct bpf_map *map;
20101 int map_idx;
20102 u64 addr;
20103 u32 fd;
20104
20105 if (i == insn_cnt - 1 || insn[1].code != 0 ||
20106 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
20107 insn[1].off != 0) {
20108 verbose(env, "invalid bpf_ld_imm64 insn\n");
20109 return -EINVAL;
20110 }
20111
20112 if (insn[0].src_reg == 0)
20113 /* valid generic load 64-bit imm */
20114 goto next_insn;
20115
20116 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
20117 aux = &env->insn_aux_data[i];
20118 err = check_pseudo_btf_id(env, insn, aux);
20119 if (err)
20120 return err;
20121 goto next_insn;
20122 }
20123
20124 if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
20125 aux = &env->insn_aux_data[i];
20126 aux->ptr_type = PTR_TO_FUNC;
20127 goto next_insn;
20128 }
20129
20130 /* In final convert_pseudo_ld_imm64() step, this is
20131 * converted into regular 64-bit imm load insn.
20132 */
20133 switch (insn[0].src_reg) {
20134 case BPF_PSEUDO_MAP_VALUE:
20135 case BPF_PSEUDO_MAP_IDX_VALUE:
20136 break;
20137 case BPF_PSEUDO_MAP_FD:
20138 case BPF_PSEUDO_MAP_IDX:
20139 if (insn[1].imm == 0)
20140 break;
20141 fallthrough;
20142 default:
20143 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
20144 return -EINVAL;
20145 }
20146
20147 switch (insn[0].src_reg) {
20148 case BPF_PSEUDO_MAP_IDX_VALUE:
20149 case BPF_PSEUDO_MAP_IDX:
20150 if (bpfptr_is_null(env->fd_array)) {
20151 verbose(env, "fd_idx without fd_array is invalid\n");
20152 return -EPROTO;
20153 }
20154 if (copy_from_bpfptr_offset(&fd, env->fd_array,
20155 insn[0].imm * sizeof(fd),
20156 sizeof(fd)))
20157 return -EFAULT;
20158 break;
20159 default:
20160 fd = insn[0].imm;
20161 break;
20162 }
20163
20164 map_idx = add_used_map(env, fd);
20165 if (map_idx < 0)
20166 return map_idx;
20167 map = env->used_maps[map_idx];
20168
20169 aux = &env->insn_aux_data[i];
20170 aux->map_index = map_idx;
20171
20172 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
20173 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
20174 addr = (unsigned long)map;
20175 } else {
20176 u32 off = insn[1].imm;
20177
20178 if (off >= BPF_MAX_VAR_OFF) {
20179 verbose(env, "direct value offset of %u is not allowed\n", off);
20180 return -EINVAL;
20181 }
20182
20183 if (!map->ops->map_direct_value_addr) {
20184 verbose(env, "no direct value access support for this map type\n");
20185 return -EINVAL;
20186 }
20187
20188 err = map->ops->map_direct_value_addr(map, &addr, off);
20189 if (err) {
20190 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
20191 map->value_size, off);
20192 return err;
20193 }
20194
20195 aux->map_off = off;
20196 addr += off;
20197 }
20198
20199 insn[0].imm = (u32)addr;
20200 insn[1].imm = addr >> 32;
20201
20202 next_insn:
20203 insn++;
20204 i++;
20205 continue;
20206 }
20207
20208 /* Basic sanity check before we invest more work here. */
20209 if (!bpf_opcode_in_insntable(insn->code)) {
20210 verbose(env, "unknown opcode %02x\n", insn->code);
20211 return -EINVAL;
20212 }
20213 }
20214
20215 /* now all pseudo BPF_LD_IMM64 instructions load valid
20216 * 'struct bpf_map *' into a register instead of user map_fd.
20217 * These pointers will be used later by verifier to validate map access.
20218 */
20219 return 0;
20220 }
20221
20222 /* drop refcnt of maps used by the rejected program */
20223	static void release_maps(struct bpf_verifier_env *env)
20224 {
20225 __bpf_free_used_maps(env->prog->aux, env->used_maps,
20226 env->used_map_cnt);
20227 }
20228
20229 /* drop refcnt of maps used by the rejected program */
20230	static void release_btfs(struct bpf_verifier_env *env)
20231 {
20232 __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
20233 }
20234
20235 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
20236	static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
20237 {
20238 struct bpf_insn *insn = env->prog->insnsi;
20239 int insn_cnt = env->prog->len;
20240 int i;
20241
20242 for (i = 0; i < insn_cnt; i++, insn++) {
20243 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
20244 continue;
20245 if (insn->src_reg == BPF_PSEUDO_FUNC)
20246 continue;
20247 insn->src_reg = 0;
20248 }
20249 }
20250
20251	/* single env->prog->insnsi[off] instruction was replaced with the range
20252 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
20253 * [0, off) and [off, end) to new locations, so the patched range stays zero
20254 */
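/* For illustration, a sketch with off = 2, cnt = 3 (one insn patched into
 * three) and an original aux array [a0 a1 a2 a3]:
 *
 *   new aux: [a0 a1  0  0 a2 a3]
 *
 * the old entry for the patched insn (a2) ends up at off + cnt - 1, the two
 * zeroed slots cover the newly generated insns, and the loop below expands
 * the original 'seen' count over them.
 */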
20255	static void adjust_insn_aux_data(struct bpf_verifier_env *env,
20256 struct bpf_insn_aux_data *new_data,
20257 struct bpf_prog *new_prog, u32 off, u32 cnt)
20258 {
20259 struct bpf_insn_aux_data *old_data = env->insn_aux_data;
20260 struct bpf_insn *insn = new_prog->insnsi;
20261 u32 old_seen = old_data[off].seen;
20262 u32 prog_len;
20263 int i;
20264
20265		/* aux info at OFF always needs adjustment, no matter whether the fast path
20266		 * (cnt == 1) is taken or not. There is no guarantee that the insn at OFF is
20267		 * the original insn of the old prog.
20268 */
20269 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
20270
20271 if (cnt == 1)
20272 return;
20273 prog_len = new_prog->len;
20274
20275 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
20276 memcpy(new_data + off + cnt - 1, old_data + off,
20277 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
20278 for (i = off; i < off + cnt - 1; i++) {
20279 /* Expand insni[off]'s seen count to the patched range. */
20280 new_data[i].seen = old_seen;
20281 new_data[i].zext_dst = insn_has_def32(env, insn + i);
20282 }
20283 env->insn_aux_data = new_data;
20284 vfree(old_data);
20285 }
20286
20287	static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
20288 {
20289 int i;
20290
20291 if (len == 1)
20292 return;
20293 /* NOTE: fake 'exit' subprog should be updated as well. */
20294 for (i = 0; i <= env->subprog_cnt; i++) {
20295 if (env->subprog_info[i].start <= off)
20296 continue;
20297 env->subprog_info[i].start += len - 1;
20298 }
20299 }
20300
20301	static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
20302 {
20303 struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
20304 int i, sz = prog->aux->size_poke_tab;
20305 struct bpf_jit_poke_descriptor *desc;
20306
20307 for (i = 0; i < sz; i++) {
20308 desc = &tab[i];
20309 if (desc->insn_idx <= off)
20310 continue;
20311 desc->insn_idx += len - 1;
20312 }
20313 }
20314
20315	static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
20316 const struct bpf_insn *patch, u32 len)
20317 {
20318 struct bpf_prog *new_prog;
20319 struct bpf_insn_aux_data *new_data = NULL;
20320
20321 if (len > 1) {
20322 new_data = vzalloc(array_size(env->prog->len + len - 1,
20323 sizeof(struct bpf_insn_aux_data)));
20324 if (!new_data)
20325 return NULL;
20326 }
20327
20328 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
20329 if (IS_ERR(new_prog)) {
20330 if (PTR_ERR(new_prog) == -ERANGE)
20331 verbose(env,
20332 "insn %d cannot be patched due to 16-bit range\n",
20333 env->insn_aux_data[off].orig_idx);
20334 vfree(new_data);
20335 return NULL;
20336 }
20337 adjust_insn_aux_data(env, new_data, new_prog, off, len);
20338 adjust_subprog_starts(env, off, len);
20339 adjust_poke_descs(new_prog, off, len);
20340 return new_prog;
20341 }
20342
20343 /*
20344 * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
20345 * jump offset by 'delta'.
20346 */
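/* For illustration (a sketch): a jump at insn i targets tgt_idx when
 * i + 1 + insn->off == tgt_idx (or i + 1 + insn->imm for the JMP32 JA form),
 * so bumping the offset by 'delta' makes it land on tgt_idx + delta:
 *
 *   before:  i: if r1 != 0 goto +off          ; resolves to tgt_idx
 *   after:   i: if r1 != 0 goto +(off + delta) ; resolves to tgt_idx + delta
 */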
20347	static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
20348 {
20349 struct bpf_insn *insn = prog->insnsi;
20350 u32 insn_cnt = prog->len, i;
20351 s32 imm;
20352 s16 off;
20353
20354 for (i = 0; i < insn_cnt; i++, insn++) {
20355 u8 code = insn->code;
20356
20357 if (tgt_idx <= i && i < tgt_idx + delta)
20358 continue;
20359
20360 if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
20361 BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
20362 continue;
20363
20364 if (insn->code == (BPF_JMP32 | BPF_JA)) {
20365 if (i + 1 + insn->imm != tgt_idx)
20366 continue;
20367 if (check_add_overflow(insn->imm, delta, &imm))
20368 return -ERANGE;
20369 insn->imm = imm;
20370 } else {
20371 if (i + 1 + insn->off != tgt_idx)
20372 continue;
20373 if (check_add_overflow(insn->off, delta, &off))
20374 return -ERANGE;
20375 insn->off = off;
20376 }
20377 }
20378 return 0;
20379 }
20380
20381	static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
20382 u32 off, u32 cnt)
20383 {
20384 int i, j;
20385
20386 /* find first prog starting at or after off (first to remove) */
20387 for (i = 0; i < env->subprog_cnt; i++)
20388 if (env->subprog_info[i].start >= off)
20389 break;
20390 /* find first prog starting at or after off + cnt (first to stay) */
20391 for (j = i; j < env->subprog_cnt; j++)
20392 if (env->subprog_info[j].start >= off + cnt)
20393 break;
20394 /* if j doesn't start exactly at off + cnt, we are just removing
20395 * the front of previous prog
20396 */
20397 if (env->subprog_info[j].start != off + cnt)
20398 j--;
20399
20400 if (j > i) {
20401 struct bpf_prog_aux *aux = env->prog->aux;
20402 int move;
20403
20404 /* move fake 'exit' subprog as well */
20405 move = env->subprog_cnt + 1 - j;
20406
20407 memmove(env->subprog_info + i,
20408 env->subprog_info + j,
20409 sizeof(*env->subprog_info) * move);
20410 env->subprog_cnt -= j - i;
20411
20412 /* remove func_info */
20413 if (aux->func_info) {
20414 move = aux->func_info_cnt - j;
20415
20416 memmove(aux->func_info + i,
20417 aux->func_info + j,
20418 sizeof(*aux->func_info) * move);
20419 aux->func_info_cnt -= j - i;
20420 /* func_info->insn_off is set after all code rewrites,
20421 * in adjust_btf_func() - no need to adjust
20422 */
20423 }
20424 } else {
20425 /* convert i from "first prog to remove" to "first to adjust" */
20426 if (env->subprog_info[i].start == off)
20427 i++;
20428 }
20429
20430 /* update fake 'exit' subprog as well */
20431 for (; i <= env->subprog_cnt; i++)
20432 env->subprog_info[i].start -= cnt;
20433
20434 return 0;
20435 }
20436
20437	static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
20438 u32 cnt)
20439 {
20440 struct bpf_prog *prog = env->prog;
20441 u32 i, l_off, l_cnt, nr_linfo;
20442 struct bpf_line_info *linfo;
20443
20444 nr_linfo = prog->aux->nr_linfo;
20445 if (!nr_linfo)
20446 return 0;
20447
20448 linfo = prog->aux->linfo;
20449
20450 /* find first line info to remove, count lines to be removed */
20451 for (i = 0; i < nr_linfo; i++)
20452 if (linfo[i].insn_off >= off)
20453 break;
20454
20455 l_off = i;
20456 l_cnt = 0;
20457 for (; i < nr_linfo; i++)
20458 if (linfo[i].insn_off < off + cnt)
20459 l_cnt++;
20460 else
20461 break;
20462
20463		/* If the first live insn doesn't match the first live linfo, it needs to
20464		 * "inherit" the last removed linfo. prog is already modified, so prog->len == off
20465		 * means no live instructions after (tail of the program was removed).
20466 */
20467 if (prog->len != off && l_cnt &&
20468 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
20469 l_cnt--;
20470 linfo[--i].insn_off = off + cnt;
20471 }
20472
20473 /* remove the line info which refer to the removed instructions */
20474 if (l_cnt) {
20475 memmove(linfo + l_off, linfo + i,
20476 sizeof(*linfo) * (nr_linfo - i));
20477
20478 prog->aux->nr_linfo -= l_cnt;
20479 nr_linfo = prog->aux->nr_linfo;
20480 }
20481
20482 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
20483 for (i = l_off; i < nr_linfo; i++)
20484 linfo[i].insn_off -= cnt;
20485
20486 /* fix up all subprogs (incl. 'exit') which start >= off */
20487 for (i = 0; i <= env->subprog_cnt; i++)
20488 if (env->subprog_info[i].linfo_idx > l_off) {
20489 /* program may have started in the removed region but
20490 * may not be fully removed
20491 */
20492 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
20493 env->subprog_info[i].linfo_idx -= l_cnt;
20494 else
20495 env->subprog_info[i].linfo_idx = l_off;
20496 }
20497
20498 return 0;
20499 }
20500
20501 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
20502 {
20503 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
20504 unsigned int orig_prog_len = env->prog->len;
20505 int err;
20506
20507 if (bpf_prog_is_offloaded(env->prog->aux))
20508 bpf_prog_offload_remove_insns(env, off, cnt);
20509
20510 err = bpf_remove_insns(env->prog, off, cnt);
20511 if (err)
20512 return err;
20513
20514 err = adjust_subprog_starts_after_remove(env, off, cnt);
20515 if (err)
20516 return err;
20517
20518 err = bpf_adj_linfo_after_remove(env, off, cnt);
20519 if (err)
20520 return err;
20521
20522 memmove(aux_data + off, aux_data + off + cnt,
20523 sizeof(*aux_data) * (orig_prog_len - off - cnt));
20524
20525 return 0;
20526 }
20527
20528 /* The verifier does more data flow analysis than llvm and will not
20529 * explore branches that are dead at run time. Malicious programs can
20530 * have dead code too. Therefore replace all dead at-run-time code
20531 * with 'ja -1'.
20532 *
20533 * Plain nops would not be ideal: if they sat at the end of the
20534 * program and, through another bug, we managed to jump there, we
20535 * would execute beyond program memory. Returning an exception code
20536 * also wouldn't work, since the dead code may be located inside
20537 * subprogs.
20538 */
20539 static void sanitize_dead_code(struct bpf_verifier_env *env)
20540 {
20541 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
20542 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
20543 struct bpf_insn *insn = env->prog->insnsi;
20544 const int insn_cnt = env->prog->len;
20545 int i;
20546
20547 for (i = 0; i < insn_cnt; i++) {
20548 if (aux_data[i].seen)
20549 continue;
20550 memcpy(insn + i, &trap, sizeof(trap));
20551 aux_data[i].zext_dst = false;
20552 }
20553 }
20554
20555 static bool insn_is_cond_jump(u8 code)
20556 {
20557 u8 op;
20558
20559 op = BPF_OP(code);
20560 if (BPF_CLASS(code) == BPF_JMP32)
20561 return op != BPF_JA;
20562
20563 if (BPF_CLASS(code) != BPF_JMP)
20564 return false;
20565
20566 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
20567 }
20568
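/* Conditional jumps whose outcome the verifier has fully determined can be
 * hard-wired: if the fall-through insn was never visited, the branch is
 * always taken and becomes an unconditional jump to its target; if the
 * jump target was never visited, the branch is never taken and becomes a
 * jump with offset 0, i.e. a plain fall-through.
 */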
20569 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
20570 {
20571 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
20572 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
20573 struct bpf_insn *insn = env->prog->insnsi;
20574 const int insn_cnt = env->prog->len;
20575 int i;
20576
20577 for (i = 0; i < insn_cnt; i++, insn++) {
20578 if (!insn_is_cond_jump(insn->code))
20579 continue;
20580
20581 if (!aux_data[i + 1].seen)
20582 ja.off = insn->off;
20583 else if (!aux_data[i + 1 + insn->off].seen)
20584 ja.off = 0;
20585 else
20586 continue;
20587
20588 if (bpf_prog_is_offloaded(env->prog->aux))
20589 bpf_prog_offload_replace_insn(env, i, &ja);
20590
20591 memcpy(insn, &ja, sizeof(ja));
20592 }
20593 }
20594
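/* Remove each contiguous run of instructions the verifier never visited,
 * letting verifier_remove_insns() fix up subprog starts, line info and
 * insn_aux_data for every removed region.
 */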
20595 static int opt_remove_dead_code(struct bpf_verifier_env *env)
20596 {
20597 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
20598 int insn_cnt = env->prog->len;
20599 int i, err;
20600
20601 for (i = 0; i < insn_cnt; i++) {
20602 int j;
20603
20604 j = 0;
20605 while (i + j < insn_cnt && !aux_data[i + j].seen)
20606 j++;
20607 if (!j)
20608 continue;
20609
20610 err = verifier_remove_insns(env, i, j);
20611 if (err)
20612 return err;
20613 insn_cnt = env->prog->len;
20614 }
20615
20616 return 0;
20617 }
20618
20619 static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
20620 static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
20621
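/* Drop instructions that became no-ops after the rewrites above:
 * unconditional 'goto +0' jumps and 'may_goto +0' instructions.
 */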
20622 static int opt_remove_nops(struct bpf_verifier_env *env)
20623 {
20624 struct bpf_insn *insn = env->prog->insnsi;
20625 int insn_cnt = env->prog->len;
20626 bool is_may_goto_0, is_ja;
20627 int i, err;
20628
20629 for (i = 0; i < insn_cnt; i++) {
20630 is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
20631 is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
20632
20633 if (!is_may_goto_0 && !is_ja)
20634 continue;
20635
20636 err = verifier_remove_insns(env, i, 1);
20637 if (err)
20638 return err;
20639 insn_cnt--;
20640 /* Go back one insn to catch may_goto +1; may_goto +0 sequence */
20641 i -= (is_may_goto_0 && i > 0) ? 2 : 1;
20642 }
20643
20644 return 0;
20645 }
20646
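/* For 32-bit subregister writes whose upper half the verifier found to be
 * live (zext_dst), insert an explicit zero-extension when the JIT requests
 * it or the insn is a CMPXCHG. With BPF_F_TEST_RND_HI32, poison the upper
 * 32 bits of the remaining 32-bit defs with a random value instead, to
 * flush out code that wrongly relies on implicit zero extension.
 */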
20647 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
20648 const union bpf_attr *attr)
20649 {
20650 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
20651 struct bpf_insn_aux_data *aux = env->insn_aux_data;
20652 int i, patch_len, delta = 0, len = env->prog->len;
20653 struct bpf_insn *insns = env->prog->insnsi;
20654 struct bpf_prog *new_prog;
20655 bool rnd_hi32;
20656
20657 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
20658 zext_patch[1] = BPF_ZEXT_REG(0);
20659 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
20660 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
20661 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
20662 for (i = 0; i < len; i++) {
20663 int adj_idx = i + delta;
20664 struct bpf_insn insn;
20665 int load_reg;
20666
20667 insn = insns[adj_idx];
20668 load_reg = insn_def_regno(&insn);
20669 if (!aux[adj_idx].zext_dst) {
20670 u8 code, class;
20671 u32 imm_rnd;
20672
20673 if (!rnd_hi32)
20674 continue;
20675
20676 code = insn.code;
20677 class = BPF_CLASS(code);
20678 if (load_reg == -1)
20679 continue;
20680
20681 /* NOTE: arg "reg" (the fourth one) is only used for
20682 * BPF_STX + SRC_OP, so it is safe to pass NULL
20683 * here.
20684 */
20685 if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
20686 if (class == BPF_LD &&
20687 BPF_MODE(code) == BPF_IMM)
20688 i++;
20689 continue;
20690 }
20691
20692 /* ctx load could be transformed into wider load. */
20693 if (class == BPF_LDX &&
20694 aux[adj_idx].ptr_type == PTR_TO_CTX)
20695 continue;
20696
20697 imm_rnd = get_random_u32();
20698 rnd_hi32_patch[0] = insn;
20699 rnd_hi32_patch[1].imm = imm_rnd;
20700 rnd_hi32_patch[3].dst_reg = load_reg;
20701 patch = rnd_hi32_patch;
20702 patch_len = 4;
20703 goto apply_patch_buffer;
20704 }
20705
20706 /* Add in a zero-extend instruction if a) the JIT has requested
20707 * it or b) it's a CMPXCHG.
20708 *
20709 * The latter is because: BPF_CMPXCHG always loads a value into
20710 * R0, therefore always zero-extends. However some archs'
20711 * equivalent instruction only does this load when the
20712 * comparison is successful. This detail of CMPXCHG is
20713 * orthogonal to the general zero-extension behaviour of the
20714 * CPU, so it's treated independently of bpf_jit_needs_zext.
20715 */
20716 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
20717 continue;
20718
20719 /* Zero-extension is done by the caller. */
20720 if (bpf_pseudo_kfunc_call(&insn))
20721 continue;
20722
20723 if (WARN_ON(load_reg == -1)) {
20724 verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
20725 return -EFAULT;
20726 }
20727
20728 zext_patch[0] = insn;
20729 zext_patch[1].dst_reg = load_reg;
20730 zext_patch[1].src_reg = load_reg;
20731 patch = zext_patch;
20732 patch_len = 2;
20733 apply_patch_buffer:
20734 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
20735 if (!new_prog)
20736 return -ENOMEM;
20737 env->prog = new_prog;
20738 insns = new_prog->insnsi;
20739 aux = env->insn_aux_data;
20740 delta += patch_len - 1;
20741 }
20742
20743 return 0;
20744 }
20745
20746 /* convert load instructions that access fields of a context type into a
20747 * sequence of instructions that access fields of the underlying structure:
20748 * struct __sk_buff -> struct sk_buff
20749 * struct bpf_sock_ops -> struct sock
20750 */
20751 static int convert_ctx_accesses(struct bpf_verifier_env *env)
20752 {
20753 struct bpf_subprog_info *subprogs = env->subprog_info;
20754 const struct bpf_verifier_ops *ops = env->ops;
20755 int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
20756 const int insn_cnt = env->prog->len;
20757 struct bpf_insn *epilogue_buf = env->epilogue_buf;
20758 struct bpf_insn *insn_buf = env->insn_buf;
20759 struct bpf_insn *insn;
20760 u32 target_size, size_default, off;
20761 struct bpf_prog *new_prog;
20762 enum bpf_access_type type;
20763 bool is_narrower_load;
20764 int epilogue_idx = 0;
20765
20766 if (ops->gen_epilogue) {
20767 epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
20768 -(subprogs[0].stack_depth + 8));
20769 if (epilogue_cnt >= INSN_BUF_SIZE) {
20770 verbose(env, "bpf verifier is misconfigured\n");
20771 return -EINVAL;
20772 } else if (epilogue_cnt) {
20773 /* Save the ARG_PTR_TO_CTX for the epilogue to use */
20774 cnt = 0;
20775 subprogs[0].stack_depth += 8;
20776 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
20777 -subprogs[0].stack_depth);
20778 insn_buf[cnt++] = env->prog->insnsi[0];
20779 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
20780 if (!new_prog)
20781 return -ENOMEM;
20782 env->prog = new_prog;
20783 delta += cnt - 1;
20784
20785 ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
20786 if (ret < 0)
20787 return ret;
20788 }
20789 }
20790
20791 if (ops->gen_prologue || env->seen_direct_write) {
20792 if (!ops->gen_prologue) {
20793 verbose(env, "bpf verifier is misconfigured\n");
20794 return -EINVAL;
20795 }
20796 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
20797 env->prog);
20798 if (cnt >= INSN_BUF_SIZE) {
20799 verbose(env, "bpf verifier is misconfigured\n");
20800 return -EINVAL;
20801 } else if (cnt) {
20802 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
20803 if (!new_prog)
20804 return -ENOMEM;
20805
20806 env->prog = new_prog;
20807 delta += cnt - 1;
20808
20809 ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
20810 if (ret < 0)
20811 return ret;
20812 }
20813 }
20814
20815 if (delta)
20816 WARN_ON(adjust_jmp_off(env->prog, 0, delta));
20817
20818 if (bpf_prog_is_offloaded(env->prog->aux))
20819 return 0;
20820
20821 insn = env->prog->insnsi + delta;
20822
20823 for (i = 0; i < insn_cnt; i++, insn++) {
20824 bpf_convert_ctx_access_t convert_ctx_access;
20825 u8 mode;
20826
20827 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
20828 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
20829 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
20830 insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
20831 insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
20832 insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
20833 insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
20834 type = BPF_READ;
20835 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
20836 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
20837 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
20838 insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
20839 insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
20840 insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
20841 insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
20842 insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
20843 type = BPF_WRITE;
20844 } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
20845 insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
20846 insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
20847 insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
20848 env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
20849 insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
20850 env->prog->aux->num_exentries++;
20851 continue;
20852 } else if (insn->code == (BPF_JMP | BPF_EXIT) &&
20853 epilogue_cnt &&
20854 i + delta < subprogs[1].start) {
20855 /* Generate epilogue for the main prog */
20856 if (epilogue_idx) {
20857 /* jump back to the earlier generated epilogue */
20858 insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
20859 cnt = 1;
20860 } else {
20861 memcpy(insn_buf, epilogue_buf,
20862 epilogue_cnt * sizeof(*epilogue_buf));
20863 cnt = epilogue_cnt;
20864 /* epilogue_idx cannot be 0. It must have at
20865 * least one ctx ptr saving insn before the
20866 * epilogue.
20867 */
20868 epilogue_idx = i + delta;
20869 }
20870 goto patch_insn_buf;
20871 } else {
20872 continue;
20873 }
20874
20875 if (type == BPF_WRITE &&
20876 env->insn_aux_data[i + delta].sanitize_stack_spill) {
20877 struct bpf_insn patch[] = {
20878 *insn,
20879 BPF_ST_NOSPEC(),
20880 };
20881
20882 cnt = ARRAY_SIZE(patch);
20883 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
20884 if (!new_prog)
20885 return -ENOMEM;
20886
20887 delta += cnt - 1;
20888 env->prog = new_prog;
20889 insn = new_prog->insnsi + i + delta;
20890 continue;
20891 }
20892
20893 switch ((int)env->insn_aux_data[i + delta].ptr_type) {
20894 case PTR_TO_CTX:
20895 if (!ops->convert_ctx_access)
20896 continue;
20897 convert_ctx_access = ops->convert_ctx_access;
20898 break;
20899 case PTR_TO_SOCKET:
20900 case PTR_TO_SOCK_COMMON:
20901 convert_ctx_access = bpf_sock_convert_ctx_access;
20902 break;
20903 case PTR_TO_TCP_SOCK:
20904 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
20905 break;
20906 case PTR_TO_XDP_SOCK:
20907 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
20908 break;
20909 case PTR_TO_BTF_ID:
20910 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
20911 /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
20912 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
20913 * be said once it is marked PTR_UNTRUSTED, hence we must handle
20914 * any faults for loads into such types. BPF_WRITE is disallowed
20915 * for this case.
20916 */
20917 case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
20918 if (type == BPF_READ) {
20919 if (BPF_MODE(insn->code) == BPF_MEM)
20920 insn->code = BPF_LDX | BPF_PROBE_MEM |
20921 BPF_SIZE((insn)->code);
20922 else
20923 insn->code = BPF_LDX | BPF_PROBE_MEMSX |
20924 BPF_SIZE((insn)->code);
20925 env->prog->aux->num_exentries++;
20926 }
20927 continue;
20928 case PTR_TO_ARENA:
20929 if (BPF_MODE(insn->code) == BPF_MEMSX) {
20930 verbose(env, "sign extending loads from arena are not supported yet\n");
20931 return -EOPNOTSUPP;
20932 }
20933 insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
20934 env->prog->aux->num_exentries++;
20935 continue;
20936 default:
20937 continue;
20938 }
20939
20940 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
20941 size = BPF_LDST_BYTES(insn);
20942 mode = BPF_MODE(insn->code);
20943
20944 /* If the read access is a narrower load of the field,
20945 * convert to a 4/8-byte load, to minimize program type specific
20946 * convert_ctx_access changes. If conversion is successful,
20947 * we will apply proper mask to the result.
20948 */
20949 is_narrower_load = size < ctx_field_size;
20950 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
20951 off = insn->off;
20952 if (is_narrower_load) {
20953 u8 size_code;
20954
20955 if (type == BPF_WRITE) {
20956 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
20957 return -EINVAL;
20958 }
20959
20960 size_code = BPF_H;
20961 if (ctx_field_size == 4)
20962 size_code = BPF_W;
20963 else if (ctx_field_size == 8)
20964 size_code = BPF_DW;
20965
20966 insn->off = off & ~(size_default - 1);
20967 insn->code = BPF_LDX | BPF_MEM | size_code;
20968 }
20969
20970 target_size = 0;
20971 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
20972 &target_size);
20973 if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
20974 (ctx_field_size && !target_size)) {
20975 verbose(env, "bpf verifier is misconfigured\n");
20976 return -EINVAL;
20977 }
20978
20979 if (is_narrower_load && size < target_size) {
20980 u8 shift = bpf_ctx_narrow_access_offset(
20981 off, size, size_default) * 8;
20982 if (shift && cnt + 1 >= INSN_BUF_SIZE) {
20983 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
20984 return -EINVAL;
20985 }
20986 if (ctx_field_size <= 4) {
20987 if (shift)
20988 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
20989 insn->dst_reg,
20990 shift);
20991 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
20992 (1 << size * 8) - 1);
20993 } else {
20994 if (shift)
20995 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
20996 insn->dst_reg,
20997 shift);
20998 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
20999 (1ULL << size * 8) - 1);
21000 }
21001 }
21002 if (mode == BPF_MEMSX)
21003 insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
21004 insn->dst_reg, insn->dst_reg,
21005 size * 8, 0);
21006
21007 patch_insn_buf:
21008 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21009 if (!new_prog)
21010 return -ENOMEM;
21011
21012 delta += cnt - 1;
21013
21014 /* keep walking new program and skip insns we just inserted */
21015 env->prog = new_prog;
21016 insn = new_prog->insnsi + i + delta;
21017 }
21018
21019 return 0;
21020 }
21021
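/* JIT bpf-to-bpf calls: split the program into a separate bpf_prog per
 * subprogram, JIT each of them, then patch every pseudo call/func insn
 * with the real JITed address and run a final JIT pass so the emitted
 * call offsets are correct.
 */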
21022 static int jit_subprogs(struct bpf_verifier_env *env)
21023 {
21024 struct bpf_prog *prog = env->prog, **func, *tmp;
21025 int i, j, subprog_start, subprog_end = 0, len, subprog;
21026 struct bpf_map *map_ptr;
21027 struct bpf_insn *insn;
21028 void *old_bpf_func;
21029 int err, num_exentries;
21030
21031 if (env->subprog_cnt <= 1)
21032 return 0;
21033
21034 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
21035 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
21036 continue;
21037
21038 /* Upon error here we cannot fall back to interpreter but
21039 * need a hard reject of the program. Thus -EFAULT is
21040 * propagated in any case.
21041 */
21042 subprog = find_subprog(env, i + insn->imm + 1);
21043 if (subprog < 0) {
21044 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
21045 i + insn->imm + 1);
21046 return -EFAULT;
21047 }
21048 /* temporarily remember subprog id inside insn instead of
21049 * aux_data, since next loop will split up all insns into funcs
21050 */
21051 insn->off = subprog;
21052 /* remember original imm in case JIT fails and fallback
21053 * to interpreter will be needed
21054 */
21055 env->insn_aux_data[i].call_imm = insn->imm;
21056 /* point imm to __bpf_call_base+1 from JITs point of view */
21057 insn->imm = 1;
21058 if (bpf_pseudo_func(insn)) {
21059 #if defined(MODULES_VADDR)
21060 u64 addr = MODULES_VADDR;
21061 #else
21062 u64 addr = VMALLOC_START;
21063 #endif
21064 /* jit (e.g. x86_64) may emit fewer instructions
21065 * if it learns a u32 imm is the same as a u64 imm.
21066 * Set close enough to possible prog address.
21067 */
21068 insn[0].imm = (u32)addr;
21069 insn[1].imm = addr >> 32;
21070 }
21071 }
21072
21073 err = bpf_prog_alloc_jited_linfo(prog);
21074 if (err)
21075 goto out_undo_insn;
21076
21077 err = -ENOMEM;
21078 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
21079 if (!func)
21080 goto out_undo_insn;
21081
21082 for (i = 0; i < env->subprog_cnt; i++) {
21083 subprog_start = subprog_end;
21084 subprog_end = env->subprog_info[i + 1].start;
21085
21086 len = subprog_end - subprog_start;
21087 /* bpf_prog_run() doesn't call subprogs directly,
21088 * hence main prog stats include the runtime of subprogs.
21089 * subprogs don't have IDs and are not reachable via prog_get_next_id;
21090 * func[i]->stats will never be accessed and stays NULL
21091 */
21092 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
21093 if (!func[i])
21094 goto out_free;
21095 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
21096 len * sizeof(struct bpf_insn));
21097 func[i]->type = prog->type;
21098 func[i]->len = len;
21099 if (bpf_prog_calc_tag(func[i]))
21100 goto out_free;
21101 func[i]->is_func = 1;
21102 func[i]->sleepable = prog->sleepable;
21103 func[i]->aux->func_idx = i;
21104 /* Below members will be freed only at prog->aux */
21105 func[i]->aux->btf = prog->aux->btf;
21106 func[i]->aux->func_info = prog->aux->func_info;
21107 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
21108 func[i]->aux->poke_tab = prog->aux->poke_tab;
21109 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
21110
21111 for (j = 0; j < prog->aux->size_poke_tab; j++) {
21112 struct bpf_jit_poke_descriptor *poke;
21113
21114 poke = &prog->aux->poke_tab[j];
21115 if (poke->insn_idx < subprog_end &&
21116 poke->insn_idx >= subprog_start)
21117 poke->aux = func[i]->aux;
21118 }
21119
21120 func[i]->aux->name[0] = 'F';
21121 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
21122 if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
21123 func[i]->aux->jits_use_priv_stack = true;
21124
21125 func[i]->jit_requested = 1;
21126 func[i]->blinding_requested = prog->blinding_requested;
21127 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
21128 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
21129 func[i]->aux->linfo = prog->aux->linfo;
21130 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
21131 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
21132 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
21133 func[i]->aux->arena = prog->aux->arena;
21134 num_exentries = 0;
21135 insn = func[i]->insnsi;
21136 for (j = 0; j < func[i]->len; j++, insn++) {
21137 if (BPF_CLASS(insn->code) == BPF_LDX &&
21138 (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
21139 BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
21140 BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
21141 num_exentries++;
21142 if ((BPF_CLASS(insn->code) == BPF_STX ||
21143 BPF_CLASS(insn->code) == BPF_ST) &&
21144 BPF_MODE(insn->code) == BPF_PROBE_MEM32)
21145 num_exentries++;
21146 if (BPF_CLASS(insn->code) == BPF_STX &&
21147 BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
21148 num_exentries++;
21149 }
21150 func[i]->aux->num_exentries = num_exentries;
21151 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
21152 func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
21153 func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
21154 func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
21155 if (!i)
21156 func[i]->aux->exception_boundary = env->seen_exception;
21157 func[i] = bpf_int_jit_compile(func[i]);
21158 if (!func[i]->jited) {
21159 err = -ENOTSUPP;
21160 goto out_free;
21161 }
21162 cond_resched();
21163 }
21164
21165 /* at this point all bpf functions were successfully JITed
21166 * now populate all bpf_calls with correct addresses and
21167 * run last pass of JIT
21168 */
21169 for (i = 0; i < env->subprog_cnt; i++) {
21170 insn = func[i]->insnsi;
21171 for (j = 0; j < func[i]->len; j++, insn++) {
21172 if (bpf_pseudo_func(insn)) {
21173 subprog = insn->off;
21174 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
21175 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
21176 continue;
21177 }
21178 if (!bpf_pseudo_call(insn))
21179 continue;
21180 subprog = insn->off;
21181 insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
21182 }
21183
21184 /* we use the aux data to keep a list of the start addresses
21185 * of the JITed images for each function in the program
21186 *
21187 * for some architectures, such as powerpc64, the imm field
21188 * might not be large enough to hold the offset of the start
21189 * address of the callee's JITed image from __bpf_call_base
21190 *
21191 * in such cases, we can lookup the start address of a callee
21192 * by using its subprog id, available from the off field of
21193 * the call instruction, as an index for this list
21194 */
21195 func[i]->aux->func = func;
21196 func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
21197 func[i]->aux->real_func_cnt = env->subprog_cnt;
21198 }
21199 for (i = 0; i < env->subprog_cnt; i++) {
21200 old_bpf_func = func[i]->bpf_func;
21201 tmp = bpf_int_jit_compile(func[i]);
21202 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
21203 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
21204 err = -ENOTSUPP;
21205 goto out_free;
21206 }
21207 cond_resched();
21208 }
21209
21210 /* finally lock prog and jit images for all functions and
21211 * populate kallsyms. Begin at the first subprogram, since
21212 * bpf_prog_load will add the kallsyms for the main program.
21213 */
21214 for (i = 1; i < env->subprog_cnt; i++) {
21215 err = bpf_prog_lock_ro(func[i]);
21216 if (err)
21217 goto out_free;
21218 }
21219
21220 for (i = 1; i < env->subprog_cnt; i++)
21221 bpf_prog_kallsyms_add(func[i]);
21222
21223 /* Last step: make now unused interpreter insns from main
21224 * prog consistent for later dump requests, so they can
21225 * later look the same as if they were interpreted only.
21226 */
21227 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
21228 if (bpf_pseudo_func(insn)) {
21229 insn[0].imm = env->insn_aux_data[i].call_imm;
21230 insn[1].imm = insn->off;
21231 insn->off = 0;
21232 continue;
21233 }
21234 if (!bpf_pseudo_call(insn))
21235 continue;
21236 insn->off = env->insn_aux_data[i].call_imm;
21237 subprog = find_subprog(env, i + insn->off + 1);
21238 insn->imm = subprog;
21239 }
21240
21241 prog->jited = 1;
21242 prog->bpf_func = func[0]->bpf_func;
21243 prog->jited_len = func[0]->jited_len;
21244 prog->aux->extable = func[0]->aux->extable;
21245 prog->aux->num_exentries = func[0]->aux->num_exentries;
21246 prog->aux->func = func;
21247 prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
21248 prog->aux->real_func_cnt = env->subprog_cnt;
21249 prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
21250 prog->aux->exception_boundary = func[0]->aux->exception_boundary;
21251 bpf_prog_jit_attempt_done(prog);
21252 return 0;
21253 out_free:
21254 /* We failed JIT'ing, so at this point we need to unregister poke
21255 * descriptors from subprogs, so that kernel is not attempting to
21256 * patch it anymore as we're freeing the subprog JIT memory.
21257 */
21258 for (i = 0; i < prog->aux->size_poke_tab; i++) {
21259 map_ptr = prog->aux->poke_tab[i].tail_call.map;
21260 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
21261 }
21262 /* At this point we're guaranteed that poke descriptors are not
21263 * live anymore. We can just unlink its descriptor table as it's
21264 * released with the main prog.
21265 */
21266 for (i = 0; i < env->subprog_cnt; i++) {
21267 if (!func[i])
21268 continue;
21269 func[i]->aux->poke_tab = NULL;
21270 bpf_jit_free(func[i]);
21271 }
21272 kfree(func);
21273 out_undo_insn:
21274 /* cleanup main prog to be interpreted */
21275 prog->jit_requested = 0;
21276 prog->blinding_requested = 0;
21277 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
21278 if (!bpf_pseudo_call(insn))
21279 continue;
21280 insn->off = 0;
21281 insn->imm = env->insn_aux_data[i].call_imm;
21282 }
21283 bpf_prog_jit_attempt_done(prog);
21284 return err;
21285 }
21286
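/* Resolve bpf-to-bpf calls: prefer JITing all subprogs via jit_subprogs().
 * If that is not possible, fall back to the interpreter by patching each
 * pseudo call with the callee's stack depth, rejecting kfunc calls,
 * callbacks and tail calls mixed with bpf-to-bpf calls, which the
 * interpreter does not support.
 */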
21287 static int fixup_call_args(struct bpf_verifier_env *env)
21288 {
21289 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
21290 struct bpf_prog *prog = env->prog;
21291 struct bpf_insn *insn = prog->insnsi;
21292 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
21293 int i, depth;
21294 #endif
21295 int err = 0;
21296
21297 if (env->prog->jit_requested &&
21298 !bpf_prog_is_offloaded(env->prog->aux)) {
21299 err = jit_subprogs(env);
21300 if (err == 0)
21301 return 0;
21302 if (err == -EFAULT)
21303 return err;
21304 }
21305 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
21306 if (has_kfunc_call) {
21307 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
21308 return -EINVAL;
21309 }
21310 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
21311 /* When JIT fails the progs with bpf2bpf calls and tail_calls
21312 * have to be rejected, since interpreter doesn't support them yet.
21313 */
21314 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
21315 return -EINVAL;
21316 }
21317 for (i = 0; i < prog->len; i++, insn++) {
21318 if (bpf_pseudo_func(insn)) {
21319 /* When JIT fails the progs with callback calls
21320 * have to be rejected, since interpreter doesn't support them yet.
21321 */
21322 verbose(env, "callbacks are not allowed in non-JITed programs\n");
21323 return -EINVAL;
21324 }
21325
21326 if (!bpf_pseudo_call(insn))
21327 continue;
21328 depth = get_callee_stack_depth(env, insn, i);
21329 if (depth < 0)
21330 return depth;
21331 bpf_patch_call_args(insn, depth);
21332 }
21333 err = 0;
21334 #endif
21335 return err;
21336 }
21337
21338 /* replace a generic kfunc with a specialized version if necessary */
21339 static void specialize_kfunc(struct bpf_verifier_env *env,
21340 u32 func_id, u16 offset, unsigned long *addr)
21341 {
21342 struct bpf_prog *prog = env->prog;
21343 bool seen_direct_write;
21344 void *xdp_kfunc;
21345 bool is_rdonly;
21346
21347 if (bpf_dev_bound_kfunc_id(func_id)) {
21348 xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
21349 if (xdp_kfunc) {
21350 *addr = (unsigned long)xdp_kfunc;
21351 return;
21352 }
21353 /* fallback to default kfunc when not supported by netdev */
21354 }
21355
21356 if (offset)
21357 return;
21358
21359 if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
21360 seen_direct_write = env->seen_direct_write;
21361 is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
21362
21363 if (is_rdonly)
21364 *addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
21365
21366 /* restore env->seen_direct_write to its original value, since
21367 * may_access_direct_pkt_data mutates it
21368 */
21369 env->seen_direct_write = seen_direct_write;
21370 }
21371
21372 if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr] &&
21373 bpf_lsm_has_d_inode_locked(prog))
21374 *addr = (unsigned long)bpf_set_dentry_xattr_locked;
21375
21376 if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr] &&
21377 bpf_lsm_has_d_inode_locked(prog))
21378 *addr = (unsigned long)bpf_remove_dentry_xattr_locked;
21379 }
21380
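/* Shared rewrite for the list_push_{front,back}_impl and rbtree_add_impl
 * kfuncs: load the btf_struct_meta pointer and the node offset into the
 * registers the kfunc expects for its hidden arguments, then re-emit the
 * original call.
 */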
21381 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
21382 u16 struct_meta_reg,
21383 u16 node_offset_reg,
21384 struct bpf_insn *insn,
21385 struct bpf_insn *insn_buf,
21386 int *cnt)
21387 {
21388 struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
21389 struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
21390
21391 insn_buf[0] = addr[0];
21392 insn_buf[1] = addr[1];
21393 insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
21394 insn_buf[3] = *insn;
21395 *cnt = 4;
21396 }
21397
21398 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
21399 struct bpf_insn *insn_buf, int insn_idx, int *cnt)
21400 {
21401 const struct bpf_kfunc_desc *desc;
21402
21403 if (!insn->imm) {
21404 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
21405 return -EINVAL;
21406 }
21407
21408 *cnt = 0;
21409
21410 /* insn->imm has the btf func_id. Replace it with an offset relative to
21411 * __bpf_call_base, unless the JIT needs to call functions that are
21412 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
21413 */
21414 desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
21415 if (!desc) {
21416 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
21417 insn->imm);
21418 return -EFAULT;
21419 }
21420
21421 if (!bpf_jit_supports_far_kfunc_call())
21422 insn->imm = BPF_CALL_IMM(desc->addr);
21423 if (insn->off)
21424 return 0;
21425 if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
21426 desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
21427 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
21428 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
21429 u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
21430
21431 if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
21432 verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
21433 insn_idx);
21434 return -EFAULT;
21435 }
21436
21437 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
21438 insn_buf[1] = addr[0];
21439 insn_buf[2] = addr[1];
21440 insn_buf[3] = *insn;
21441 *cnt = 4;
21442 } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
21443 desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
21444 desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
21445 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
21446 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
21447
21448 if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
21449 verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
21450 insn_idx);
21451 return -EFAULT;
21452 }
21453
21454 if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
21455 !kptr_struct_meta) {
21456 verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
21457 insn_idx);
21458 return -EFAULT;
21459 }
21460
21461 insn_buf[0] = addr[0];
21462 insn_buf[1] = addr[1];
21463 insn_buf[2] = *insn;
21464 *cnt = 3;
21465 } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
21466 desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
21467 desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
21468 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
21469 int struct_meta_reg = BPF_REG_3;
21470 int node_offset_reg = BPF_REG_4;
21471
21472 /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
21473 if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
21474 struct_meta_reg = BPF_REG_4;
21475 node_offset_reg = BPF_REG_5;
21476 }
21477
21478 if (!kptr_struct_meta) {
21479 verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
21480 insn_idx);
21481 return -EFAULT;
21482 }
21483
21484 __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
21485 node_offset_reg, insn, insn_buf, cnt);
21486 } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
21487 desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
21488 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
21489 *cnt = 1;
21490 } else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
21491 struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
21492
21493 insn_buf[0] = ld_addrs[0];
21494 insn_buf[1] = ld_addrs[1];
21495 insn_buf[2] = *insn;
21496 *cnt = 3;
21497 }
21498 return 0;
21499 }
21500
21501 /* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
21502 static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
21503 {
21504 struct bpf_subprog_info *info = env->subprog_info;
21505 int cnt = env->subprog_cnt;
21506 struct bpf_prog *prog;
21507
21508 /* We only reserve one slot for hidden subprogs in subprog_info. */
21509 if (env->hidden_subprog_cnt) {
21510 verbose(env, "verifier internal error: only one hidden subprog supported\n");
21511 return -EFAULT;
21512 }
21513 /* We're not patching any existing instruction, just appending the new
21514 * ones for the hidden subprog. Hence all of the adjustment operations
21515 * in bpf_patch_insn_data are no-ops.
21516 */
21517 prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
21518 if (!prog)
21519 return -ENOMEM;
21520 env->prog = prog;
21521 info[cnt + 1].start = info[cnt].start;
21522 info[cnt].start = prog->len - len + 1;
21523 env->subprog_cnt++;
21524 env->hidden_subprog_cnt++;
21525 return 0;
21526 }
21527
21528 /* Do various post-verification rewrites in a single program pass.
21529 * These rewrites simplify JIT and interpreter implementations.
21530 */
21531 static int do_misc_fixups(struct bpf_verifier_env *env)
21532 {
21533 struct bpf_prog *prog = env->prog;
21534 enum bpf_attach_type eatype = prog->expected_attach_type;
21535 enum bpf_prog_type prog_type = resolve_prog_type(prog);
21536 struct bpf_insn *insn = prog->insnsi;
21537 const struct bpf_func_proto *fn;
21538 const int insn_cnt = prog->len;
21539 const struct bpf_map_ops *ops;
21540 struct bpf_insn_aux_data *aux;
21541 struct bpf_insn *insn_buf = env->insn_buf;
21542 struct bpf_prog *new_prog;
21543 struct bpf_map *map_ptr;
21544 int i, ret, cnt, delta = 0, cur_subprog = 0;
21545 struct bpf_subprog_info *subprogs = env->subprog_info;
21546 u16 stack_depth = subprogs[cur_subprog].stack_depth;
21547 u16 stack_depth_extra = 0;
21548
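/* If the program can throw but did not declare an exception callback,
 * append a hidden default callback that simply returns its R1 argument
 * (the cookie passed to bpf_throw()).
 */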
21549 if (env->seen_exception && !env->exception_callback_subprog) {
21550 struct bpf_insn patch[] = {
21551 env->prog->insnsi[insn_cnt - 1],
21552 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
21553 BPF_EXIT_INSN(),
21554 };
21555
21556 ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch));
21557 if (ret < 0)
21558 return ret;
21559 prog = env->prog;
21560 insn = prog->insnsi;
21561
21562 env->exception_callback_subprog = env->subprog_cnt - 1;
21563 /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
21564 mark_subprog_exc_cb(env, env->exception_callback_subprog);
21565 }
21566
21567 for (i = 0; i < insn_cnt;) {
21568 if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
21569 if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
21570 (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
21571 /* convert to 32-bit mov that clears upper 32-bit */
21572 insn->code = BPF_ALU | BPF_MOV | BPF_X;
21573 /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
21574 insn->off = 0;
21575 insn->imm = 0;
21576 } /* cast from as(0) to as(1) should be handled by JIT */
21577 goto next_insn;
21578 }
21579
21580 if (env->insn_aux_data[i + delta].needs_zext)
21581 /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
21582 insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
21583
21584 /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
21585 if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
21586 insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
21587 insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
21588 insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
21589 insn->off == 1 && insn->imm == -1) {
21590 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
21591 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
21592 struct bpf_insn *patchlet;
21593 struct bpf_insn chk_and_sdiv[] = {
21594 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21595 BPF_NEG | BPF_K, insn->dst_reg,
21596 0, 0, 0),
21597 };
21598 struct bpf_insn chk_and_smod[] = {
21599 BPF_MOV32_IMM(insn->dst_reg, 0),
21600 };
21601
21602 patchlet = isdiv ? chk_and_sdiv : chk_and_smod;
21603 cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod);
21604
21605 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
21606 if (!new_prog)
21607 return -ENOMEM;
21608
21609 delta += cnt - 1;
21610 env->prog = prog = new_prog;
21611 insn = new_prog->insnsi + i + delta;
21612 goto next_insn;
21613 }
21614
21615 /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
21616 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
21617 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
21618 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
21619 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
21620 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
21621 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
21622 bool is_sdiv = isdiv && insn->off == 1;
21623 bool is_smod = !isdiv && insn->off == 1;
21624 struct bpf_insn *patchlet;
21625 struct bpf_insn chk_and_div[] = {
21626 /* [R,W]x div 0 -> 0 */
21627 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21628 BPF_JNE | BPF_K, insn->src_reg,
21629 0, 2, 0),
21630 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
21631 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21632 *insn,
21633 };
21634 struct bpf_insn chk_and_mod[] = {
21635 /* [R,W]x mod 0 -> [R,W]x */
21636 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21637 BPF_JEQ | BPF_K, insn->src_reg,
21638 0, 1 + (is64 ? 0 : 1), 0),
21639 *insn,
21640 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21641 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
21642 };
21643 struct bpf_insn chk_and_sdiv[] = {
21644 /* [R,W]x sdiv 0 -> 0
21645 * LLONG_MIN sdiv -1 -> LLONG_MIN
21646 * INT_MIN sdiv -1 -> INT_MIN
21647 */
21648 BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
21649 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21650 BPF_ADD | BPF_K, BPF_REG_AX,
21651 0, 0, 1),
21652 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21653 BPF_JGT | BPF_K, BPF_REG_AX,
21654 0, 4, 1),
21655 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21656 BPF_JEQ | BPF_K, BPF_REG_AX,
21657 0, 1, 0),
21658 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21659 BPF_MOV | BPF_K, insn->dst_reg,
21660 0, 0, 0),
21661 /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
21662 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21663 BPF_NEG | BPF_K, insn->dst_reg,
21664 0, 0, 0),
21665 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21666 *insn,
21667 };
21668 struct bpf_insn chk_and_smod[] = {
21669 /* [R,W]x mod 0 -> [R,W]x */
21670 /* [R,W]x mod -1 -> 0 */
21671 BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
21672 BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
21673 BPF_ADD | BPF_K, BPF_REG_AX,
21674 0, 0, 1),
21675 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21676 BPF_JGT | BPF_K, BPF_REG_AX,
21677 0, 3, 1),
21678 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
21679 BPF_JEQ | BPF_K, BPF_REG_AX,
21680 0, 3 + (is64 ? 0 : 1), 1),
21681 BPF_MOV32_IMM(insn->dst_reg, 0),
21682 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21683 *insn,
21684 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
21685 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
21686 };
21687
21688 if (is_sdiv) {
21689 patchlet = chk_and_sdiv;
21690 cnt = ARRAY_SIZE(chk_and_sdiv);
21691 } else if (is_smod) {
21692 patchlet = chk_and_smod;
21693 cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0);
21694 } else {
21695 patchlet = isdiv ? chk_and_div : chk_and_mod;
21696 cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
21697 ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
21698 }
21699
21700 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
21701 if (!new_prog)
21702 return -ENOMEM;
21703
21704 delta += cnt - 1;
21705 env->prog = prog = new_prog;
21706 insn = new_prog->insnsi + i + delta;
21707 goto next_insn;
21708 }
21709
21710 /* Make it impossible to de-reference a userspace address */
21711 if (BPF_CLASS(insn->code) == BPF_LDX &&
21712 (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
21713 BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
21714 struct bpf_insn *patch = &insn_buf[0];
21715 u64 uaddress_limit = bpf_arch_uaddress_limit();
21716
21717 if (!uaddress_limit)
21718 goto next_insn;
21719
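/* Emitted check: copy the address (plus insn offset) into AX and compare
 * its upper 32 bits against bpf_arch_uaddress_limit(). If it looks like a
 * user address, skip the load and zero the destination register;
 * otherwise perform the original load.
 */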
21720 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
21721 if (insn->off)
21722 *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
21723 *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
21724 *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
21725 *patch++ = *insn;
21726 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
21727 *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
21728
21729 cnt = patch - insn_buf;
21730 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21731 if (!new_prog)
21732 return -ENOMEM;
21733
21734 delta += cnt - 1;
21735 env->prog = prog = new_prog;
21736 insn = new_prog->insnsi + i + delta;
21737 goto next_insn;
21738 }
21739
21740 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
21741 if (BPF_CLASS(insn->code) == BPF_LD &&
21742 (BPF_MODE(insn->code) == BPF_ABS ||
21743 BPF_MODE(insn->code) == BPF_IND)) {
21744 cnt = env->ops->gen_ld_abs(insn, insn_buf);
21745 if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
21746 verbose(env, "bpf verifier is misconfigured\n");
21747 return -EINVAL;
21748 }
21749
21750 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21751 if (!new_prog)
21752 return -ENOMEM;
21753
21754 delta += cnt - 1;
21755 env->prog = prog = new_prog;
21756 insn = new_prog->insnsi + i + delta;
21757 goto next_insn;
21758 }
21759
21760 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
21761 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
21762 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
21763 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
21764 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
21765 struct bpf_insn *patch = &insn_buf[0];
21766 bool issrc, isneg, isimm;
21767 u32 off_reg;
21768
21769 aux = &env->insn_aux_data[i + delta];
21770 if (!aux->alu_state ||
21771 aux->alu_state == BPF_ALU_NON_POINTER)
21772 goto next_insn;
21773
21774 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
21775 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
21776 BPF_ALU_SANITIZE_SRC;
21777 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
21778
21779 off_reg = issrc ? insn->src_reg : insn->dst_reg;
21780 if (isimm) {
21781 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
21782 } else {
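/* The sequence built below (after optionally negating off_reg) leaves
 * off_reg's value in AX when it lies within [0, alu_limit] and forces AX
 * to 0 otherwise, so even under speculation the pointer ALU can never
 * see an out-of-range offset.
 */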
21783 if (isneg)
21784 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
21785 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
21786 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
21787 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
21788 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
21789 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
21790 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
21791 }
21792 if (!issrc)
21793 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
21794 insn->src_reg = BPF_REG_AX;
21795 if (isneg)
21796 insn->code = insn->code == code_add ?
21797 code_sub : code_add;
21798 *patch++ = *insn;
21799 if (issrc && isneg && !isimm)
21800 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
21801 cnt = patch - insn_buf;
21802
21803 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21804 if (!new_prog)
21805 return -ENOMEM;
21806
21807 delta += cnt - 1;
21808 env->prog = prog = new_prog;
21809 insn = new_prog->insnsi + i + delta;
21810 goto next_insn;
21811 }
21812
21813 if (is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
21814 int stack_off_cnt = -stack_depth - 16;
21815
21816 /*
21817 * Two 8 byte slots, depth-16 stores the count, and
21818 * depth-8 stores the start timestamp of the loop.
21819 *
21820 * The starting value of count is BPF_MAX_TIMED_LOOPS
21821 * (0xffff). Every iteration loads it and subs it by 1,
21822 * until the value becomes 0 in AX (thus, 1 in stack),
21823 * after which we call arch_bpf_timed_may_goto, which
21824 * either sets AX to 0xffff to keep looping, or to 0
21825 * upon timeout. AX is then stored into the stack. In
21826 * the next iteration, we either see 0 and break out, or
21827 * continue iterating until the next time value is 0
21828 * after subtraction, rinse and repeat.
21829 */
21830 stack_depth_extra = 16;
21831 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
21832 if (insn->off >= 0)
21833 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
21834 else
21835 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
21836 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
21837 insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
21838 /*
21839 * AX is used as an argument to pass in stack_off_cnt
21840 * (to add to r10/fp), and also as the return value of
21841 * the call to arch_bpf_timed_may_goto.
21842 */
21843 insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
21844 insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
21845 insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
21846 cnt = 7;
21847
21848 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21849 if (!new_prog)
21850 return -ENOMEM;
21851
21852 delta += cnt - 1;
21853 env->prog = prog = new_prog;
21854 insn = new_prog->insnsi + i + delta;
21855 goto next_insn;
21856 } else if (is_may_goto_insn(insn)) {
21857 int stack_off = -stack_depth - 8;
21858
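/* may_goto fallback without arch timing support: keep a single 8-byte
 * iteration counter at fp - stack_depth - 8. Load it, take the goto once
 * it reaches zero, otherwise decrement it and store it back.
 */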
21859 stack_depth_extra = 8;
21860 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
21861 if (insn->off >= 0)
21862 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
21863 else
21864 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
21865 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
21866 insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
21867 cnt = 4;
21868
21869 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21870 if (!new_prog)
21871 return -ENOMEM;
21872
21873 delta += cnt - 1;
21874 env->prog = prog = new_prog;
21875 insn = new_prog->insnsi + i + delta;
21876 goto next_insn;
21877 }
21878
21879 if (insn->code != (BPF_JMP | BPF_CALL))
21880 goto next_insn;
21881 if (insn->src_reg == BPF_PSEUDO_CALL)
21882 goto next_insn;
21883 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
21884 ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
21885 if (ret)
21886 return ret;
21887 if (cnt == 0)
21888 goto next_insn;
21889
21890 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21891 if (!new_prog)
21892 return -ENOMEM;
21893
21894 delta += cnt - 1;
21895 env->prog = prog = new_prog;
21896 insn = new_prog->insnsi + i + delta;
21897 goto next_insn;
21898 }
21899
21900 /* Skip inlining the helper call if the JIT does it. */
21901 if (bpf_jit_inlines_helper_call(insn->imm))
21902 goto next_insn;
21903
21904 if (insn->imm == BPF_FUNC_get_route_realm)
21905 prog->dst_needed = 1;
21906 if (insn->imm == BPF_FUNC_get_prandom_u32)
21907 bpf_user_rnd_init_once();
21908 if (insn->imm == BPF_FUNC_override_return)
21909 prog->kprobe_override = 1;
21910 if (insn->imm == BPF_FUNC_tail_call) {
21911 /* If we tail call into other programs, we
21912 * cannot make any assumptions since they can
21913 * be replaced dynamically during runtime in
21914 * the program array.
21915 */
21916 prog->cb_access = 1;
21917 if (!allow_tail_call_in_subprogs(env))
21918 prog->aux->stack_depth = MAX_BPF_STACK;
21919 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
21920
21921 /* mark bpf_tail_call as different opcode to avoid
21922 * conditional branch in the interpreter for every normal
21923 * call and to prevent accidental JITing by JIT compiler
21924 * that doesn't support bpf_tail_call yet
21925 */
21926 insn->imm = 0;
21927 insn->code = BPF_JMP | BPF_TAIL_CALL;
21928
21929 aux = &env->insn_aux_data[i + delta];
21930 if (env->bpf_capable && !prog->blinding_requested &&
21931 prog->jit_requested &&
21932 !bpf_map_key_poisoned(aux) &&
21933 !bpf_map_ptr_poisoned(aux) &&
21934 !bpf_map_ptr_unpriv(aux)) {
21935 struct bpf_jit_poke_descriptor desc = {
21936 .reason = BPF_POKE_REASON_TAIL_CALL,
21937 .tail_call.map = aux->map_ptr_state.map_ptr,
21938 .tail_call.key = bpf_map_key_immediate(aux),
21939 .insn_idx = i + delta,
21940 };
21941
21942 ret = bpf_jit_add_poke_descriptor(prog, &desc);
21943 if (ret < 0) {
21944 verbose(env, "adding tail call poke descriptor failed\n");
21945 return ret;
21946 }
21947
21948 insn->imm = ret + 1;
21949 goto next_insn;
21950 }
21951
21952 if (!bpf_map_ptr_unpriv(aux))
21953 goto next_insn;
21954
21955 /* instead of changing every JIT dealing with tail_call
21956 * emit two extra insns:
21957 * if (index >= max_entries) goto out;
21958 * index &= array->index_mask;
21959 * to avoid out-of-bounds cpu speculation
21960 */
21961 if (bpf_map_ptr_poisoned(aux)) {
21962 verbose(env, "tail_call abusing map_ptr\n");
21963 return -EINVAL;
21964 }
21965
21966 map_ptr = aux->map_ptr_state.map_ptr;
21967 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
21968 map_ptr->max_entries, 2);
21969 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
21970 container_of(map_ptr,
21971 struct bpf_array,
21972 map)->index_mask);
21973 insn_buf[2] = *insn;
21974 cnt = 3;
21975 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
21976 if (!new_prog)
21977 return -ENOMEM;
21978
21979 delta += cnt - 1;
21980 env->prog = prog = new_prog;
21981 insn = new_prog->insnsi + i + delta;
21982 goto next_insn;
21983 }
21984
21985 if (insn->imm == BPF_FUNC_timer_set_callback) {
21986 /* The verifier will process callback_fn as many times as necessary
21987 * with different maps and the register states prepared by
21988 * set_timer_callback_state will be accurate.
21989 *
21990 * The following use case is valid:
21991 * map1 is shared by prog1, prog2, prog3.
21992 * prog1 calls bpf_timer_init for some map1 elements
21993 * prog2 calls bpf_timer_set_callback for some map1 elements.
21994 * Those that were not bpf_timer_init-ed will return -EINVAL.
21995 * prog3 calls bpf_timer_start for some map1 elements.
21996 * Those that were not both bpf_timer_init-ed and
21997 * bpf_timer_set_callback-ed will return -EINVAL.
21998 */
21999 struct bpf_insn ld_addrs[2] = {
22000 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
22001 };
22002
22003 insn_buf[0] = ld_addrs[0];
22004 insn_buf[1] = ld_addrs[1];
22005 insn_buf[2] = *insn;
22006 cnt = 3;
22007
22008 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
22009 if (!new_prog)
22010 return -ENOMEM;
22011
22012 delta += cnt - 1;
22013 env->prog = prog = new_prog;
22014 insn = new_prog->insnsi + i + delta;
22015 goto patch_call_imm;
22016 }
22017
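/* The bpf_*_storage_get() helpers take allocation flags as a hidden 5th
 * argument: use GFP_KERNEL only when the program is sleepable and this
 * call site was not marked atomic, GFP_ATOMIC otherwise.
 */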
22018 if (is_storage_get_function(insn->imm)) {
22019 if (!in_sleepable(env) ||
22020 env->insn_aux_data[i + delta].storage_get_func_atomic)
22021 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
22022 else
22023 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
22024 insn_buf[1] = *insn;
22025 cnt = 2;
22026
22027 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
22028 if (!new_prog)
22029 return -ENOMEM;
22030
22031 delta += cnt - 1;
22032 env->prog = prog = new_prog;
22033 insn = new_prog->insnsi + i + delta;
22034 goto patch_call_imm;
22035 }
22036
22037 /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
22038 if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
22039 /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
22040 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
22041 */
22042 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
22043 insn_buf[1] = *insn;
22044 cnt = 2;
22045
22046 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
22047 if (!new_prog)
22048 return -ENOMEM;
22049
22050 delta += cnt - 1;
22051 env->prog = prog = new_prog;
22052 insn = new_prog->insnsi + i + delta;
22053 goto patch_call_imm;
22054 }
22055
22056 /* map_gen_lookup and the other inlining handlers below rely on
22057 * BPF_EMIT_CALL() assumptions that currently hold only on 64-bit
22058 * hosts.
22059 */
22060 if (prog->jit_requested && BITS_PER_LONG == 64 &&
22061 (insn->imm == BPF_FUNC_map_lookup_elem ||
22062 insn->imm == BPF_FUNC_map_update_elem ||
22063 insn->imm == BPF_FUNC_map_delete_elem ||
22064 insn->imm == BPF_FUNC_map_push_elem ||
22065 insn->imm == BPF_FUNC_map_pop_elem ||
22066 insn->imm == BPF_FUNC_map_peek_elem ||
22067 insn->imm == BPF_FUNC_redirect_map ||
22068 insn->imm == BPF_FUNC_for_each_map_elem ||
22069 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
22070 aux = &env->insn_aux_data[i + delta];
22071 if (bpf_map_ptr_poisoned(aux))
22072 goto patch_call_imm;
22073
22074 map_ptr = aux->map_ptr_state.map_ptr;
22075 ops = map_ptr->ops;
22076 if (insn->imm == BPF_FUNC_map_lookup_elem &&
22077 ops->map_gen_lookup) {
22078 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
22079 if (cnt == -EOPNOTSUPP)
22080 goto patch_map_ops_generic;
22081 if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
22082 verbose(env, "bpf verifier is misconfigured\n");
22083 return -EINVAL;
22084 }
22085
22086 new_prog = bpf_patch_insn_data(env, i + delta,
22087 insn_buf, cnt);
22088 if (!new_prog)
22089 return -ENOMEM;
22090
22091 delta += cnt - 1;
22092 env->prog = prog = new_prog;
22093 insn = new_prog->insnsi + i + delta;
22094 goto next_insn;
22095 }
22096
22097 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
22098 (void *(*)(struct bpf_map *map, void *key))NULL));
22099 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
22100 (long (*)(struct bpf_map *map, void *key))NULL));
22101 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
22102 (long (*)(struct bpf_map *map, void *key, void *value,
22103 u64 flags))NULL));
22104 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
22105 (long (*)(struct bpf_map *map, void *value,
22106 u64 flags))NULL));
22107 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
22108 (long (*)(struct bpf_map *map, void *value))NULL));
22109 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
22110 (long (*)(struct bpf_map *map, void *value))NULL));
22111 BUILD_BUG_ON(!__same_type(ops->map_redirect,
22112 (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
22113 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
22114 (long (*)(struct bpf_map *map,
22115 bpf_callback_t callback_fn,
22116 void *callback_ctx,
22117 u64 flags))NULL));
22118 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
22119 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
22120
22121 patch_map_ops_generic:
22122 switch (insn->imm) {
22123 case BPF_FUNC_map_lookup_elem:
22124 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
22125 goto next_insn;
22126 case BPF_FUNC_map_update_elem:
22127 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
22128 goto next_insn;
22129 case BPF_FUNC_map_delete_elem:
22130 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
22131 goto next_insn;
22132 case BPF_FUNC_map_push_elem:
22133 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
22134 goto next_insn;
22135 case BPF_FUNC_map_pop_elem:
22136 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
22137 goto next_insn;
22138 case BPF_FUNC_map_peek_elem:
22139 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
22140 goto next_insn;
22141 case BPF_FUNC_redirect_map:
22142 insn->imm = BPF_CALL_IMM(ops->map_redirect);
22143 goto next_insn;
22144 case BPF_FUNC_for_each_map_elem:
22145 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
22146 goto next_insn;
22147 case BPF_FUNC_map_lookup_percpu_elem:
22148 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
22149 goto next_insn;
22150 }
22151
22152 goto patch_call_imm;
22153 }
22154
22155 /* Implement bpf_jiffies64 inline. */
22156 if (prog->jit_requested && BITS_PER_LONG == 64 &&
22157 insn->imm == BPF_FUNC_jiffies64) {
22158 struct bpf_insn ld_jiffies_addr[2] = {
22159 BPF_LD_IMM64(BPF_REG_0,
22160 (unsigned long)&jiffies),
22161 };
22162
22163 insn_buf[0] = ld_jiffies_addr[0];
22164 insn_buf[1] = ld_jiffies_addr[1];
22165 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
22166 BPF_REG_0, 0);
22167 cnt = 3;
22168
22169 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
22170 cnt);
22171 if (!new_prog)
22172 return -ENOMEM;
22173
22174 delta += cnt - 1;
22175 env->prog = prog = new_prog;
22176 insn = new_prog->insnsi + i + delta;
22177 goto next_insn;
22178 }
22179
22180 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
22181 /* Implement bpf_get_smp_processor_id() inline. */
22182 if (insn->imm == BPF_FUNC_get_smp_processor_id &&
22183 verifier_inlines_helper_call(env, insn->imm)) {
22184 /* BPF_FUNC_get_smp_processor_id inlining is an
22185 * optimization, so if cpu_number is ever
22186 * changed in some incompatible and hard to support
22187 * way, it's fine to back out this inlining logic
22188 */
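/* On SMP the three insns below are roughly equivalent to
 *	r0 = *(u32 *)bpf_this_cpu_ptr(&cpu_number);
 * i.e. convert the per-CPU symbol address into this CPU's copy and load it.
 * On !SMP the CPU id is always 0, hence the single xor of r0.
 */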
22189 #ifdef CONFIG_SMP
22190 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
22191 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
22192 insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
22193 cnt = 3;
22194 #else
22195 insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
22196 cnt = 1;
22197 #endif
22198 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
22199 if (!new_prog)
22200 return -ENOMEM;
22201
22202 delta += cnt - 1;
22203 env->prog = prog = new_prog;
22204 insn = new_prog->insnsi + i + delta;
22205 goto next_insn;
22206 }
22207 #endif
22208 /* Implement bpf_get_func_arg inline. */
22209 if (prog_type == BPF_PROG_TYPE_TRACING &&
22210 insn->imm == BPF_FUNC_get_func_arg) {
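/* Illustrative C equivalent of the sequence built below (the tracing ctx
 * stores nr_args at ctx - 8 and the arguments themselves at ctx + 8 * n):
 *
 *	nr_args = *(u64 *)(ctx - 8);
 *	if (n >= nr_args)
 *		return -EINVAL;
 *	*value = *(u64 *)(ctx + 8 * n);
 *	return 0;
 */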
22211 /* Load nr_args from ctx - 8 */
22212 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
22213 insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
22214 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
22215 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
22216 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
22217 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
22218 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
22219 insn_buf[7] = BPF_JMP_A(1);
22220 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
22221 cnt = 9;
22222
22223 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
22224 if (!new_prog)
22225 return -ENOMEM;
22226
22227 delta += cnt - 1;
22228 env->prog = prog = new_prog;
22229 insn = new_prog->insnsi + i + delta;
22230 goto next_insn;
22231 }
22232
22233 /* Implement bpf_get_func_ret inline. */
22234 if (prog_type == BPF_PROG_TYPE_TRACING &&
22235 insn->imm == BPF_FUNC_get_func_ret) {
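/* For FEXIT/MODIFY_RETURN the return value sits right after the arguments,
 * so the inlined sequence is roughly:
 *
 *	nr_args = *(u64 *)(ctx - 8);
 *	*value = *(u64 *)(ctx + 8 * nr_args);
 *	return 0;
 *
 * Other expected_attach_type values cannot observe a return value, so the
 * call degrades to a plain -EOPNOTSUPP.
 */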
22236 if (eatype == BPF_TRACE_FEXIT ||
22237 eatype == BPF_MODIFY_RETURN) {
22238 /* Load nr_args from ctx - 8 */
22239 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
22240 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
22241 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
22242 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
22243 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
22244 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
22245 cnt = 6;
22246 } else {
22247 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
22248 cnt = 1;
22249 }
22250
22251 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
22252 if (!new_prog)
22253 return -ENOMEM;
22254
22255 delta += cnt - 1;
22256 env->prog = prog = new_prog;
22257 insn = new_prog->insnsi + i + delta;
22258 goto next_insn;
22259 }
22260
22261 /* Implement get_func_arg_cnt inline. */
22262 if (prog_type == BPF_PROG_TYPE_TRACING &&
22263 insn->imm == BPF_FUNC_get_func_arg_cnt) {
22264 /* Load nr_args from ctx - 8 */
22265 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
22266
22267 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
22268 if (!new_prog)
22269 return -ENOMEM;
22270
22271 env->prog = prog = new_prog;
22272 insn = new_prog->insnsi + i + delta;
22273 goto next_insn;
22274 }
22275
22276 /* Implement bpf_get_func_ip inline. */
22277 if (prog_type == BPF_PROG_TYPE_TRACING &&
22278 insn->imm == BPF_FUNC_get_func_ip) {
22279 /* Load IP address from ctx - 16 */
22280 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
22281
22282 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
22283 if (!new_prog)
22284 return -ENOMEM;
22285
22286 env->prog = prog = new_prog;
22287 insn = new_prog->insnsi + i + delta;
22288 goto next_insn;
22289 }
22290
22291 /* Implement bpf_get_branch_snapshot inline. */
22292 if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
22293 prog->jit_requested && BITS_PER_LONG == 64 &&
22294 insn->imm == BPF_FUNC_get_branch_snapshot) {
22295 /* We are dealing with the following func protos:
22296 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
22297 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
22298 */
22299 const u32 br_entry_size = sizeof(struct perf_branch_entry);
22300
22301 /* struct perf_branch_entry is part of UAPI and is
22302 * used as an array element, so extremely unlikely to
22303 * ever grow or shrink
22304 */
22305 BUILD_BUG_ON(br_entry_size != 24);
22306
22307 /* if (unlikely(flags)) return -EINVAL */
22308 insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
22309
22310 /* Transform size (bytes) into number of entries (cnt = size / 24).
22311 * But to avoid expensive division instruction, we implement
22312 * divide-by-3 through multiplication, followed by further
22313 * division by 8 through 3-bit right shift.
22314 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
22315 * p. 227, chapter "Unsigned Division by 3" for details and proofs.
22316 *
22317 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
22318 */
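/* Illustrative sanity check of the constant: size = 96 bytes gives
 * 96 * 0xaaaaaaab = 0x4000000020, and 0x4000000020 >> 36 = 4 = 96 / 24.
 */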
22319 insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
22320 insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
22321 insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
22322
22323 /* call perf_snapshot_branch_stack implementation */
22324 insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
22325 /* if (entry_cnt == 0) return -ENOENT */
22326 insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
22327 /* return entry_cnt * sizeof(struct perf_branch_entry) */
22328 insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
22329 insn_buf[7] = BPF_JMP_A(3);
22330 /* return -EINVAL; */
22331 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
22332 insn_buf[9] = BPF_JMP_A(1);
22333 /* return -ENOENT; */
22334 insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
22335 cnt = 11;
22336
22337 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
22338 if (!new_prog)
22339 return -ENOMEM;
22340
22341 delta += cnt - 1;
22342 env->prog = prog = new_prog;
22343 insn = new_prog->insnsi + i + delta;
22344 goto next_insn;
22345 }
22346
22347 /* Implement bpf_kptr_xchg inline */
22348 if (prog->jit_requested && BITS_PER_LONG == 64 &&
22349 insn->imm == BPF_FUNC_kptr_xchg &&
22350 bpf_jit_supports_ptr_xchg()) {
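/* Roughly r0 = xchg((u64 *)(r1 + 0), r2): store the new kptr and return the
 * old one without going through the helper.
 */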
22351 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
22352 insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
22353 cnt = 2;
22354
22355 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
22356 if (!new_prog)
22357 return -ENOMEM;
22358
22359 delta += cnt - 1;
22360 env->prog = prog = new_prog;
22361 insn = new_prog->insnsi + i + delta;
22362 goto next_insn;
22363 }
22364 patch_call_imm:
22365 fn = env->ops->get_func_proto(insn->imm, env->prog);
22366 /* all functions that have a prototype and that the verifier allowed
22367 * programs to call must be real in-kernel functions
22368 */
22369 if (!fn->func) {
22370 verbose(env,
22371 "kernel subsystem misconfigured func %s#%d\n",
22372 func_id_name(insn->imm), insn->imm);
22373 return -EFAULT;
22374 }
22375 insn->imm = fn->func - __bpf_call_base;
22376 next_insn:
22377 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
22378 subprogs[cur_subprog].stack_depth += stack_depth_extra;
22379 subprogs[cur_subprog].stack_extra = stack_depth_extra;
22380
22381 stack_depth = subprogs[cur_subprog].stack_depth;
22382 if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
22383 verbose(env, "stack size %d(extra %d) is too large\n",
22384 stack_depth, stack_depth_extra);
22385 return -EINVAL;
22386 }
22387 cur_subprog++;
22388 stack_depth = subprogs[cur_subprog].stack_depth;
22389 stack_depth_extra = 0;
22390 }
22391 i++;
22392 insn++;
22393 }
22394
22395 env->prog->aux->stack_depth = subprogs[0].stack_depth;
22396 for (i = 0; i < env->subprog_cnt; i++) {
22397 int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
22398 int subprog_start = subprogs[i].start;
22399 int stack_slots = subprogs[i].stack_extra / 8;
22400 int slots = delta, cnt = 0;
22401
22402 if (!stack_slots)
22403 continue;
22404 /* We need two slots in case timed may_goto is supported. */
22405 if (stack_slots > slots) {
22406 verbose(env, "verifier bug: stack_slots supports may_goto only\n");
22407 return -EFAULT;
22408 }
22409
22410 stack_depth = subprogs[i].stack_depth;
22411 if (bpf_jit_supports_timed_may_goto()) {
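/* Two slots here: the remaining-iterations count, plus a second slot used
 * as timestamp scratch space by the timed may_goto handling in the JIT
 * (initialized to 0 below).
 */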
22412 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
22413 BPF_MAX_TIMED_LOOPS);
22414 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
22415 } else {
22416 /* Add ST insn to subprog prologue to init extra stack */
22417 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
22418 BPF_MAX_LOOPS);
22419 }
22420 /* Copy first actual insn to preserve it */
22421 insn_buf[cnt++] = env->prog->insnsi[subprog_start];
22422
22423 new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
22424 if (!new_prog)
22425 return -ENOMEM;
22426 env->prog = prog = new_prog;
22427 /*
22428 * If may_goto is the first insn of a prog, there could be a jmp
22429 * insn that points to it, hence adjust all such jmps to point
22430 * to insn after BPF_ST that inits may_goto count.
22431 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
22432 */
22433 WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
22434 }
22435
22436 /* Since poke tab is now finalized, publish aux to tracker. */
22437 for (i = 0; i < prog->aux->size_poke_tab; i++) {
22438 map_ptr = prog->aux->poke_tab[i].tail_call.map;
22439 if (!map_ptr->ops->map_poke_track ||
22440 !map_ptr->ops->map_poke_untrack ||
22441 !map_ptr->ops->map_poke_run) {
22442 verbose(env, "bpf verifier is misconfigured\n");
22443 return -EINVAL;
22444 }
22445
22446 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
22447 if (ret < 0) {
22448 verbose(env, "tracking tail call prog failed\n");
22449 return ret;
22450 }
22451 }
22452
22453 sort_kfunc_descs_by_imm_off(env->prog);
22454
22455 return 0;
22456 }
22457
22458 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
22459 int position,
22460 s32 stack_base,
22461 u32 callback_subprogno,
22462 u32 *total_cnt)
22463 {
22464 s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
22465 s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
22466 s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
22467 int reg_loop_max = BPF_REG_6;
22468 int reg_loop_cnt = BPF_REG_7;
22469 int reg_loop_ctx = BPF_REG_8;
22470
22471 struct bpf_insn *insn_buf = env->insn_buf;
22472 struct bpf_prog *new_prog;
22473 u32 callback_start;
22474 u32 call_insn_offset;
22475 s32 callback_offset;
22476 u32 cnt = 0;
22477
22478 /* This represents an inlined version of bpf_iter.c:bpf_loop;
22479 * take care to keep the two in sync when modifying either.
22480 */
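/* For illustration only, the emitted sequence is roughly equivalent to:
 *
 *	if (nr_loops > BPF_MAX_LOOPS)
 *		return -E2BIG;
 *	for (i = 0; i < nr_loops;) {
 *		ret = callback_fn(i, callback_ctx);
 *		i++;
 *		if (ret)
 *			break;
 *	}
 *	return i;
 *
 * with R6/R7/R8 spilled around it so they can serve as loop registers.
 */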
22481
22482 /* Return error and jump to the end of the patch if
22483 * expected number of iterations is too big.
22484 */
22485 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
22486 insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
22487 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
22488 /* spill R6, R7, R8 to use these as loop vars */
22489 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
22490 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
22491 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
22492 /* initialize loop vars */
22493 insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
22494 insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
22495 insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
22496 /* loop header,
22497 * if reg_loop_cnt >= reg_loop_max skip the loop body
22498 */
22499 insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
22500 /* callback call,
22501 * correct callback offset would be set after patching
22502 */
22503 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
22504 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
22505 insn_buf[cnt++] = BPF_CALL_REL(0);
22506 /* increment loop counter */
22507 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
22508 /* jump to loop header if callback returned 0 */
22509 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
22510 /* return value of bpf_loop,
22511 * set R0 to the number of iterations
22512 */
22513 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
22514 /* restore original values of R6, R7, R8 */
22515 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
22516 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
22517 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
22518
22519 *total_cnt = cnt;
22520 new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
22521 if (!new_prog)
22522 return new_prog;
22523
22524 /* callback start is known only after patching */
22525 callback_start = env->subprog_info[callback_subprogno].start;
22526 /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
22527 call_insn_offset = position + 12;
22528 callback_offset = callback_start - call_insn_offset - 1;
22529 new_prog->insnsi[call_insn_offset].imm = callback_offset;
22530
22531 return new_prog;
22532 }
22533
22534 static bool is_bpf_loop_call(struct bpf_insn *insn)
22535 {
22536 return insn->code == (BPF_JMP | BPF_CALL) &&
22537 insn->src_reg == 0 &&
22538 insn->imm == BPF_FUNC_loop;
22539 }
22540
22541 /* For all sub-programs in the program (including main) check
22542 * insn_aux_data to see if there are bpf_loop calls that require
22543 * inlining. If such calls are found, they are replaced with the
22544 * instruction sequence produced by the `inline_bpf_loop` function,
22545 * and the subprog's stack_depth is increased by the size of three
22546 * registers. This stack space is used to spill the values of R6, R7
22547 * and R8. These registers are used to store the loop bound, counter
22548 * and context variables.
22549 */
22550 static int optimize_bpf_loop(struct bpf_verifier_env *env)
22551 {
22552 struct bpf_subprog_info *subprogs = env->subprog_info;
22553 int i, cur_subprog = 0, cnt, delta = 0;
22554 struct bpf_insn *insn = env->prog->insnsi;
22555 int insn_cnt = env->prog->len;
22556 u16 stack_depth = subprogs[cur_subprog].stack_depth;
22557 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
22558 u16 stack_depth_extra = 0;
22559
22560 for (i = 0; i < insn_cnt; i++, insn++) {
22561 struct bpf_loop_inline_state *inline_state =
22562 &env->insn_aux_data[i + delta].loop_inline_state;
22563
22564 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
22565 struct bpf_prog *new_prog;
22566
22567 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
22568 new_prog = inline_bpf_loop(env,
22569 i + delta,
22570 -(stack_depth + stack_depth_extra),
22571 inline_state->callback_subprogno,
22572 &cnt);
22573 if (!new_prog)
22574 return -ENOMEM;
22575
22576 delta += cnt - 1;
22577 env->prog = new_prog;
22578 insn = new_prog->insnsi + i + delta;
22579 }
22580
22581 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
22582 subprogs[cur_subprog].stack_depth += stack_depth_extra;
22583 cur_subprog++;
22584 stack_depth = subprogs[cur_subprog].stack_depth;
22585 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
22586 stack_depth_extra = 0;
22587 }
22588 }
22589
22590 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
22591
22592 return 0;
22593 }
22594
22595 /* Remove unnecessary spill/fill pairs that are members of the fastcall
22596 * pattern, and adjust subprogram stack depth when possible.
22597 */
22598 static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
22599 {
22600 struct bpf_subprog_info *subprog = env->subprog_info;
22601 struct bpf_insn_aux_data *aux = env->insn_aux_data;
22602 struct bpf_insn *insn = env->prog->insnsi;
22603 int insn_cnt = env->prog->len;
22604 u32 spills_num;
22605 bool modified = false;
22606 int i, j;
22607
22608 for (i = 0; i < insn_cnt; i++, insn++) {
22609 if (aux[i].fastcall_spills_num > 0) {
22610 spills_num = aux[i].fastcall_spills_num;
22611 /* NOPs would be removed by opt_remove_nops() */
22612 for (j = 1; j <= spills_num; ++j) {
22613 *(insn - j) = NOP;
22614 *(insn + j) = NOP;
22615 }
22616 modified = true;
22617 }
22618 if ((subprog + 1)->start == i + 1) {
22619 if (modified && !subprog->keep_fastcall_stack)
22620 subprog->stack_depth = -subprog->fastcall_stack_off;
22621 subprog++;
22622 modified = false;
22623 }
22624 }
22625
22626 return 0;
22627 }
22628
22629 static void free_states(struct bpf_verifier_env *env)
22630 {
22631 struct bpf_verifier_state_list *sl;
22632 struct list_head *head, *pos, *tmp;
22633 int i;
22634
22635 list_for_each_safe(pos, tmp, &env->free_list) {
22636 sl = container_of(pos, struct bpf_verifier_state_list, node);
22637 free_verifier_state(&sl->state, false);
22638 kfree(sl);
22639 }
22640 INIT_LIST_HEAD(&env->free_list);
22641
22642 if (!env->explored_states)
22643 return;
22644
22645 for (i = 0; i < state_htab_size(env); i++) {
22646 head = &env->explored_states[i];
22647
22648 list_for_each_safe(pos, tmp, head) {
22649 sl = container_of(pos, struct bpf_verifier_state_list, node);
22650 free_verifier_state(&sl->state, false);
22651 kfree(sl);
22652 }
22653 INIT_LIST_HEAD(&env->explored_states[i]);
22654 }
22655 }
22656
22657 static int do_check_common(struct bpf_verifier_env *env, int subprog)
22658 {
22659 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
22660 struct bpf_subprog_info *sub = subprog_info(env, subprog);
22661 struct bpf_prog_aux *aux = env->prog->aux;
22662 struct bpf_verifier_state *state;
22663 struct bpf_reg_state *regs;
22664 int ret, i;
22665
22666 env->prev_linfo = NULL;
22667 env->pass_cnt++;
22668
22669 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
22670 if (!state)
22671 return -ENOMEM;
22672 state->curframe = 0;
22673 state->speculative = false;
22674 state->branches = 1;
22675 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
22676 if (!state->frame[0]) {
22677 kfree(state);
22678 return -ENOMEM;
22679 }
22680 env->cur_state = state;
22681 init_func_state(env, state->frame[0],
22682 BPF_MAIN_FUNC /* callsite */,
22683 0 /* frameno */,
22684 subprog);
22685 state->first_insn_idx = env->subprog_info[subprog].start;
22686 state->last_insn_idx = -1;
22687
22688 regs = state->frame[state->curframe]->regs;
22689 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
22690 const char *sub_name = subprog_name(env, subprog);
22691 struct bpf_subprog_arg_info *arg;
22692 struct bpf_reg_state *reg;
22693
22694 verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
22695 ret = btf_prepare_func_args(env, subprog);
22696 if (ret)
22697 goto out;
22698
22699 if (subprog_is_exc_cb(env, subprog)) {
22700 state->frame[0]->in_exception_callback_fn = true;
22701 /* We have already ensured that the callback returns an integer, just
22702 * like all global subprogs. We only need to check that it has a single
22703 * scalar argument.
22704 */
22705 if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
22706 verbose(env, "exception cb only supports single integer argument\n");
22707 ret = -EINVAL;
22708 goto out;
22709 }
22710 }
22711 for (i = BPF_REG_1; i <= sub->arg_cnt; i++) {
22712 arg = &sub->args[i - BPF_REG_1];
22713 reg = &regs[i];
22714
22715 if (arg->arg_type == ARG_PTR_TO_CTX) {
22716 reg->type = PTR_TO_CTX;
22717 mark_reg_known_zero(env, regs, i);
22718 } else if (arg->arg_type == ARG_ANYTHING) {
22719 reg->type = SCALAR_VALUE;
22720 mark_reg_unknown(env, regs, i);
22721 } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
22722 /* assume unspecial LOCAL dynptr type */
22723 __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen);
22724 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
22725 reg->type = PTR_TO_MEM;
22726 if (arg->arg_type & PTR_MAYBE_NULL)
22727 reg->type |= PTR_MAYBE_NULL;
22728 mark_reg_known_zero(env, regs, i);
22729 reg->mem_size = arg->mem_size;
22730 reg->id = ++env->id_gen;
22731 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
22732 reg->type = PTR_TO_BTF_ID;
22733 if (arg->arg_type & PTR_MAYBE_NULL)
22734 reg->type |= PTR_MAYBE_NULL;
22735 if (arg->arg_type & PTR_UNTRUSTED)
22736 reg->type |= PTR_UNTRUSTED;
22737 if (arg->arg_type & PTR_TRUSTED)
22738 reg->type |= PTR_TRUSTED;
22739 mark_reg_known_zero(env, regs, i);
22740 reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
22741 reg->btf_id = arg->btf_id;
22742 reg->id = ++env->id_gen;
22743 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
22744 /* caller can pass either PTR_TO_ARENA or SCALAR */
22745 mark_reg_unknown(env, regs, i);
22746 } else {
22747 WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n",
22748 i - BPF_REG_1, arg->arg_type);
22749 ret = -EFAULT;
22750 goto out;
22751 }
22752 }
22753 } else {
22754 /* if the main BPF program has associated BTF info, validate that
22755 * it matches the expected signature, and otherwise mark the main
22756 * program's BTF info as unreliable
22757 */
22758 if (env->prog->aux->func_info_aux) {
22759 ret = btf_prepare_func_args(env, 0);
22760 if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX)
22761 env->prog->aux->func_info_aux[0].unreliable = true;
22762 }
22763
22764 /* 1st arg to a function */
22765 regs[BPF_REG_1].type = PTR_TO_CTX;
22766 mark_reg_known_zero(env, regs, BPF_REG_1);
22767 }
22768
22769 /* Acquire references for struct_ops program arguments tagged with "__ref" */
22770 if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
22771 for (i = 0; i < aux->ctx_arg_info_size; i++)
22772 aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ?
22773 acquire_reference(env, 0) : 0;
22774 }
22775
22776 ret = do_check(env);
22777 out:
22778 /* check for NULL is necessary, since cur_state can be freed inside
22779 * do_check() under memory pressure.
22780 */
22781 if (env->cur_state) {
22782 free_verifier_state(env->cur_state, true);
22783 env->cur_state = NULL;
22784 }
22785 while (!pop_stack(env, NULL, NULL, false));
22786 if (!ret && pop_log)
22787 bpf_vlog_reset(&env->log, 0);
22788 free_states(env);
22789 return ret;
22790 }
22791
22792 /* Lazily verify all global functions based on their BTF, if they are called
22793 * from main BPF program or any of subprograms transitively.
22794 * BPF global subprogs called from dead code are not validated.
22795 * All callable global functions must pass verification.
22796 * Otherwise the whole program is rejected.
22797 * Consider:
22798 * int bar(int);
22799 * int foo(int f)
22800 * {
22801 * return bar(f);
22802 * }
22803 * int bar(int b)
22804 * {
22805 * ...
22806 * }
22807 * foo() will be verified first for R1=any_scalar_value. During verification it
22808 * will be assumed that bar() already verified successfully and call to bar()
22809 * from foo() will be checked for type match only. Later bar() will be verified
22810 * independently to check that it's safe for R1=any_scalar_value.
22811 */
22812 static int do_check_subprogs(struct bpf_verifier_env *env)
22813 {
22814 struct bpf_prog_aux *aux = env->prog->aux;
22815 struct bpf_func_info_aux *sub_aux;
22816 int i, ret, new_cnt;
22817
22818 if (!aux->func_info)
22819 return 0;
22820
22821 /* exception callback is presumed to be always called */
22822 if (env->exception_callback_subprog)
22823 subprog_aux(env, env->exception_callback_subprog)->called = true;
22824
22825 again:
22826 new_cnt = 0;
22827 for (i = 1; i < env->subprog_cnt; i++) {
22828 if (!subprog_is_global(env, i))
22829 continue;
22830
22831 sub_aux = subprog_aux(env, i);
22832 if (!sub_aux->called || sub_aux->verified)
22833 continue;
22834
22835 env->insn_idx = env->subprog_info[i].start;
22836 WARN_ON_ONCE(env->insn_idx == 0);
22837 ret = do_check_common(env, i);
22838 if (ret) {
22839 return ret;
22840 } else if (env->log.level & BPF_LOG_LEVEL) {
22841 verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
22842 i, subprog_name(env, i));
22843 }
22844
22845 /* We verified new global subprog, it might have called some
22846 * more global subprogs that we haven't verified yet, so we
22847 * need to do another pass over subprogs to verify those.
22848 */
22849 sub_aux->verified = true;
22850 new_cnt++;
22851 }
22852
22853 /* We can't loop forever as we verify at least one global subprog on
22854 * each pass.
22855 */
22856 if (new_cnt)
22857 goto again;
22858
22859 return 0;
22860 }
22861
22862 static int do_check_main(struct bpf_verifier_env *env)
22863 {
22864 int ret;
22865
22866 env->insn_idx = 0;
22867 ret = do_check_common(env, 0);
22868 if (!ret)
22869 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
22870 return ret;
22871 }
22872
22873
22874 static void print_verification_stats(struct bpf_verifier_env *env)
22875 {
22876 int i;
22877
22878 if (env->log.level & BPF_LOG_STATS) {
22879 verbose(env, "verification time %lld usec\n",
22880 div_u64(env->verification_time, 1000));
22881 verbose(env, "stack depth ");
22882 for (i = 0; i < env->subprog_cnt; i++) {
22883 u32 depth = env->subprog_info[i].stack_depth;
22884
22885 verbose(env, "%d", depth);
22886 if (i + 1 < env->subprog_cnt)
22887 verbose(env, "+");
22888 }
22889 verbose(env, "\n");
22890 }
22891 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
22892 "total_states %d peak_states %d mark_read %d\n",
22893 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
22894 env->max_states_per_insn, env->total_states,
22895 env->peak_states, env->longest_mark_read_walk);
22896 }
22897
22898 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
22899 const struct bpf_ctx_arg_aux *info, u32 cnt)
22900 {
22901 prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL);
22902 prog->aux->ctx_arg_info_size = cnt;
22903
22904 return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
22905 }
22906
22907 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
22908 {
22909 const struct btf_type *t, *func_proto;
22910 const struct bpf_struct_ops_desc *st_ops_desc;
22911 const struct bpf_struct_ops *st_ops;
22912 const struct btf_member *member;
22913 struct bpf_prog *prog = env->prog;
22914 bool has_refcounted_arg = false;
22915 u32 btf_id, member_idx, member_off;
22916 struct btf *btf;
22917 const char *mname;
22918 int i, err;
22919
22920 if (!prog->gpl_compatible) {
22921 verbose(env, "struct ops programs must have a GPL compatible license\n");
22922 return -EINVAL;
22923 }
22924
22925 if (!prog->aux->attach_btf_id)
22926 return -ENOTSUPP;
22927
22928 btf = prog->aux->attach_btf;
22929 if (btf_is_module(btf)) {
22930 /* Make sure st_ops is valid through the lifetime of env */
22931 env->attach_btf_mod = btf_try_get_module(btf);
22932 if (!env->attach_btf_mod) {
22933 verbose(env, "struct_ops module %s is not found\n",
22934 btf_get_name(btf));
22935 return -ENOTSUPP;
22936 }
22937 }
22938
22939 btf_id = prog->aux->attach_btf_id;
22940 st_ops_desc = bpf_struct_ops_find(btf, btf_id);
22941 if (!st_ops_desc) {
22942 verbose(env, "attach_btf_id %u is not a supported struct\n",
22943 btf_id);
22944 return -ENOTSUPP;
22945 }
22946 st_ops = st_ops_desc->st_ops;
22947
22948 t = st_ops_desc->type;
22949 member_idx = prog->expected_attach_type;
22950 if (member_idx >= btf_type_vlen(t)) {
22951 verbose(env, "attach to invalid member idx %u of struct %s\n",
22952 member_idx, st_ops->name);
22953 return -EINVAL;
22954 }
22955
22956 member = &btf_type_member(t)[member_idx];
22957 mname = btf_name_by_offset(btf, member->name_off);
22958 func_proto = btf_type_resolve_func_ptr(btf, member->type,
22959 NULL);
22960 if (!func_proto) {
22961 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
22962 mname, member_idx, st_ops->name);
22963 return -EINVAL;
22964 }
22965
22966 member_off = __btf_member_bit_offset(t, member) / 8;
22967 err = bpf_struct_ops_supported(st_ops, member_off);
22968 if (err) {
22969 verbose(env, "attach to unsupported member %s of struct %s\n",
22970 mname, st_ops->name);
22971 return err;
22972 }
22973
22974 if (st_ops->check_member) {
22975 err = st_ops->check_member(t, member, prog);
22976
22977 if (err) {
22978 verbose(env, "attach to unsupported member %s of struct %s\n",
22979 mname, st_ops->name);
22980 return err;
22981 }
22982 }
22983
22984 if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
22985 verbose(env, "Private stack not supported by jit\n");
22986 return -EACCES;
22987 }
22988
22989 for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
22990 if (st_ops_desc->arg_info[member_idx].info->refcounted) {
22991 has_refcounted_arg = true;
22992 break;
22993 }
22994 }
22995
22996 /* Tail call is not allowed for programs with refcounted arguments since we
22997 * cannot guarantee that valid refcounted kptrs will be passed to the callee.
22998 */
22999 for (i = 0; i < env->subprog_cnt; i++) {
23000 if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
23001 verbose(env, "program with __ref argument cannot tail call\n");
23002 return -EINVAL;
23003 }
23004 }
23005
23006 prog->aux->st_ops = st_ops;
23007 prog->aux->attach_st_ops_member_off = member_off;
23008
23009 prog->aux->attach_func_proto = func_proto;
23010 prog->aux->attach_func_name = mname;
23011 env->ops = st_ops->verifier_ops;
23012
23013 return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
23014 st_ops_desc->arg_info[member_idx].cnt);
23015 }
23016 #define SECURITY_PREFIX "security_"
23017
23018 static int check_attach_modify_return(unsigned long addr, const char *func_name)
23019 {
23020 if (within_error_injection_list(addr) ||
23021 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
23022 return 0;
23023
23024 return -EINVAL;
23025 }
23026
23027 /* list of non-sleepable functions that are otherwise on
23028 * ALLOW_ERROR_INJECTION list
23029 */
23030 BTF_SET_START(btf_non_sleepable_error_inject)
23031 /* Three functions below can be called from sleepable and non-sleepable context.
23032 * Assume non-sleepable from bpf safety point of view.
23033 */
23034 BTF_ID(func, __filemap_add_folio)
23035 #ifdef CONFIG_FAIL_PAGE_ALLOC
23036 BTF_ID(func, should_fail_alloc_page)
23037 #endif
23038 #ifdef CONFIG_FAILSLAB
23039 BTF_ID(func, should_failslab)
23040 #endif
23041 BTF_SET_END(btf_non_sleepable_error_inject)
23042
23043 static int check_non_sleepable_error_inject(u32 btf_id)
23044 {
23045 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
23046 }
23047
23048 int bpf_check_attach_target(struct bpf_verifier_log *log,
23049 const struct bpf_prog *prog,
23050 const struct bpf_prog *tgt_prog,
23051 u32 btf_id,
23052 struct bpf_attach_target_info *tgt_info)
23053 {
23054 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
23055 bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
23056 char trace_symbol[KSYM_SYMBOL_LEN];
23057 const char prefix[] = "btf_trace_";
23058 struct bpf_raw_event_map *btp;
23059 int ret = 0, subprog = -1, i;
23060 const struct btf_type *t;
23061 bool conservative = true;
23062 const char *tname, *fname;
23063 struct btf *btf;
23064 long addr = 0;
23065 struct module *mod = NULL;
23066
23067 if (!btf_id) {
23068 bpf_log(log, "Tracing programs must provide btf_id\n");
23069 return -EINVAL;
23070 }
23071 btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
23072 if (!btf) {
23073 bpf_log(log,
23074 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
23075 return -EINVAL;
23076 }
23077 t = btf_type_by_id(btf, btf_id);
23078 if (!t) {
23079 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
23080 return -EINVAL;
23081 }
23082 tname = btf_name_by_offset(btf, t->name_off);
23083 if (!tname) {
23084 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
23085 return -EINVAL;
23086 }
23087 if (tgt_prog) {
23088 struct bpf_prog_aux *aux = tgt_prog->aux;
23089 bool tgt_changes_pkt_data;
23090 bool tgt_might_sleep;
23091
23092 if (bpf_prog_is_dev_bound(prog->aux) &&
23093 !bpf_prog_dev_bound_match(prog, tgt_prog)) {
23094 bpf_log(log, "Target program bound device mismatch");
23095 return -EINVAL;
23096 }
23097
23098 for (i = 0; i < aux->func_info_cnt; i++)
23099 if (aux->func_info[i].type_id == btf_id) {
23100 subprog = i;
23101 break;
23102 }
23103 if (subprog == -1) {
23104 bpf_log(log, "Subprog %s doesn't exist\n", tname);
23105 return -EINVAL;
23106 }
23107 if (aux->func && aux->func[subprog]->aux->exception_cb) {
23108 bpf_log(log,
23109 "%s programs cannot attach to exception callback\n",
23110 prog_extension ? "Extension" : "FENTRY/FEXIT");
23111 return -EINVAL;
23112 }
23113 conservative = aux->func_info_aux[subprog].unreliable;
23114 if (prog_extension) {
23115 if (conservative) {
23116 bpf_log(log,
23117 "Cannot replace static functions\n");
23118 return -EINVAL;
23119 }
23120 if (!prog->jit_requested) {
23121 bpf_log(log,
23122 "Extension programs should be JITed\n");
23123 return -EINVAL;
23124 }
23125 tgt_changes_pkt_data = aux->func
23126 ? aux->func[subprog]->aux->changes_pkt_data
23127 : aux->changes_pkt_data;
23128 if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
23129 bpf_log(log,
23130 "Extension program changes packet data, while original does not\n");
23131 return -EINVAL;
23132 }
23133
23134 tgt_might_sleep = aux->func
23135 ? aux->func[subprog]->aux->might_sleep
23136 : aux->might_sleep;
23137 if (prog->aux->might_sleep && !tgt_might_sleep) {
23138 bpf_log(log,
23139 "Extension program may sleep, while original does not\n");
23140 return -EINVAL;
23141 }
23142 }
23143 if (!tgt_prog->jited) {
23144 bpf_log(log, "Can attach to only JITed progs\n");
23145 return -EINVAL;
23146 }
23147 if (prog_tracing) {
23148 if (aux->attach_tracing_prog) {
23149 /*
23150 * Target program is an fentry/fexit which is already attached
23151 * to another tracing program. More levels of nesting
23152 * attachment are not allowed.
23153 */
23154 bpf_log(log, "Cannot nest tracing program attach more than once\n");
23155 return -EINVAL;
23156 }
23157 } else if (tgt_prog->type == prog->type) {
23158 /*
23159 * To avoid potential call chain cycles, prevent attaching of a
23160 * program extension to another extension. It's ok to attach
23161 * fentry/fexit to extension program.
23162 */
23163 bpf_log(log, "Cannot recursively attach\n");
23164 return -EINVAL;
23165 }
23166 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
23167 prog_extension &&
23168 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
23169 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
23170 /* Program extensions can extend all program types
23171 * except fentry/fexit. The reason is the following.
23172 * The fentry/fexit programs are used for performance
23173 * analysis, stats and can be attached to any program
23174 * type. When extension program is replacing XDP function
23175 * it is necessary to allow performance analysis of all
23176 * functions. Both original XDP program and its program
23177 * extension. Hence attaching fentry/fexit to
23178 * BPF_PROG_TYPE_EXT is allowed. If extending of
23179 * fentry/fexit was allowed it would be possible to create
23180 * long call chain fentry->extension->fentry->extension
23181 * beyond reasonable stack size. Hence extending fentry
23182 * is not allowed.
23183 */
23184 bpf_log(log, "Cannot extend fentry/fexit\n");
23185 return -EINVAL;
23186 }
23187 } else {
23188 if (prog_extension) {
23189 bpf_log(log, "Cannot replace kernel functions\n");
23190 return -EINVAL;
23191 }
23192 }
23193
23194 switch (prog->expected_attach_type) {
23195 case BPF_TRACE_RAW_TP:
23196 if (tgt_prog) {
23197 bpf_log(log,
23198 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
23199 return -EINVAL;
23200 }
23201 if (!btf_type_is_typedef(t)) {
23202 bpf_log(log, "attach_btf_id %u is not a typedef\n",
23203 btf_id);
23204 return -EINVAL;
23205 }
23206 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
23207 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
23208 btf_id, tname);
23209 return -EINVAL;
23210 }
23211 tname += sizeof(prefix) - 1;
23212
23213 /* The func_proto of "btf_trace_##tname" is generated from typedef without argument
23214 * names. Thus using bpf_raw_event_map to get argument names.
23215 */
23216 btp = bpf_get_raw_tracepoint(tname);
23217 if (!btp)
23218 return -EINVAL;
23219 fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
23220 trace_symbol);
23221 bpf_put_raw_tracepoint(btp);
23222
23223 if (fname)
23224 ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
23225
23226 if (!fname || ret < 0) {
23227 bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
23228 prefix, tname);
23229 t = btf_type_by_id(btf, t->type);
23230 if (!btf_type_is_ptr(t))
23231 /* should never happen in valid vmlinux build */
23232 return -EINVAL;
23233 } else {
23234 t = btf_type_by_id(btf, ret);
23235 if (!btf_type_is_func(t))
23236 /* should never happen in valid vmlinux build */
23237 return -EINVAL;
23238 }
23239
23240 t = btf_type_by_id(btf, t->type);
23241 if (!btf_type_is_func_proto(t))
23242 /* should never happen in valid vmlinux build */
23243 return -EINVAL;
23244
23245 break;
23246 case BPF_TRACE_ITER:
23247 if (!btf_type_is_func(t)) {
23248 bpf_log(log, "attach_btf_id %u is not a function\n",
23249 btf_id);
23250 return -EINVAL;
23251 }
23252 t = btf_type_by_id(btf, t->type);
23253 if (!btf_type_is_func_proto(t))
23254 return -EINVAL;
23255 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
23256 if (ret)
23257 return ret;
23258 break;
23259 default:
23260 if (!prog_extension)
23261 return -EINVAL;
23262 fallthrough;
23263 case BPF_MODIFY_RETURN:
23264 case BPF_LSM_MAC:
23265 case BPF_LSM_CGROUP:
23266 case BPF_TRACE_FENTRY:
23267 case BPF_TRACE_FEXIT:
23268 if (!btf_type_is_func(t)) {
23269 bpf_log(log, "attach_btf_id %u is not a function\n",
23270 btf_id);
23271 return -EINVAL;
23272 }
23273 if (prog_extension &&
23274 btf_check_type_match(log, prog, btf, t))
23275 return -EINVAL;
23276 t = btf_type_by_id(btf, t->type);
23277 if (!btf_type_is_func_proto(t))
23278 return -EINVAL;
23279
23280 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
23281 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
23282 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
23283 return -EINVAL;
23284
23285 if (tgt_prog && conservative)
23286 t = NULL;
23287
23288 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
23289 if (ret < 0)
23290 return ret;
23291
23292 if (tgt_prog) {
23293 if (subprog == 0)
23294 addr = (long) tgt_prog->bpf_func;
23295 else
23296 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
23297 } else {
23298 if (btf_is_module(btf)) {
23299 mod = btf_try_get_module(btf);
23300 if (mod)
23301 addr = find_kallsyms_symbol_value(mod, tname);
23302 else
23303 addr = 0;
23304 } else {
23305 addr = kallsyms_lookup_name(tname);
23306 }
23307 if (!addr) {
23308 module_put(mod);
23309 bpf_log(log,
23310 "The address of function %s cannot be found\n",
23311 tname);
23312 return -ENOENT;
23313 }
23314 }
23315
23316 if (prog->sleepable) {
23317 ret = -EINVAL;
23318 switch (prog->type) {
23319 case BPF_PROG_TYPE_TRACING:
23320
23321 /* fentry/fexit/fmod_ret progs can be sleepable if they are
23322 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
23323 */
23324 if (!check_non_sleepable_error_inject(btf_id) &&
23325 within_error_injection_list(addr))
23326 ret = 0;
23327 /* fentry/fexit/fmod_ret progs can also be sleepable if they are
23328 * in the fmodret id set with the KF_SLEEPABLE flag.
23329 */
23330 else {
23331 u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
23332 prog);
23333
23334 if (flags && (*flags & KF_SLEEPABLE))
23335 ret = 0;
23336 }
23337 break;
23338 case BPF_PROG_TYPE_LSM:
23339 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
23340 * Only some of them are sleepable.
23341 */
23342 if (bpf_lsm_is_sleepable_hook(btf_id))
23343 ret = 0;
23344 break;
23345 default:
23346 break;
23347 }
23348 if (ret) {
23349 module_put(mod);
23350 bpf_log(log, "%s is not sleepable\n", tname);
23351 return ret;
23352 }
23353 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
23354 if (tgt_prog) {
23355 module_put(mod);
23356 bpf_log(log, "can't modify return codes of BPF programs\n");
23357 return -EINVAL;
23358 }
23359 ret = -EINVAL;
23360 if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
23361 !check_attach_modify_return(addr, tname))
23362 ret = 0;
23363 if (ret) {
23364 module_put(mod);
23365 bpf_log(log, "%s() is not modifiable\n", tname);
23366 return ret;
23367 }
23368 }
23369
23370 break;
23371 }
23372 tgt_info->tgt_addr = addr;
23373 tgt_info->tgt_name = tname;
23374 tgt_info->tgt_type = t;
23375 tgt_info->tgt_mod = mod;
23376 return 0;
23377 }
23378
23379 BTF_SET_START(btf_id_deny)
23380 BTF_ID_UNUSED
23381 #ifdef CONFIG_SMP
23382 BTF_ID(func, migrate_disable)
23383 BTF_ID(func, migrate_enable)
23384 #endif
23385 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
23386 BTF_ID(func, rcu_read_unlock_strict)
23387 #endif
23388 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
23389 BTF_ID(func, preempt_count_add)
23390 BTF_ID(func, preempt_count_sub)
23391 #endif
23392 #ifdef CONFIG_PREEMPT_RCU
23393 BTF_ID(func, __rcu_read_lock)
23394 BTF_ID(func, __rcu_read_unlock)
23395 #endif
23396 BTF_SET_END(btf_id_deny)
23397
23398 /* fexit and fmod_ret can't be used to attach to __noreturn functions.
23399 * Currently, we must manually list all __noreturn functions here. Once a more
23400 * robust solution is implemented, this workaround can be removed.
23401 */
23402 BTF_SET_START(noreturn_deny)
23403 #ifdef CONFIG_IA32_EMULATION
23404 BTF_ID(func, __ia32_sys_exit)
23405 BTF_ID(func, __ia32_sys_exit_group)
23406 #endif
23407 #ifdef CONFIG_KUNIT
23408 BTF_ID(func, __kunit_abort)
23409 BTF_ID(func, kunit_try_catch_throw)
23410 #endif
23411 #ifdef CONFIG_MODULES
23412 BTF_ID(func, __module_put_and_kthread_exit)
23413 #endif
23414 #ifdef CONFIG_X86_64
23415 BTF_ID(func, __x64_sys_exit)
23416 BTF_ID(func, __x64_sys_exit_group)
23417 #endif
23418 BTF_ID(func, do_exit)
23419 BTF_ID(func, do_group_exit)
23420 BTF_ID(func, kthread_complete_and_exit)
23421 BTF_ID(func, kthread_exit)
23422 BTF_ID(func, make_task_dead)
23423 BTF_SET_END(noreturn_deny)
23424
23425 static bool can_be_sleepable(struct bpf_prog *prog)
23426 {
23427 if (prog->type == BPF_PROG_TYPE_TRACING) {
23428 switch (prog->expected_attach_type) {
23429 case BPF_TRACE_FENTRY:
23430 case BPF_TRACE_FEXIT:
23431 case BPF_MODIFY_RETURN:
23432 case BPF_TRACE_ITER:
23433 return true;
23434 default:
23435 return false;
23436 }
23437 }
23438 return prog->type == BPF_PROG_TYPE_LSM ||
23439 prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
23440 prog->type == BPF_PROG_TYPE_STRUCT_OPS;
23441 }
23442
23443 static int check_attach_btf_id(struct bpf_verifier_env *env)
23444 {
23445 struct bpf_prog *prog = env->prog;
23446 struct bpf_prog *tgt_prog = prog->aux->dst_prog;
23447 struct bpf_attach_target_info tgt_info = {};
23448 u32 btf_id = prog->aux->attach_btf_id;
23449 struct bpf_trampoline *tr;
23450 int ret;
23451 u64 key;
23452
23453 if (prog->type == BPF_PROG_TYPE_SYSCALL) {
23454 if (prog->sleepable)
23455 /* attach_btf_id checked to be zero already */
23456 return 0;
23457 verbose(env, "Syscall programs can only be sleepable\n");
23458 return -EINVAL;
23459 }
23460
23461 if (prog->sleepable && !can_be_sleepable(prog)) {
23462 verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
23463 return -EINVAL;
23464 }
23465
23466 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
23467 return check_struct_ops_btf_id(env);
23468
23469 if (prog->type != BPF_PROG_TYPE_TRACING &&
23470 prog->type != BPF_PROG_TYPE_LSM &&
23471 prog->type != BPF_PROG_TYPE_EXT)
23472 return 0;
23473
23474 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
23475 if (ret)
23476 return ret;
23477
23478 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
23479 /* to make freplace equivalent to their targets, they need to
23480 * inherit env->ops and expected_attach_type for the rest of the
23481 * verification
23482 */
23483 env->ops = bpf_verifier_ops[tgt_prog->type];
23484 prog->expected_attach_type = tgt_prog->expected_attach_type;
23485 }
23486
23487 /* store info about the attachment target that will be used later */
23488 prog->aux->attach_func_proto = tgt_info.tgt_type;
23489 prog->aux->attach_func_name = tgt_info.tgt_name;
23490 prog->aux->mod = tgt_info.tgt_mod;
23491
23492 if (tgt_prog) {
23493 prog->aux->saved_dst_prog_type = tgt_prog->type;
23494 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
23495 }
23496
23497 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
23498 prog->aux->attach_btf_trace = true;
23499 return 0;
23500 } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
23501 return bpf_iter_prog_supported(prog);
23502 }
23503
23504 if (prog->type == BPF_PROG_TYPE_LSM) {
23505 ret = bpf_lsm_verify_prog(&env->log, prog);
23506 if (ret < 0)
23507 return ret;
23508 } else if (prog->type == BPF_PROG_TYPE_TRACING &&
23509 btf_id_set_contains(&btf_id_deny, btf_id)) {
23510 return -EINVAL;
23511 } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
23512 prog->expected_attach_type == BPF_MODIFY_RETURN) &&
23513 btf_id_set_contains(&noreturn_deny, btf_id)) {
23514 verbose(env, "Attaching fexit/fmod_ret to __noreturn functions is rejected.\n");
23515 return -EINVAL;
23516 }
23517
23518 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
23519 tr = bpf_trampoline_get(key, &tgt_info);
23520 if (!tr)
23521 return -ENOMEM;
23522
23523 if (tgt_prog && tgt_prog->aux->tail_call_reachable)
23524 tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
23525
23526 prog->aux->dst_trampoline = tr;
23527 return 0;
23528 }
23529
23530 struct btf *bpf_get_btf_vmlinux(void)
23531 {
23532 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
23533 mutex_lock(&bpf_verifier_lock);
23534 if (!btf_vmlinux)
23535 btf_vmlinux = btf_parse_vmlinux();
23536 mutex_unlock(&bpf_verifier_lock);
23537 }
23538 return btf_vmlinux;
23539 }
23540
23541 /*
23542 * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
23543 * this case expect that every file descriptor in the array is either a map or
23544 * a BTF. Everything else is considered to be trash.
23545 */
23546 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
23547 {
23548 struct bpf_map *map;
23549 struct btf *btf;
23550 CLASS(fd, f)(fd);
23551 int err;
23552
23553 map = __bpf_map_get(f);
23554 if (!IS_ERR(map)) {
23555 err = __add_used_map(env, map);
23556 if (err < 0)
23557 return err;
23558 return 0;
23559 }
23560
23561 btf = __btf_get_by_fd(f);
23562 if (!IS_ERR(btf)) {
23563 err = __add_used_btf(env, btf);
23564 if (err < 0)
23565 return err;
23566 return 0;
23567 }
23568
23569 verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
23570 return PTR_ERR(map);
23571 }
23572
23573 static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
23574 {
23575 size_t size = sizeof(int);
23576 int ret;
23577 int fd;
23578 u32 i;
23579
23580 env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
23581
23582 /*
23583 * The only difference between the old API (no fd_array_cnt given) and
23584 * the new one is that in the latter case the fd_array is expected to be
23585 * contiguous and is scanned for map and BTF fds right away
23586 */
23587 if (!attr->fd_array_cnt)
23588 return 0;
23589
23590 /* Check for integer overflow */
23591 if (attr->fd_array_cnt >= (U32_MAX / size)) {
23592 verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
23593 return -EINVAL;
23594 }
23595
23596 for (i = 0; i < attr->fd_array_cnt; i++) {
23597 if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
23598 return -EFAULT;
23599
23600 ret = add_fd_from_fd_array(env, fd);
23601 if (ret)
23602 return ret;
23603 }
23604
23605 return 0;
23606 }
23607
23608 static bool can_fallthrough(struct bpf_insn *insn)
23609 {
23610 u8 class = BPF_CLASS(insn->code);
23611 u8 opcode = BPF_OP(insn->code);
23612
23613 if (class != BPF_JMP && class != BPF_JMP32)
23614 return true;
23615
23616 if (opcode == BPF_EXIT || opcode == BPF_JA)
23617 return false;
23618
23619 return true;
23620 }
23621
23622 static bool can_jump(struct bpf_insn *insn)
23623 {
23624 u8 class = BPF_CLASS(insn->code);
23625 u8 opcode = BPF_OP(insn->code);
23626
23627 if (class != BPF_JMP && class != BPF_JMP32)
23628 return false;
23629
23630 switch (opcode) {
23631 case BPF_JA:
23632 case BPF_JEQ:
23633 case BPF_JNE:
23634 case BPF_JLT:
23635 case BPF_JLE:
23636 case BPF_JGT:
23637 case BPF_JGE:
23638 case BPF_JSGT:
23639 case BPF_JSGE:
23640 case BPF_JSLT:
23641 case BPF_JSLE:
23642 case BPF_JCOND:
23643 return true;
23644 }
23645
23646 return false;
23647 }
23648
23649 static int insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
23650 {
23651 struct bpf_insn *insn = &prog->insnsi[idx];
23652 int i = 0, insn_sz;
23653 u32 dst;
23654
23655 insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
23656 if (can_fallthrough(insn) && idx + 1 < prog->len)
23657 succ[i++] = idx + insn_sz;
23658
23659 if (can_jump(insn)) {
23660 dst = idx + jmp_offset(insn) + 1;
23661 if (i == 0 || succ[0] != dst)
23662 succ[i++] = dst;
23663 }
23664
23665 return i;
23666 }
23667
23668 /* Each field is a register bitmask */
23669 struct insn_live_regs {
23670 u16 use; /* registers read by instruction */
23671 u16 def; /* registers written by instruction */
23672 u16 in; /* registers that may be alive before instruction */
23673 u16 out; /* registers that may be alive after instruction */
23674 };
23675
23676 /* Bitmask with 1s for all caller saved registers */
23677 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
23678
23679 /* Compute info->{use,def} fields for the instruction */
23680 static void compute_insn_live_regs(struct bpf_verifier_env *env,
23681 struct bpf_insn *insn,
23682 struct insn_live_regs *info)
23683 {
23684 struct call_summary cs;
23685 u8 class = BPF_CLASS(insn->code);
23686 u8 code = BPF_OP(insn->code);
23687 u8 mode = BPF_MODE(insn->code);
23688 u16 src = BIT(insn->src_reg);
23689 u16 dst = BIT(insn->dst_reg);
23690 u16 r0 = BIT(0);
23691 u16 def = 0;
23692 u16 use = 0xffff;
23693
23694 switch (class) {
23695 case BPF_LD:
23696 switch (mode) {
23697 case BPF_IMM:
23698 if (BPF_SIZE(insn->code) == BPF_DW) {
23699 def = dst;
23700 use = 0;
23701 }
23702 break;
23703 case BPF_LD | BPF_ABS:
23704 case BPF_LD | BPF_IND:
23705 /* stick with defaults */
23706 break;
23707 }
23708 break;
23709 case BPF_LDX:
23710 switch (mode) {
23711 case BPF_MEM:
23712 case BPF_MEMSX:
23713 def = dst;
23714 use = src;
23715 break;
23716 }
23717 break;
23718 case BPF_ST:
23719 switch (mode) {
23720 case BPF_MEM:
23721 def = 0;
23722 use = dst;
23723 break;
23724 }
23725 break;
23726 case BPF_STX:
23727 switch (mode) {
23728 case BPF_MEM:
23729 def = 0;
23730 use = dst | src;
23731 break;
23732 case BPF_ATOMIC:
23733 switch (insn->imm) {
23734 case BPF_CMPXCHG:
23735 use = r0 | dst | src;
23736 def = r0;
23737 break;
23738 case BPF_LOAD_ACQ:
23739 def = dst;
23740 use = src;
23741 break;
23742 case BPF_STORE_REL:
23743 def = 0;
23744 use = dst | src;
23745 break;
23746 default:
23747 use = dst | src;
23748 if (insn->imm & BPF_FETCH)
23749 def = src;
23750 else
23751 def = 0;
23752 }
23753 break;
23754 }
23755 break;
23756 case BPF_ALU:
23757 case BPF_ALU64:
23758 switch (code) {
23759 case BPF_END:
23760 use = dst;
23761 def = dst;
23762 break;
23763 case BPF_MOV:
23764 def = dst;
23765 if (BPF_SRC(insn->code) == BPF_K)
23766 use = 0;
23767 else
23768 use = src;
23769 break;
23770 default:
23771 def = dst;
23772 if (BPF_SRC(insn->code) == BPF_K)
23773 use = dst;
23774 else
23775 use = dst | src;
23776 }
23777 break;
23778 case BPF_JMP:
23779 case BPF_JMP32:
23780 switch (code) {
23781 case BPF_JA:
23782 case BPF_JCOND:
23783 def = 0;
23784 use = 0;
23785 break;
23786 case BPF_EXIT:
23787 def = 0;
23788 use = r0;
23789 break;
23790 case BPF_CALL:
23791 def = ALL_CALLER_SAVED_REGS;
23792 use = def & ~BIT(BPF_REG_0);
23793 if (get_call_summary(env, insn, &cs))
23794 use = GENMASK(cs.num_params, 1);
23795 break;
23796 default:
23797 def = 0;
23798 if (BPF_SRC(insn->code) == BPF_K)
23799 use = dst;
23800 else
23801 use = dst | src;
23802 }
23803 break;
23804 }
23805
23806 info->def = def;
23807 info->use = use;
23808 }
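
/* Illustrative examples (not part of the upstream source) of the
 * use/def sets computed above, written as register sets:
 *   r0 += r1                (BPF_ALU64|BPF_ADD|BPF_X): use = {r0, r1}, def = {r0}
 *   r3 = r7                 (BPF_ALU64|BPF_MOV|BPF_X): use = {r7},     def = {r3}
 *   *(u64 *)(r10 - 8) = r1  (BPF_STX|BPF_MEM):         use = {r10, r1}, def = {}
 *   exit                    (BPF_JMP|BPF_EXIT):        use = {r0},      def = {}
 * Unrecognized class/mode combinations keep the conservative defaults
 * use = 0xffff (any register may be read) and def = 0.
 */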
23809
23810 /* Compute may-live registers after each instruction in the program.
23811 * The register is live after the instruction I if it is read by some
23812 * instruction S following I during program execution and is not
23813 * overwritten between I and S.
23814 *
23815 * Store result in env->insn_aux_data[i].live_regs_before.
23816 */
23817 static int compute_live_registers(struct bpf_verifier_env *env)
23818 {
23819 struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
23820 struct bpf_insn *insns = env->prog->insnsi;
23821 struct insn_live_regs *state;
23822 int insn_cnt = env->prog->len;
23823 int err = 0, i, j;
23824 bool changed;
23825
23826 /* Use the following algorithm:
23827 * - define the following:
23828 * - I.use : a set of all registers read by instruction I;
23829 * - I.def : a set of all registers written by instruction I;
23830 * - I.in : a set of all registers that may be alive before the execution of I;
23831 * - I.out : a set of all registers that may be alive after the execution of I;
23832 * - insn_successors(I): a set of instructions S that might immediately
23833 * follow I for some program execution;
23834 * - associate separate empty sets 'I.in' and 'I.out' with each instruction;
23835 * - visit each instruction in a postorder and update
23836 * state[i].in, state[i].out as follows:
23837 *
23838 * state[i].out = U [state[s].in for S in insn_successors(i)]
23839 * state[i].in = (state[i].out / state[i].def) U state[i].use
23840 *
23841 * (where U stands for set union, / stands for set difference)
23842 * - repeat the computation while the {in,out} fields change for
23843 * any instruction.
23844 */
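/* Illustrative example (not part of the upstream source): for
 *   0: r1 = 7      use = {},   def = {r1}
 *   1: r0 = r1     use = {r1}, def = {r0}
 *   2: exit        use = {r0}, def = {}
 * and postorder {2, 1, 0} computed by check_cfg(), one pass reaches the
 * fixpoint (a second pass only confirms it):
 *   in[2] = {r0}, out[2] = {}
 *   in[1] = {r1}, out[1] = {r0}
 *   in[0] = {},   out[0] = {r1}
 * so live_regs_before records {} for insn 0, {r1} for insn 1 and {r0}
 * for insn 2.
 */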
23845 state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL);
23846 if (!state) {
23847 err = -ENOMEM;
23848 goto out;
23849 }
23850
23851 for (i = 0; i < insn_cnt; ++i)
23852 compute_insn_live_regs(env, &insns[i], &state[i]);
23853
23854 changed = true;
23855 while (changed) {
23856 changed = false;
23857 for (i = 0; i < env->cfg.cur_postorder; ++i) {
23858 int insn_idx = env->cfg.insn_postorder[i];
23859 struct insn_live_regs *live = &state[insn_idx];
23860 int succ_num;
23861 u32 succ[2];
23862 u16 new_out = 0;
23863 u16 new_in = 0;
23864
23865 succ_num = insn_successors(env->prog, insn_idx, succ);
23866 for (int s = 0; s < succ_num; ++s)
23867 new_out |= state[succ[s]].in;
23868 new_in = (new_out & ~live->def) | live->use;
23869 if (new_out != live->out || new_in != live->in) {
23870 live->in = new_in;
23871 live->out = new_out;
23872 changed = true;
23873 }
23874 }
23875 }
23876
23877 for (i = 0; i < insn_cnt; ++i)
23878 insn_aux[i].live_regs_before = state[i].in;
23879
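/* Illustrative note (not part of the upstream source): with log level 2
 * the dump below prints one line per insn: the insn index, then one
 * column per register R0-R9 showing the register number if it may be
 * live before the insn and '.' otherwise, followed by the disassembled
 * insn, e.g.
 *   2: .1....6... (bf) r0 = r1
 * means that R1 and R6 may be live before insn 2.
 */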
23880 if (env->log.level & BPF_LOG_LEVEL2) {
23881 verbose(env, "Live regs before insn:\n");
23882 for (i = 0; i < insn_cnt; ++i) {
23883 verbose(env, "%3d: ", i);
23884 for (j = BPF_REG_0; j < BPF_REG_10; ++j)
23885 if (insn_aux[i].live_regs_before & BIT(j))
23886 verbose(env, "%d", j);
23887 else
23888 verbose(env, ".");
23889 verbose(env, " ");
23890 verbose_insn(env, &insns[i]);
23891 if (bpf_is_ldimm64(&insns[i]))
23892 i++;
23893 }
23894 }
23895
23896 out:
23897 kvfree(state);
23898 kvfree(env->cfg.insn_postorder);
23899 env->cfg.insn_postorder = NULL;
23900 env->cfg.cur_postorder = 0;
23901 return err;
23902 }
23903
23904 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
23905 {
23906 u64 start_time = ktime_get_ns();
23907 struct bpf_verifier_env *env;
23908 int i, len, ret = -EINVAL, err;
23909 u32 log_true_size;
23910 bool is_priv;
23911
23912 /* if no program types are registered, no program can be valid */
23913 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
23914 return -EINVAL;
23915
23916 /* 'struct bpf_verifier_env' can be global, but since it's not small,
23917 * allocate/free it every time bpf_check() is called
23918 */
23919 env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
23920 if (!env)
23921 return -ENOMEM;
23922
23923 env->bt.env = env;
23924
23925 len = (*prog)->len;
23926 env->insn_aux_data =
23927 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
23928 ret = -ENOMEM;
23929 if (!env->insn_aux_data)
23930 goto err_free_env;
23931 for (i = 0; i < len; i++)
23932 env->insn_aux_data[i].orig_idx = i;
23933 env->prog = *prog;
23934 env->ops = bpf_verifier_ops[env->prog->type];
23935
23936 env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
23937 env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
23938 env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
23939 env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
23940 env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
23941
23942 bpf_get_btf_vmlinux();
23943
23944 /* grab the mutex to protect a few globals used by the verifier */
23945 if (!is_priv)
23946 mutex_lock(&bpf_verifier_lock);
23947
23948 /* the user could have requested verbose verifier output
23949 * and supplied a buffer to store the verification trace
23950 */
23951 ret = bpf_vlog_init(&env->log, attr->log_level,
23952 (char __user *) (unsigned long) attr->log_buf,
23953 attr->log_size);
23954 if (ret)
23955 goto err_unlock;
23956
23957 ret = process_fd_array(env, attr, uattr);
23958 if (ret)
23959 goto skip_full_check;
23960
23961 mark_verifier_state_clean(env);
23962
23963 if (IS_ERR(btf_vmlinux)) {
23964 /* Either gcc, pahole or the kernel is broken. */
23965 verbose(env, "in-kernel BTF is malformed\n");
23966 ret = PTR_ERR(btf_vmlinux);
23967 goto skip_full_check;
23968 }
23969
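/* Illustrative note (not part of the upstream source): the three
 * statements below apply alignment policy in increasing precedence:
 * start from the BPF_F_STRICT_ALIGNMENT request, force strict checking
 * when the architecture lacks efficient unaligned access, and switch it
 * off again when BPF_F_ANY_ALIGNMENT is set.
 */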
23970 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
23971 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
23972 env->strict_alignment = true;
23973 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
23974 env->strict_alignment = false;
23975
23976 if (is_priv)
23977 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
23978 env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
23979
23980 env->explored_states = kvcalloc(state_htab_size(env),
23981 sizeof(struct list_head),
23982 GFP_USER);
23983 ret = -ENOMEM;
23984 if (!env->explored_states)
23985 goto skip_full_check;
23986
23987 for (i = 0; i < state_htab_size(env); i++)
23988 INIT_LIST_HEAD(&env->explored_states[i]);
23989 INIT_LIST_HEAD(&env->free_list);
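
/* Illustrative note (not part of the upstream source): explored_states,
 * initialized above, is a table of list heads, bucketed by instruction
 * index, that will later hold verifier states reached at each
 * instruction so that do_check() can prune branches equivalent to
 * already-verified ones.
 */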
23990
23991 ret = check_btf_info_early(env, attr, uattr);
23992 if (ret < 0)
23993 goto skip_full_check;
23994
23995 ret = add_subprog_and_kfunc(env);
23996 if (ret < 0)
23997 goto skip_full_check;
23998
23999 ret = check_subprogs(env);
24000 if (ret < 0)
24001 goto skip_full_check;
24002
24003 ret = check_btf_info(env, attr, uattr);
24004 if (ret < 0)
24005 goto skip_full_check;
24006
24007 ret = resolve_pseudo_ldimm64(env);
24008 if (ret < 0)
24009 goto skip_full_check;
24010
24011 if (bpf_prog_is_offloaded(env->prog->aux)) {
24012 ret = bpf_prog_offload_verifier_prep(env->prog);
24013 if (ret)
24014 goto skip_full_check;
24015 }
24016
24017 ret = check_cfg(env);
24018 if (ret < 0)
24019 goto skip_full_check;
24020
24021 ret = check_attach_btf_id(env);
24022 if (ret)
24023 goto skip_full_check;
24024
24025 ret = compute_live_registers(env);
24026 if (ret < 0)
24027 goto skip_full_check;
24028
24029 ret = mark_fastcall_patterns(env);
24030 if (ret < 0)
24031 goto skip_full_check;
24032
24033 ret = do_check_main(env);
24034 ret = ret ?: do_check_subprogs(env);
24035
24036 if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
24037 ret = bpf_prog_offload_finalize(env);
24038
24039 skip_full_check:
24040 kvfree(env->explored_states);
24041
24042 /* this pass might decrease stack depth, so keep it before passes that
24043 * allocate additional stack slots.
24044 */
24045 if (ret == 0)
24046 ret = remove_fastcall_spills_fills(env);
24047
24048 if (ret == 0)
24049 ret = check_max_stack_depth(env);
24050
24051 /* instruction rewrites happen after this point */
24052 if (ret == 0)
24053 ret = optimize_bpf_loop(env);
24054
24055 if (is_priv) {
24056 if (ret == 0)
24057 opt_hard_wire_dead_code_branches(env);
24058 if (ret == 0)
24059 ret = opt_remove_dead_code(env);
24060 if (ret == 0)
24061 ret = opt_remove_nops(env);
24062 } else {
24063 if (ret == 0)
24064 sanitize_dead_code(env);
24065 }
24066
24067 if (ret == 0)
24068 /* program is valid, convert *(u32*)(ctx + off) accesses */
24069 ret = convert_ctx_accesses(env);
24070
24071 if (ret == 0)
24072 ret = do_misc_fixups(env);
24073
24074 /* do the 32-bit optimization after insn patching is done so that
24075 * patched insns can be handled correctly.
24076 */
24077 if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
24078 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
24079 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
24080 : false;
24081 }
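
/* Illustrative note (not part of the upstream source): aux->verifier_zext
 * records that the pass above inserted explicit zero-extension insns, so
 * a JIT that reports bpf_jit_needs_zext() can skip emitting its own
 * zero-extension after 32-bit subregister writes.
 */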
24082
24083 if (ret == 0)
24084 ret = fixup_call_args(env);
24085
24086 env->verification_time = ktime_get_ns() - start_time;
24087 print_verification_stats(env);
24088 env->prog->aux->verified_insns = env->insn_processed;
24089
24090 /* preserve original error even if log finalization is successful */
24091 err = bpf_vlog_finalize(&env->log, &log_true_size);
24092 if (err)
24093 ret = err;
24094
24095 if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
24096 copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
24097 &log_true_size, sizeof(log_true_size))) {
24098 ret = -EFAULT;
24099 goto err_release_maps;
24100 }
24101
24102 if (ret)
24103 goto err_release_maps;
24104
24105 if (env->used_map_cnt) {
24106 /* if program passed verifier, update used_maps in bpf_prog_info */
24107 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
24108 sizeof(env->used_maps[0]),
24109 GFP_KERNEL);
24110
24111 if (!env->prog->aux->used_maps) {
24112 ret = -ENOMEM;
24113 goto err_release_maps;
24114 }
24115
24116 memcpy(env->prog->aux->used_maps, env->used_maps,
24117 sizeof(env->used_maps[0]) * env->used_map_cnt);
24118 env->prog->aux->used_map_cnt = env->used_map_cnt;
24119 }
24120 if (env->used_btf_cnt) {
24121 /* if program passed verifier, update used_btfs in bpf_prog_aux */
24122 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
24123 sizeof(env->used_btfs[0]),
24124 GFP_KERNEL);
24125 if (!env->prog->aux->used_btfs) {
24126 ret = -ENOMEM;
24127 goto err_release_maps;
24128 }
24129
24130 memcpy(env->prog->aux->used_btfs, env->used_btfs,
24131 sizeof(env->used_btfs[0]) * env->used_btf_cnt);
24132 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
24133 }
24134 if (env->used_map_cnt || env->used_btf_cnt) {
24135 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
24136 * bpf_ld_imm64 instructions
24137 */
24138 convert_pseudo_ld_imm64(env);
24139 }
24140
24141 adjust_btf_func(env);
24142
24143 err_release_maps:
24144 if (!env->prog->aux->used_maps)
24145 /* if we didn't copy map pointers into bpf_prog_info, release
24146 * them now. Otherwise free_used_maps() will release them.
24147 */
24148 release_maps(env);
24149 if (!env->prog->aux->used_btfs)
24150 release_btfs(env);
24151
24152 /* extension progs temporarily inherit the attach_type of their targets
24153 * for verification purposes, so set it back to zero before returning
24154 */
24155 if (env->prog->type == BPF_PROG_TYPE_EXT)
24156 env->prog->expected_attach_type = 0;
24157
24158 *prog = env->prog;
24159
24160 module_put(env->attach_btf_mod);
24161 err_unlock:
24162 if (!is_priv)
24163 mutex_unlock(&bpf_verifier_lock);
24164 vfree(env->insn_aux_data);
24165 kvfree(env->insn_hist);
24166 err_free_env:
24167 kvfree(env->cfg.insn_postorder);
24168 kvfree(env);
24169 return ret;
24170 }
24171