xref: /linux/kernel/bpf/verifier.c (revision 6c3e8a4d476521bc33362e90b2569548f1adb7a4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/cnum.h>
30 #include <linux/bpf_mem_alloc.h>
31 #include <net/xdp.h>
32 #include <linux/trace_events.h>
33 #include <linux/kallsyms.h>
34 
35 #include "disasm.h"
36 
37 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
38 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
39 	[_id] = & _name ## _verifier_ops,
40 #define BPF_MAP_TYPE(_id, _ops)
41 #define BPF_LINK_TYPE(_id, _name)
42 #include <linux/bpf_types.h>
43 #undef BPF_PROG_TYPE
44 #undef BPF_MAP_TYPE
45 #undef BPF_LINK_TYPE
46 };
47 
/*
 * Feature identifiers, numbered from zero. __MAX_BPF_FEAT follows the last
 * feature and therefore equals the number of features defined.
 */
enum bpf_features {
	BPF_FEAT_RDONLY_CAST_TO_VOID,	/* 0 */
	BPF_FEAT_STREAMS,		/* 1 */
	__MAX_BPF_FEAT,
};
53 
54 struct bpf_mem_alloc bpf_global_percpu_ma;
55 static bool bpf_global_percpu_ma_set;
56 
57 /* bpf_check() is a static code analyzer that walks eBPF program
58  * instruction by instruction and updates register/stack state.
59  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
60  *
61  * The first pass is depth-first-search to check that the program is a DAG.
62  * It rejects the following programs:
63  * - larger than BPF_MAXINSNS insns
64  * - if loop is present (detected via back-edge)
65  * - unreachable insns exist (shouldn't be a forest. program = one function)
66  * - out of bounds or malformed jumps
67  * The second pass is all possible path descent from the 1st insn.
68  * Since it's analyzing all paths through the program, the length of the
69  * analysis is limited to 64k insn, which may be hit even if total number of
70  * insn is less than 4K, but there are too many branches that change stack/regs.
71  * Number of 'branches to be analyzed' is limited to 1k
72  *
73  * On entry to each instruction, each register has a type, and the instruction
74  * changes the types of the registers depending on instruction semantics.
75  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
76  * copied to R1.
77  *
78  * All registers are 64-bit.
79  * R0 - return register
80  * R1-R5 argument passing registers
81  * R6-R9 callee saved registers
82  * R10 - frame pointer read-only
83  *
84  * At the start of BPF program the register R1 contains a pointer to bpf_context
85  * and has type PTR_TO_CTX.
86  *
87  * Verifier tracks arithmetic operations on pointers in case:
88  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
89  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
90  * 1st insn copies R10 (which has FRAME_PTR) type into R1
91  * and 2nd arithmetic instruction is pattern matched to recognize
92  * that it wants to construct a pointer to some element within stack.
93  * So after 2nd insn, the register R1 has type PTR_TO_STACK
94  * (and -20 constant is saved for further stack bounds checking).
95  * Meaning that this reg is a pointer to stack plus known immediate constant.
96  *
97  * Most of the time the registers have SCALAR_VALUE type, which
98  * means the register has some value, but it's not a valid pointer.
99  * (like pointer plus pointer becomes SCALAR_VALUE type)
100  *
101  * When verifier sees load or store instructions the type of base register
102  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
103  * four pointer types recognized by check_mem_access() function.
104  *
105  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
106  * and the range of [ptr, ptr + map's value_size) is accessible.
107  *
108  * registers used to pass values to function calls are checked against
109  * function argument constraints.
110  *
111  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
112  * It means that the register type passed to this function must be
113  * PTR_TO_STACK and it will be used inside the function as
114  * 'pointer to map element key'
115  *
116  * For example the argument constraints for bpf_map_lookup_elem():
117  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
118  *   .arg1_type = ARG_CONST_MAP_PTR,
119  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
120  *
121  * ret_type says that this function returns 'pointer to map elem value or null'
122  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
123  * 2nd argument should be a pointer to stack, which will be used inside
124  * the helper function as a pointer to map element key.
125  *
126  * On the kernel side the helper function looks like:
127  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
128  * {
129  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
130  *    void *key = (void *) (unsigned long) r2;
131  *    void *value;
132  *
133  *    here kernel can access 'key' and 'map' pointers safely, knowing that
134  *    [key, key + map->key_size) bytes are valid and were initialized on
135  *    the stack of eBPF program.
136  * }
137  *
138  * Corresponding eBPF program may look like:
139  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
140  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
141  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
142  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
143  * here verifier looks at prototype of map_lookup_elem() and sees:
144  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
145  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
146  *
147  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
148  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
149  * and were initialized prior to this call.
150  * If it's ok, then verifier allows this BPF_CALL insn and looks at
151  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
152  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
153  * returns either pointer to map value or NULL.
154  *
155  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
156  * insn, the register holding that pointer in the true branch changes state to
157  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
158  * branch. See check_cond_jmp_op().
159  *
160  * After the call R0 is set to return type of the function and registers R1-R5
161  * are set to NOT_INIT to indicate that they are no longer readable.
162  *
163  * The following reference types represent a potential reference to a kernel
164  * resource which, after first being allocated, must be checked and freed by
165  * the BPF program:
166  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
167  *
168  * When the verifier sees a helper call return a reference type, it allocates a
169  * pointer id for the reference and stores it in the current function state.
170  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
171  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
172  * passes through a NULL-check conditional. For the branch wherein the state is
173  * changed to CONST_IMM, the verifier releases the reference.
174  *
175  * For each helper function that allocates a reference, such as
176  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
177  * bpf_sk_release(). When a reference type passes into the release function,
178  * the verifier also releases the reference. If any unchecked or unreleased
179  * reference remains at the end of the program, the verifier rejects it.
180  */
181 
182 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
183 struct bpf_verifier_stack_elem {
184 	/* verifier state is 'st'
185 	 * before processing instruction 'insn_idx'
186 	 * and after processing instruction 'prev_insn_idx'
187 	 */
188 	struct bpf_verifier_state st;
189 	int insn_idx;
190 	int prev_insn_idx;
191 	struct bpf_verifier_stack_elem *next;
192 	/* length of verifier log at the time this state was pushed on stack */
193 	u32 log_pos;
194 };
195 
/* Verifier limits; the code enforcing them is outside this excerpt.
 * NOTE(review): the names suggest a cap on the pending-branch stack depth
 * and on states kept per instruction - confirm at the users.
 */
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

/* NOTE(review): presumably the largest object size served from
 * bpf_global_percpu_ma - confirm at the users.
 */
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512

/* NOTE(review): presumably the minimum stack size for which a private
 * stack is considered - confirm at the users.
 */
#define BPF_PRIV_STACK_MIN_SIZE		64
202 
203 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id);
204 static int release_reference_nomark(struct bpf_verifier_state *state, int id);
205 static int release_reference(struct bpf_verifier_env *env, int id);
206 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
207 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
208 static int ref_set_non_owning(struct bpf_verifier_env *env,
209 			      struct bpf_reg_state *reg);
210 static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg);
211 static inline bool in_sleepable_context(struct bpf_verifier_env *env);
212 static const char *non_sleepable_context_description(struct bpf_verifier_env *env);
213 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
214 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
215 
216 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
217 			      struct bpf_map *map,
218 			      bool unpriv, bool poison)
219 {
220 	unpriv |= bpf_map_ptr_unpriv(aux);
221 	aux->map_ptr_state.unpriv = unpriv;
222 	aux->map_ptr_state.poison = poison;
223 	aux->map_ptr_state.map_ptr = map;
224 }
225 
226 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
227 {
228 	bool poisoned = bpf_map_key_poisoned(aux);
229 
230 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
231 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
232 }
233 
234 static void update_ref_obj(struct ref_obj_desc *ref_obj, struct bpf_reg_state *reg)
235 {
236 	ref_obj->id = reg->id;
237 	ref_obj->parent_id = reg->parent_id;
238 	ref_obj->cnt++;
239 }
240 
241 static int validate_ref_obj(struct bpf_verifier_env *env, struct ref_obj_desc *ref_obj)
242 {
243 	if (ref_obj->cnt > 1) {
244 		verifier_bug(env, "function expects only one referenced object but got %d\n",
245 			     ref_obj->cnt);
246 		return -EFAULT;
247 	}
248 
249 	return 0;
250 }
251 
252 struct bpf_call_arg_meta {
253 	struct bpf_map_desc map;
254 	struct bpf_dynptr_desc dynptr;
255 	struct ref_obj_desc ref_obj;
256 	bool raw_mode;
257 	bool pkt_access;
258 	u8 release_regno;
259 	int regno;
260 	int access_size;
261 	int mem_size;
262 	u64 msize_max_value;
263 	int func_id;
264 	struct btf *btf;
265 	u32 btf_id;
266 	struct btf *ret_btf;
267 	u32 ret_btf_id;
268 	u32 subprogno;
269 	struct btf_field *kptr_field;
270 	s64 const_map_key;
271 };
272 
273 struct bpf_kfunc_meta {
274 	struct btf *btf;
275 	const struct btf_type *proto;
276 	const char *name;
277 	const u32 *flags;
278 	s32 id;
279 };
280 
/* Non-static, so other files can reference it.
 * NOTE(review): presumably the BTF object describing vmlinux - confirm
 * where it is assigned.
 */
struct btf *btf_vmlinux;
282 
/* Wrapper around an int that encodes either a register number or an
 * argument number: argno_from_reg() stores the register number as is,
 * argno_from_arg() stores the negated argument number.
 */
typedef struct argno {
	int argno;
} argno_t;
286 
287 static argno_t argno_from_reg(u32 regno)
288 {
289 	return (argno_t){ .argno = regno };
290 }
291 
292 static argno_t argno_from_arg(u32 arg)
293 {
294 	return (argno_t){ .argno = -arg };
295 }
296 
297 static int reg_from_argno(argno_t a)
298 {
299 	if (a.argno >= 0)
300 		return a.argno;
301 	if (a.argno >= -MAX_BPF_FUNC_REG_ARGS)
302 		return -a.argno;
303 	return -1;
304 }
305 
306 static int arg_from_argno(argno_t a)
307 {
308 	if (a.argno < 0)
309 		return -a.argno;
310 	return -1;
311 }
312 
313 static int arg_idx_from_argno(argno_t a)
314 {
315 	return arg_from_argno(a) - 1;
316 }
317 
318 static const char *btf_type_name(const struct btf *btf, u32 id)
319 {
320 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
321 }
322 
/* File-local mutexes; the code taking them is outside this excerpt.
 * NOTE(review): bpf_percpu_ma_lock presumably guards the setup of
 * bpf_global_percpu_ma - confirm at the users.
 */
static DEFINE_MUTEX(bpf_percpu_ma_lock);
static DEFINE_MUTEX(bpf_verifier_lock);
325 
326 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
327 {
328 	struct bpf_verifier_env *env = private_data;
329 	va_list args;
330 
331 	if (!bpf_verifier_log_needed(&env->log))
332 		return;
333 
334 	va_start(args, fmt);
335 	bpf_verifier_vlog(&env->log, fmt, args);
336 	va_end(args);
337 }
338 
339 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
340 				   struct bpf_reg_state *reg,
341 				   struct bpf_retval_range range, const char *ctx,
342 				   const char *reg_name)
343 {
344 	bool unknown = true;
345 
346 	verbose(env, "%s the register %s has", ctx, reg_name);
347 	if (reg_smin(reg) > S64_MIN) {
348 		verbose(env, " smin=%lld", reg_smin(reg));
349 		unknown = false;
350 	}
351 	if (reg_smax(reg) < S64_MAX) {
352 		verbose(env, " smax=%lld", reg_smax(reg));
353 		unknown = false;
354 	}
355 	if (unknown)
356 		verbose(env, " unknown scalar value");
357 	verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
358 }
359 
360 static bool reg_not_null(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
361 {
362 	enum bpf_reg_type type;
363 
364 	type = reg->type;
365 	if (type_may_be_null(type))
366 		return false;
367 
368 	type = base_type(type);
369 	return type == PTR_TO_SOCKET ||
370 		type == PTR_TO_TCP_SOCK ||
371 		type == PTR_TO_MAP_VALUE ||
372 		type == PTR_TO_MAP_KEY ||
373 		type == PTR_TO_SOCK_COMMON ||
374 		(type == PTR_TO_BTF_ID && is_trusted_reg(env, reg)) ||
375 		(type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) ||
376 		type == CONST_PTR_TO_MAP;
377 }
378 
379 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
380 {
381 	struct btf_record *rec = NULL;
382 	struct btf_struct_meta *meta;
383 
384 	if (reg->type == PTR_TO_MAP_VALUE) {
385 		rec = reg->map_ptr->record;
386 	} else if (type_is_ptr_alloc_obj(reg->type)) {
387 		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
388 		if (meta)
389 			rec = meta->record;
390 	}
391 	return rec;
392 }
393 
394 bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog)
395 {
396 	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
397 
398 	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
399 }
400 
401 static bool subprog_returns_void(struct bpf_verifier_env *env, int subprog)
402 {
403 	const struct btf_type *type, *func, *func_proto;
404 	const struct btf *btf = env->prog->aux->btf;
405 	u32 btf_id;
406 
407 	btf_id = env->prog->aux->func_info[subprog].type_id;
408 
409 	func = btf_type_by_id(btf, btf_id);
410 	if (verifier_bug_if(!func, env, "btf_id %u not found", btf_id))
411 		return false;
412 
413 	func_proto = btf_type_by_id(btf, func->type);
414 	if (!func_proto)
415 		return false;
416 
417 	type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
418 	if (!type)
419 		return false;
420 
421 	return btf_type_is_void(type);
422 }
423 
424 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
425 {
426 	struct bpf_func_info *info;
427 
428 	if (!env->prog->aux->func_info)
429 		return "";
430 
431 	info = &env->prog->aux->func_info[subprog];
432 	return btf_type_name(env->prog->aux->btf, info->type_id);
433 }
434 
435 void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
436 {
437 	struct bpf_subprog_info *info = subprog_info(env, subprog);
438 
439 	info->is_cb = true;
440 	info->is_async_cb = true;
441 	info->is_exception_cb = true;
442 }
443 
444 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
445 {
446 	return subprog_info(env, subprog)->is_exception_cb;
447 }
448 
449 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
450 {
451 	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK);
452 }
453 
454 static bool type_is_rdonly_mem(u32 type)
455 {
456 	return type & MEM_RDONLY;
457 }
458 
459 static bool is_acquire_function(enum bpf_func_id func_id,
460 				const struct bpf_map *map)
461 {
462 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
463 
464 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
465 	    func_id == BPF_FUNC_sk_lookup_udp ||
466 	    func_id == BPF_FUNC_skc_lookup_tcp ||
467 	    func_id == BPF_FUNC_ringbuf_reserve ||
468 	    func_id == BPF_FUNC_kptr_xchg)
469 		return true;
470 
471 	if (func_id == BPF_FUNC_map_lookup_elem &&
472 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
473 	     map_type == BPF_MAP_TYPE_SOCKHASH))
474 		return true;
475 
476 	return false;
477 }
478 
479 static bool is_ptr_cast_function(enum bpf_func_id func_id)
480 {
481 	return func_id == BPF_FUNC_tcp_sock ||
482 		func_id == BPF_FUNC_sk_fullsock ||
483 		func_id == BPF_FUNC_skc_to_tcp_sock ||
484 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
485 		func_id == BPF_FUNC_skc_to_udp6_sock ||
486 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
487 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
488 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
489 }
490 
491 static bool is_sync_callback_calling_kfunc(u32 btf_id);
492 static bool is_async_callback_calling_kfunc(u32 btf_id);
493 static bool is_callback_calling_kfunc(u32 btf_id);
494 
495 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id);
496 static bool is_task_work_add_kfunc(u32 func_id);
497 
498 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
499 {
500 	return func_id == BPF_FUNC_for_each_map_elem ||
501 	       func_id == BPF_FUNC_find_vma ||
502 	       func_id == BPF_FUNC_loop ||
503 	       func_id == BPF_FUNC_user_ringbuf_drain;
504 }
505 
506 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
507 {
508 	return func_id == BPF_FUNC_timer_set_callback;
509 }
510 
511 static bool is_callback_calling_function(enum bpf_func_id func_id)
512 {
513 	return is_sync_callback_calling_function(func_id) ||
514 	       is_async_callback_calling_function(func_id);
515 }
516 
517 bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn)
518 {
519 	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
520 	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
521 }
522 
523 bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn)
524 {
525 	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
526 	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
527 }
528 
529 static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn)
530 {
531 	/* bpf_timer callbacks are never sleepable. */
532 	if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback)
533 		return false;
534 
535 	/* bpf_wq and bpf_task_work callbacks are always sleepable. */
536 	if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
537 	    (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
538 		return true;
539 
540 	verifier_bug(env, "unhandled async callback in is_async_cb_sleepable");
541 	return false;
542 }
543 
544 bool bpf_is_may_goto_insn(struct bpf_insn *insn)
545 {
546 	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
547 }
548 
549 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
550 {
551        int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
552 
553        /* We need to check that slots between [spi - nr_slots + 1, spi] are
554 	* within [0, allocated_stack).
555 	*
556 	* Please note that the spi grows downwards. For example, a dynptr
557 	* takes the size of two stack slots; the first slot will be at
558 	* spi and the second slot will be at spi - 1.
559 	*/
560        return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
561 }
562 
563 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
564 			          const char *obj_kind, int nr_slots)
565 {
566 	int off, spi;
567 
568 	if (!tnum_is_const(reg->var_off)) {
569 		verbose(env, "%s has to be at a constant offset\n", obj_kind);
570 		return -EINVAL;
571 	}
572 
573 	off = reg->var_off.value;
574 	if (off % BPF_REG_SIZE) {
575 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
576 		return -EINVAL;
577 	}
578 
579 	spi = bpf_get_spi(off);
580 	if (spi + 1 < nr_slots) {
581 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
582 		return -EINVAL;
583 	}
584 
585 	if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots))
586 		return -ERANGE;
587 	return spi;
588 }
589 
590 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
591 {
592 	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
593 }
594 
/* Slot index of the @nr_slots wide iterator that @reg points to, or a
 * negative error from stack_slot_obj_get_spi().
 */
static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	int spi = stack_slot_obj_get_spi(env, reg, "iter", nr_slots);

	return spi;
}
599 
/* Slot index of the single-slot irq flag that @reg points to, or a
 * negative error from stack_slot_obj_get_spi().
 */
static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi = stack_slot_obj_get_spi(env, reg, "irq_flag", 1);

	return spi;
}
604 
605 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
606 {
607 	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
608 	case DYNPTR_TYPE_LOCAL:
609 		return BPF_DYNPTR_TYPE_LOCAL;
610 	case DYNPTR_TYPE_RINGBUF:
611 		return BPF_DYNPTR_TYPE_RINGBUF;
612 	case DYNPTR_TYPE_SKB:
613 		return BPF_DYNPTR_TYPE_SKB;
614 	case DYNPTR_TYPE_XDP:
615 		return BPF_DYNPTR_TYPE_XDP;
616 	case DYNPTR_TYPE_SKB_META:
617 		return BPF_DYNPTR_TYPE_SKB_META;
618 	case DYNPTR_TYPE_FILE:
619 		return BPF_DYNPTR_TYPE_FILE;
620 	default:
621 		return BPF_DYNPTR_TYPE_INVALID;
622 	}
623 }
624 
625 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
626 {
627 	switch (type) {
628 	case BPF_DYNPTR_TYPE_LOCAL:
629 		return DYNPTR_TYPE_LOCAL;
630 	case BPF_DYNPTR_TYPE_RINGBUF:
631 		return DYNPTR_TYPE_RINGBUF;
632 	case BPF_DYNPTR_TYPE_SKB:
633 		return DYNPTR_TYPE_SKB;
634 	case BPF_DYNPTR_TYPE_XDP:
635 		return DYNPTR_TYPE_XDP;
636 	case BPF_DYNPTR_TYPE_SKB_META:
637 		return DYNPTR_TYPE_SKB_META;
638 	case BPF_DYNPTR_TYPE_FILE:
639 		return DYNPTR_TYPE_FILE;
640 	default:
641 		return 0;
642 	}
643 }
644 
645 static bool dynptr_type_referenced(enum bpf_dynptr_type type)
646 {
647 	return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE;
648 }
649 
650 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
651 			      enum bpf_dynptr_type type,
652 			      bool first_slot, int id, int parent_id);
653 
654 
655 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
656 				   struct bpf_reg_state *sreg1,
657 				   struct bpf_reg_state *sreg2,
658 				   enum bpf_dynptr_type type, int parent_id)
659 {
660 	int id = ++env->id_gen;
661 
662 	__mark_dynptr_reg(sreg1, type, true, id, parent_id);
663 	__mark_dynptr_reg(sreg2, type, false, id, parent_id);
664 }
665 
666 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
667 			       struct bpf_reg_state *reg,
668 			       enum bpf_dynptr_type type)
669 {
670 	__mark_dynptr_reg(reg, type, true, ++env->id_gen, 0);
671 }
672 
673 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
674 				        struct bpf_func_state *state, int spi);
675 
676 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
677 				   enum bpf_arg_type arg_type, int insn_idx,
678 				   struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
679 {
680 	struct bpf_func_state *state = bpf_func(env, reg);
681 	int spi, i, err, parent_id = 0;
682 	enum bpf_dynptr_type type;
683 
684 	spi = dynptr_get_spi(env, reg);
685 	if (spi < 0)
686 		return spi;
687 
688 	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
689 	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
690 	 * to ensure that for the following example:
691 	 *	[d1][d1][d2][d2]
692 	 * spi    3   2   1   0
693 	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
694 	 * case they do belong to same dynptr, second call won't see slot_type
695 	 * as STACK_DYNPTR and will simply skip destruction.
696 	 */
697 	err = destroy_if_dynptr_stack_slot(env, state, spi);
698 	if (err)
699 		return err;
700 	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
701 	if (err)
702 		return err;
703 
704 	for (i = 0; i < BPF_REG_SIZE; i++) {
705 		state->stack[spi].slot_type[i] = STACK_DYNPTR;
706 		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
707 	}
708 
709 	type = arg_to_dynptr_type(arg_type);
710 	if (type == BPF_DYNPTR_TYPE_INVALID)
711 		return -EINVAL;
712 
713 	if (dynptr->type == BPF_DYNPTR_TYPE_INVALID) { /* dynptr constructors */
714 		err = validate_ref_obj(env, ref_obj);
715 		if (err)
716 			return err;
717 
718 		/* Track parent's id if the parent is a referenced object */
719 		parent_id = ref_obj->id;
720 
721 		if (dynptr_type_referenced(type)) {
722 			int id;
723 
724 			/*
725 			 * Create an intermediate reference that tracks the referenced
726 			 * object for the referenced dynptr. Freeing a referenced dynptr
727 			 * through helpers/kfuncs will invalidate all clones.
728 			 */
729 			id = acquire_reference(env, insn_idx, parent_id);
730 			if (id < 0)
731 				return id;
732 
733 			parent_id = id;
734 		}
735 	} else { /* bpf_dynptr_clone() */
736 		parent_id = dynptr->parent_id;
737 	}
738 
739 	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
740 			       &state->stack[spi - 1].spilled_ptr, type, parent_id);
741 
742 	return 0;
743 }
744 
745 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_stack_state *stack)
746 {
747 	int i;
748 
749 	for (i = 0; i < BPF_REG_SIZE; i++) {
750 		stack[0].slot_type[i] = STACK_INVALID;
751 		stack[1].slot_type[i] = STACK_INVALID;
752 	}
753 
754 	bpf_mark_reg_not_init(env, &stack[0].spilled_ptr);
755 	bpf_mark_reg_not_init(env, &stack[1].spilled_ptr);
756 }
757 
758 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
759 {
760 	struct bpf_func_state *state = bpf_func(env, reg);
761 	int spi;
762 
763 	spi = dynptr_get_spi(env, reg);
764 	if (spi < 0)
765 		return spi;
766 
767 	/*
768 	 * For referenced dynptr, release the parent ref which cascades to
769 	 * all clones and derived slices. For non-referenced dynptr, only
770 	 * the dynptr and slices derived from it will be invalidated.
771 	 */
772 	reg = &state->stack[spi].spilled_ptr;
773 	return release_reference(env, dynptr_type_referenced(reg->dynptr.type)
774 				      ? reg->parent_id
775 				      : reg->id);
776 }
777 
778 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
779 			       struct bpf_reg_state *reg);
780 
781 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
782 {
783 	if (!env->allow_ptr_leaks)
784 		bpf_mark_reg_not_init(env, reg);
785 	else
786 		__mark_reg_unknown(env, reg);
787 }
788 
789 static int dynptr_ref_cnt(struct bpf_verifier_env *env, int v_parent_id)
790 {
791 	struct bpf_stack_state *stack;
792 	struct bpf_func_state *state;
793 	struct bpf_reg_state *reg;
794 	int ref_cnt = 0;
795 
796 	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, 1 << STACK_DYNPTR, ({
797 		if (!stack || stack->slot_type[0] != STACK_DYNPTR)
798 			continue;
799 		if (!stack->spilled_ptr.dynptr.first_slot)
800 			continue;
801 		if (stack->spilled_ptr.parent_id == v_parent_id)
802 			ref_cnt++;
803 	}));
804 
805 	return ref_cnt;
806 }
807 
808 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
809 				        struct bpf_func_state *state, int spi)
810 {
811 	int err = 0;
812 
813 	/* We always ensure that STACK_DYNPTR is never set partially,
814 	 * hence just checking for slot_type[0] is enough. This is
815 	 * different for STACK_SPILL, where it may be only set for
816 	 * 1 byte, so code has to use is_spilled_reg.
817 	 */
818 	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
819 		return 0;
820 
821 	/* Reposition spi to first slot */
822 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
823 		spi = spi + 1;
824 
825 	/*
826 	 * A referenced dynptr can be overwritten only if there is at
827 	 * least one other dynptr sharing the same virtual ref parent,
828 	 * ensuring the reference can still be properly released.
829 	 */
830 	if (dynptr_type_referenced(state->stack[spi].spilled_ptr.dynptr.type) &&
831 	    dynptr_ref_cnt(env, state->stack[spi].spilled_ptr.parent_id) <= 1) {
832 		verbose(env, "cannot overwrite referenced dynptr\n");
833 		return -EINVAL;
834 	}
835 
836 	/* Invalidate the dynptr and any derived slices */
837 	err = release_reference(env, state->stack[spi].spilled_ptr.id);
838 	if (!err) {
839 		mark_stack_slot_scratched(env, spi);
840 		mark_stack_slot_scratched(env, spi - 1);
841 	}
842 
843 	return err;
844 }
845 
846 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
847 {
848 	int spi;
849 
850 	if (reg->type == CONST_PTR_TO_DYNPTR)
851 		return false;
852 
853 	spi = dynptr_get_spi(env, reg);
854 
855 	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
856 	 * error because this just means the stack state hasn't been updated yet.
857 	 * We will do check_mem_access to check and update stack bounds later.
858 	 */
859 	if (spi < 0 && spi != -ERANGE)
860 		return false;
861 
862 	/* We don't need to check if the stack slots are marked by previous
863 	 * dynptr initializations because we allow overwriting existing unreferenced
864 	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
865 	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
866 	 * touching are completely destructed before we reinitialize them for a new
867 	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
868 	 * instead of delaying it until the end where the user will get "Unreleased
869 	 * reference" error.
870 	 */
871 	return true;
872 }
873 
874 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
875 {
876 	struct bpf_func_state *state = bpf_func(env, reg);
877 	int i, spi;
878 
879 	/* This already represents first slot of initialized bpf_dynptr.
880 	 *
881 	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
882 	 * check_func_arg_reg_off's logic, so we don't need to check its
883 	 * offset and alignment.
884 	 */
885 	if (reg->type == CONST_PTR_TO_DYNPTR)
886 		return true;
887 
888 	spi = dynptr_get_spi(env, reg);
889 	if (spi < 0)
890 		return false;
891 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
892 		return false;
893 
894 	for (i = 0; i < BPF_REG_SIZE; i++) {
895 		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
896 		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
897 			return false;
898 	}
899 
900 	return true;
901 }
902 
903 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
904 				    enum bpf_arg_type arg_type)
905 {
906 	struct bpf_func_state *state = bpf_func(env, reg);
907 	enum bpf_dynptr_type dynptr_type;
908 	int spi;
909 
910 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
911 	if (arg_type == ARG_PTR_TO_DYNPTR)
912 		return true;
913 
914 	dynptr_type = arg_to_dynptr_type(arg_type);
915 	if (reg->type == CONST_PTR_TO_DYNPTR) {
916 		return reg->dynptr.type == dynptr_type;
917 	} else {
918 		spi = dynptr_get_spi(env, reg);
919 		if (spi < 0)
920 			return false;
921 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
922 	}
923 }
924 
925 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
926 
927 static bool in_rcu_cs(struct bpf_verifier_env *env);
928 
929 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
930 
931 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
932 				 struct bpf_kfunc_call_arg_meta *meta,
933 				 struct bpf_reg_state *reg, int insn_idx,
934 				 struct btf *btf, u32 btf_id, int nr_slots)
935 {
936 	struct bpf_func_state *state = bpf_func(env, reg);
937 	int spi, i, j, id;
938 
939 	spi = iter_get_spi(env, reg, nr_slots);
940 	if (spi < 0)
941 		return spi;
942 
943 	id = acquire_reference(env, insn_idx, 0);
944 	if (id < 0)
945 		return id;
946 
947 	for (i = 0; i < nr_slots; i++) {
948 		struct bpf_stack_state *slot = &state->stack[spi - i];
949 		struct bpf_reg_state *st = &slot->spilled_ptr;
950 
951 		__mark_reg_known_zero(st);
952 		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
953 		if (is_kfunc_rcu_protected(meta)) {
954 			if (in_rcu_cs(env))
955 				st->type |= MEM_RCU;
956 			else
957 				st->type |= PTR_UNTRUSTED;
958 		}
959 		st->id = i == 0 ? id : 0;
960 		st->iter.btf = btf;
961 		st->iter.btf_id = btf_id;
962 		st->iter.state = BPF_ITER_STATE_ACTIVE;
963 		st->iter.depth = 0;
964 
965 		for (j = 0; j < BPF_REG_SIZE; j++)
966 			slot->slot_type[j] = STACK_ITER;
967 
968 		mark_stack_slot_scratched(env, spi - i);
969 	}
970 
971 	return 0;
972 }
973 
974 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
975 				   struct bpf_reg_state *reg, int nr_slots)
976 {
977 	struct bpf_func_state *state = bpf_func(env, reg);
978 	int spi, i, j;
979 
980 	spi = iter_get_spi(env, reg, nr_slots);
981 	if (spi < 0)
982 		return spi;
983 
984 	for (i = 0; i < nr_slots; i++) {
985 		struct bpf_stack_state *slot = &state->stack[spi - i];
986 		struct bpf_reg_state *st = &slot->spilled_ptr;
987 
988 		if (i == 0)
989 			WARN_ON_ONCE(release_reference(env, st->id));
990 
991 		bpf_mark_reg_not_init(env, st);
992 
993 		for (j = 0; j < BPF_REG_SIZE; j++)
994 			slot->slot_type[j] = STACK_INVALID;
995 
996 		mark_stack_slot_scratched(env, spi - i);
997 	}
998 
999 	return 0;
1000 }
1001 
1002 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1003 				     struct bpf_reg_state *reg, int nr_slots)
1004 {
1005 	struct bpf_func_state *state = bpf_func(env, reg);
1006 	int spi, i, j;
1007 
1008 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1009 	 * will do check_mem_access to check and update stack bounds later, so
1010 	 * return true for that case.
1011 	 */
1012 	spi = iter_get_spi(env, reg, nr_slots);
1013 	if (spi == -ERANGE)
1014 		return true;
1015 	if (spi < 0)
1016 		return false;
1017 
1018 	for (i = 0; i < nr_slots; i++) {
1019 		struct bpf_stack_state *slot = &state->stack[spi - i];
1020 
1021 		for (j = 0; j < BPF_REG_SIZE; j++)
1022 			if (slot->slot_type[j] == STACK_ITER)
1023 				return false;
1024 	}
1025 
1026 	return true;
1027 }
1028 
1029 static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1030 				   struct btf *btf, u32 btf_id, int nr_slots)
1031 {
1032 	struct bpf_func_state *state = bpf_func(env, reg);
1033 	int spi, i, j;
1034 
1035 	spi = iter_get_spi(env, reg, nr_slots);
1036 	if (spi < 0)
1037 		return -EINVAL;
1038 
1039 	for (i = 0; i < nr_slots; i++) {
1040 		struct bpf_stack_state *slot = &state->stack[spi - i];
1041 		struct bpf_reg_state *st = &slot->spilled_ptr;
1042 
1043 		if (st->type & PTR_UNTRUSTED)
1044 			return -EPROTO;
1045 		/* only main (first) slot has id set */
1046 		if (i == 0 && !st->id)
1047 			return -EINVAL;
1048 		if (i != 0 && st->id)
1049 			return -EINVAL;
1050 		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1051 			return -EINVAL;
1052 
1053 		for (j = 0; j < BPF_REG_SIZE; j++)
1054 			if (slot->slot_type[j] != STACK_ITER)
1055 				return -EINVAL;
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1062 static int release_irq_state(struct bpf_verifier_state *state, int id);
1063 
1064 static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
1065 				     struct bpf_kfunc_call_arg_meta *meta,
1066 				     struct bpf_reg_state *reg, int insn_idx,
1067 				     int kfunc_class)
1068 {
1069 	struct bpf_func_state *state = bpf_func(env, reg);
1070 	struct bpf_stack_state *slot;
1071 	struct bpf_reg_state *st;
1072 	int spi, i, id;
1073 
1074 	spi = irq_flag_get_spi(env, reg);
1075 	if (spi < 0)
1076 		return spi;
1077 
1078 	id = acquire_irq_state(env, insn_idx);
1079 	if (id < 0)
1080 		return id;
1081 
1082 	slot = &state->stack[spi];
1083 	st = &slot->spilled_ptr;
1084 
1085 	__mark_reg_known_zero(st);
1086 	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1087 	st->id = id;
1088 	st->irq.kfunc_class = kfunc_class;
1089 
1090 	for (i = 0; i < BPF_REG_SIZE; i++)
1091 		slot->slot_type[i] = STACK_IRQ_FLAG;
1092 
1093 	mark_stack_slot_scratched(env, spi);
1094 	return 0;
1095 }
1096 
1097 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1098 				      int kfunc_class)
1099 {
1100 	struct bpf_func_state *state = bpf_func(env, reg);
1101 	struct bpf_stack_state *slot;
1102 	struct bpf_reg_state *st;
1103 	int spi, i, err;
1104 
1105 	spi = irq_flag_get_spi(env, reg);
1106 	if (spi < 0)
1107 		return spi;
1108 
1109 	slot = &state->stack[spi];
1110 	st = &slot->spilled_ptr;
1111 
1112 	if (st->irq.kfunc_class != kfunc_class) {
1113 		const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1114 		const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1115 
1116 		verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
1117 			flag_kfunc, used_kfunc);
1118 		return -EINVAL;
1119 	}
1120 
1121 	err = release_irq_state(env->cur_state, st->id);
1122 	WARN_ON_ONCE(err && err != -EACCES);
1123 	if (err) {
1124 		int insn_idx = 0;
1125 
1126 		for (int i = 0; i < env->cur_state->acquired_refs; i++) {
1127 			if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
1128 				insn_idx = env->cur_state->refs[i].insn_idx;
1129 				break;
1130 			}
1131 		}
1132 
1133 		verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
1134 			env->cur_state->active_irq_id, insn_idx);
1135 		return err;
1136 	}
1137 
1138 	bpf_mark_reg_not_init(env, st);
1139 
1140 	for (i = 0; i < BPF_REG_SIZE; i++)
1141 		slot->slot_type[i] = STACK_INVALID;
1142 
1143 	mark_stack_slot_scratched(env, spi);
1144 	return 0;
1145 }
1146 
1147 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1148 {
1149 	struct bpf_func_state *state = bpf_func(env, reg);
1150 	struct bpf_stack_state *slot;
1151 	int spi, i;
1152 
1153 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1154 	 * will do check_mem_access to check and update stack bounds later, so
1155 	 * return true for that case.
1156 	 */
1157 	spi = irq_flag_get_spi(env, reg);
1158 	if (spi == -ERANGE)
1159 		return true;
1160 	if (spi < 0)
1161 		return false;
1162 
1163 	slot = &state->stack[spi];
1164 
1165 	for (i = 0; i < BPF_REG_SIZE; i++)
1166 		if (slot->slot_type[i] == STACK_IRQ_FLAG)
1167 			return false;
1168 	return true;
1169 }
1170 
1171 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1172 {
1173 	struct bpf_func_state *state = bpf_func(env, reg);
1174 	struct bpf_stack_state *slot;
1175 	struct bpf_reg_state *st;
1176 	int spi, i;
1177 
1178 	spi = irq_flag_get_spi(env, reg);
1179 	if (spi < 0)
1180 		return -EINVAL;
1181 
1182 	slot = &state->stack[spi];
1183 	st = &slot->spilled_ptr;
1184 
1185 	if (!st->id)
1186 		return -EINVAL;
1187 
1188 	for (i = 0; i < BPF_REG_SIZE; i++)
1189 		if (slot->slot_type[i] != STACK_IRQ_FLAG)
1190 			return -EINVAL;
1191 	return 0;
1192 }
1193 
1194 /* Check if given stack slot is "special":
1195  *   - spilled register state (STACK_SPILL);
1196  *   - dynptr state (STACK_DYNPTR);
1197  *   - iter state (STACK_ITER).
1198  *   - irq flag state (STACK_IRQ_FLAG)
1199  */
1200 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1201 {
1202 	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1203 
1204 	switch (type) {
1205 	case STACK_SPILL:
1206 	case STACK_DYNPTR:
1207 	case STACK_ITER:
1208 	case STACK_IRQ_FLAG:
1209 		return true;
1210 	case STACK_INVALID:
1211 	case STACK_POISON:
1212 	case STACK_MISC:
1213 	case STACK_ZERO:
1214 		return false;
1215 	default:
1216 		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1217 		return true;
1218 	}
1219 }
1220 
1221 /* The reg state of a pointer or a bounded scalar was saved when
1222  * it was spilled to the stack.
1223  */
1224 
1225 /*
1226  * Mark stack slot as STACK_MISC, unless it is already:
1227  * - STACK_INVALID, in which case they are equivalent.
1228  * - STACK_ZERO, in which case we preserve more precise STACK_ZERO.
1229  * - STACK_POISON, which truly forbids access to the slot.
1230  * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1231  * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
1232  * unnecessary as both are considered equivalent when loading data and pruning,
1233  * in case of unprivileged mode it will be incorrect to allow reads of invalid
1234  * slots.
1235  */
1236 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1237 {
1238 	if (*stype == STACK_ZERO)
1239 		return;
1240 	if (*stype == STACK_INVALID || *stype == STACK_POISON)
1241 		return;
1242 	*stype = STACK_MISC;
1243 }
1244 
1245 static void scrub_spilled_slot(u8 *stype)
1246 {
1247 	if (*stype != STACK_INVALID && *stype != STACK_POISON)
1248 		*stype = STACK_MISC;
1249 }
1250 
1251 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1252  * small to hold src. This is different from krealloc since we don't want to preserve
1253  * the contents of dst.
1254  *
1255  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1256  * not be allocated.
1257  */
1258 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1259 {
1260 	size_t alloc_bytes;
1261 	void *orig = dst;
1262 	size_t bytes;
1263 
1264 	if (ZERO_OR_NULL_PTR(src))
1265 		goto out;
1266 
1267 	if (unlikely(check_mul_overflow(n, size, &bytes)))
1268 		return NULL;
1269 
1270 	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1271 	dst = krealloc(orig, alloc_bytes, flags);
1272 	if (!dst) {
1273 		kfree(orig);
1274 		return NULL;
1275 	}
1276 
1277 	memcpy(dst, src, bytes);
1278 out:
1279 	return dst ? dst : ZERO_SIZE_PTR;
1280 }
1281 
1282 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1283  * small to hold new_n items. new items are zeroed out if the array grows.
1284  *
1285  * Contrary to krealloc_array, does not free arr if new_n is zero.
1286  */
1287 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1288 {
1289 	size_t alloc_size;
1290 	void *new_arr;
1291 
1292 	if (!new_n || old_n == new_n)
1293 		goto out;
1294 
1295 	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1296 	new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
1297 	if (!new_arr) {
1298 		kfree(arr);
1299 		return NULL;
1300 	}
1301 	arr = new_arr;
1302 
1303 	if (new_n > old_n)
1304 		memset(arr + old_n * size, 0, (new_n - old_n) * size);
1305 
1306 out:
1307 	return arr ? arr : ZERO_SIZE_PTR;
1308 }
1309 
1310 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
1311 {
1312 	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1313 			       sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
1314 	if (!dst->refs)
1315 		return -ENOMEM;
1316 
1317 	dst->acquired_refs = src->acquired_refs;
1318 	dst->active_locks = src->active_locks;
1319 	dst->active_preempt_locks = src->active_preempt_locks;
1320 	dst->active_rcu_locks = src->active_rcu_locks;
1321 	dst->active_irq_id = src->active_irq_id;
1322 	dst->active_lock_id = src->active_lock_id;
1323 	dst->active_lock_ptr = src->active_lock_ptr;
1324 	return 0;
1325 }
1326 
1327 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1328 {
1329 	size_t n = src->allocated_stack / BPF_REG_SIZE;
1330 
1331 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1332 				GFP_KERNEL_ACCOUNT);
1333 	if (!dst->stack)
1334 		return -ENOMEM;
1335 
1336 	dst->allocated_stack = src->allocated_stack;
1337 
1338 	/* copy stack args state */
1339 	n = src->out_stack_arg_cnt;
1340 	if (n) {
1341 		dst->stack_arg_regs = copy_array(dst->stack_arg_regs, src->stack_arg_regs, n,
1342 						 sizeof(struct bpf_reg_state),
1343 						 GFP_KERNEL_ACCOUNT);
1344 		if (!dst->stack_arg_regs)
1345 			return -ENOMEM;
1346 	}
1347 
1348 	dst->out_stack_arg_cnt = src->out_stack_arg_cnt;
1349 	return 0;
1350 }
1351 
1352 static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
1353 {
1354 	state->refs = realloc_array(state->refs, state->acquired_refs, n,
1355 				    sizeof(struct bpf_reference_state));
1356 	if (!state->refs)
1357 		return -ENOMEM;
1358 
1359 	state->acquired_refs = n;
1360 	return 0;
1361 }
1362 
1363 /* Possibly update state->allocated_stack to be at least size bytes. Also
1364  * possibly update the function's high-water mark in its bpf_subprog_info.
1365  */
1366 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1367 {
1368 	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1369 
1370 	/* The stack size is always a multiple of BPF_REG_SIZE. */
1371 	size = round_up(size, BPF_REG_SIZE);
1372 	n = size / BPF_REG_SIZE;
1373 
1374 	if (old_n >= n)
1375 		return 0;
1376 
1377 	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1378 	if (!state->stack)
1379 		return -ENOMEM;
1380 
1381 	state->allocated_stack = size;
1382 
1383 	/* update known max for given subprogram */
1384 	if (env->subprog_info[state->subprogno].stack_depth < size)
1385 		env->subprog_info[state->subprogno].stack_depth = size;
1386 
1387 	return 0;
1388 }
1389 
1390 static int grow_stack_arg_slots(struct bpf_verifier_env *env,
1391 				struct bpf_func_state *state, int cnt)
1392 {
1393 	size_t old_n = state->out_stack_arg_cnt;
1394 
1395 	if (old_n >= cnt)
1396 		return 0;
1397 
1398 	state->stack_arg_regs = realloc_array(state->stack_arg_regs, old_n, cnt,
1399 					      sizeof(struct bpf_reg_state));
1400 	if (!state->stack_arg_regs)
1401 		return -ENOMEM;
1402 
1403 	state->out_stack_arg_cnt = cnt;
1404 	return 0;
1405 }
1406 
1407 /* Acquire a pointer id from the env and update the state->refs to include
1408  * this new pointer reference.
1409  * On success, returns a valid pointer id to associate with the register
1410  * On failure, returns a negative errno.
1411  */
1412 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1413 {
1414 	struct bpf_verifier_state *state = env->cur_state;
1415 	int new_ofs = state->acquired_refs;
1416 	int err;
1417 
1418 	err = resize_reference_state(state, state->acquired_refs + 1);
1419 	if (err)
1420 		return NULL;
1421 	state->refs[new_ofs].insn_idx = insn_idx;
1422 
1423 	return &state->refs[new_ofs];
1424 }
1425 
1426 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id)
1427 {
1428 	struct bpf_reference_state *s;
1429 
1430 	s = acquire_reference_state(env, insn_idx);
1431 	if (!s)
1432 		return -ENOMEM;
1433 	s->type = REF_TYPE_PTR;
1434 	s->id = ++env->id_gen;
1435 	s->parent_id = parent_id;
1436 	return s->id;
1437 }
1438 
1439 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
1440 			      int id, void *ptr)
1441 {
1442 	struct bpf_verifier_state *state = env->cur_state;
1443 	struct bpf_reference_state *s;
1444 
1445 	s = acquire_reference_state(env, insn_idx);
1446 	if (!s)
1447 		return -ENOMEM;
1448 	s->type = type;
1449 	s->id = id;
1450 	s->ptr = ptr;
1451 
1452 	state->active_locks++;
1453 	state->active_lock_id = id;
1454 	state->active_lock_ptr = ptr;
1455 	return 0;
1456 }
1457 
1458 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
1459 {
1460 	struct bpf_verifier_state *state = env->cur_state;
1461 	struct bpf_reference_state *s;
1462 
1463 	s = acquire_reference_state(env, insn_idx);
1464 	if (!s)
1465 		return -ENOMEM;
1466 	s->type = REF_TYPE_IRQ;
1467 	s->id = ++env->id_gen;
1468 
1469 	state->active_irq_id = s->id;
1470 	return s->id;
1471 }
1472 
1473 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1474 {
1475 	int last_idx;
1476 	size_t rem;
1477 
1478 	/* IRQ state requires the relative ordering of elements remaining the
1479 	 * same, since it relies on the refs array to behave as a stack, so that
1480 	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1481 	 * the array instead of swapping the final element into the deleted idx.
1482 	 */
1483 	last_idx = state->acquired_refs - 1;
1484 	rem = state->acquired_refs - idx - 1;
1485 	if (last_idx && idx != last_idx)
1486 		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1487 	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1488 	state->acquired_refs--;
1489 	return;
1490 }
1491 
1492 static bool find_reference_state(struct bpf_verifier_state *state, int id)
1493 {
1494 	int i;
1495 
1496 	for (i = 0; i < state->acquired_refs; i++) {
1497 		if (state->refs[i].type != REF_TYPE_PTR)
1498 			continue;
1499 		if (state->refs[i].id == id)
1500 			return true;
1501 	}
1502 
1503 	return false;
1504 }
1505 
1506 static bool reg_is_referenced(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
1507 {
1508 	return find_reference_state(env->cur_state, reg->id);
1509 }
1510 
1511 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
1512 {
1513 	void *prev_ptr = NULL;
1514 	u32 prev_id = 0;
1515 	int i;
1516 
1517 	for (i = 0; i < state->acquired_refs; i++) {
1518 		if (state->refs[i].type == type && state->refs[i].id == id &&
1519 		    state->refs[i].ptr == ptr) {
1520 			release_reference_state(state, i);
1521 			state->active_locks--;
1522 			/* Reassign active lock (id, ptr). */
1523 			state->active_lock_id = prev_id;
1524 			state->active_lock_ptr = prev_ptr;
1525 			return 0;
1526 		}
1527 		if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
1528 			prev_id = state->refs[i].id;
1529 			prev_ptr = state->refs[i].ptr;
1530 		}
1531 	}
1532 	return -EINVAL;
1533 }
1534 
1535 static int release_irq_state(struct bpf_verifier_state *state, int id)
1536 {
1537 	u32 prev_id = 0;
1538 	int i;
1539 
1540 	if (id != state->active_irq_id)
1541 		return -EACCES;
1542 
1543 	for (i = 0; i < state->acquired_refs; i++) {
1544 		if (state->refs[i].type != REF_TYPE_IRQ)
1545 			continue;
1546 		if (state->refs[i].id == id) {
1547 			release_reference_state(state, i);
1548 			state->active_irq_id = prev_id;
1549 			return 0;
1550 		} else {
1551 			prev_id = state->refs[i].id;
1552 		}
1553 	}
1554 	return -EINVAL;
1555 }
1556 
1557 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1558 						   int id, void *ptr)
1559 {
1560 	int i;
1561 
1562 	for (i = 0; i < state->acquired_refs; i++) {
1563 		struct bpf_reference_state *s = &state->refs[i];
1564 
1565 		if (!(s->type & type))
1566 			continue;
1567 
1568 		if (s->id == id && s->ptr == ptr)
1569 			return s;
1570 	}
1571 	return NULL;
1572 }
1573 
1574 static void free_func_state(struct bpf_func_state *state)
1575 {
1576 	if (!state)
1577 		return;
1578 	kfree(state->stack_arg_regs);
1579 	kfree(state->stack);
1580 	kfree(state);
1581 }
1582 
1583 void bpf_clear_jmp_history(struct bpf_verifier_state *state)
1584 {
1585 	kfree(state->jmp_history);
1586 	state->jmp_history = NULL;
1587 	state->jmp_history_cnt = 0;
1588 }
1589 
1590 void bpf_free_verifier_state(struct bpf_verifier_state *state,
1591 			    bool free_self)
1592 {
1593 	int i;
1594 
1595 	for (i = 0; i <= state->curframe; i++) {
1596 		free_func_state(state->frame[i]);
1597 		state->frame[i] = NULL;
1598 	}
1599 	kfree(state->refs);
1600 	bpf_clear_jmp_history(state);
1601 	if (free_self)
1602 		kfree(state);
1603 }
1604 
1605 /* copy verifier state from src to dst growing dst stack space
1606  * when necessary to accommodate larger src stack
1607  */
1608 static int copy_func_state(struct bpf_func_state *dst,
1609 			   const struct bpf_func_state *src)
1610 {
1611 	memcpy(dst, src, offsetof(struct bpf_func_state, stack));
1612 	return copy_stack_state(dst, src);
1613 }
1614 
1615 int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
1616 			   const struct bpf_verifier_state *src)
1617 {
1618 	struct bpf_func_state *dst;
1619 	int i, err;
1620 
1621 	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1622 					  src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
1623 					  GFP_KERNEL_ACCOUNT);
1624 	if (!dst_state->jmp_history)
1625 		return -ENOMEM;
1626 	dst_state->jmp_history_cnt = src->jmp_history_cnt;
1627 
1628 	/* if dst has more stack frames then src frame, free them, this is also
1629 	 * necessary in case of exceptional exits using bpf_throw.
1630 	 */
1631 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1632 		free_func_state(dst_state->frame[i]);
1633 		dst_state->frame[i] = NULL;
1634 	}
1635 	err = copy_reference_state(dst_state, src);
1636 	if (err)
1637 		return err;
1638 	dst_state->speculative = src->speculative;
1639 	dst_state->in_sleepable = src->in_sleepable;
1640 	dst_state->curframe = src->curframe;
1641 	dst_state->branches = src->branches;
1642 	dst_state->parent = src->parent;
1643 	dst_state->first_insn_idx = src->first_insn_idx;
1644 	dst_state->last_insn_idx = src->last_insn_idx;
1645 	dst_state->dfs_depth = src->dfs_depth;
1646 	dst_state->callback_unroll_depth = src->callback_unroll_depth;
1647 	dst_state->may_goto_depth = src->may_goto_depth;
1648 	dst_state->equal_state = src->equal_state;
1649 	for (i = 0; i <= src->curframe; i++) {
1650 		dst = dst_state->frame[i];
1651 		if (!dst) {
1652 			dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT);
1653 			if (!dst)
1654 				return -ENOMEM;
1655 			dst_state->frame[i] = dst;
1656 		}
1657 		err = copy_func_state(dst, src->frame[i]);
1658 		if (err)
1659 			return err;
1660 	}
1661 	return 0;
1662 }
1663 
/* Size used as the modulus when bpf_explored_state() indexes
 * env->explored_states: the length of the program (env->prog->len).
 */
static u32 state_htab_size(struct bpf_verifier_env *env)
{
	return env->prog->len;
}
1668 
1669 struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx)
1670 {
1671 	struct bpf_verifier_state *cur = env->cur_state;
1672 	struct bpf_func_state *state = cur->frame[cur->curframe];
1673 
1674 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1675 }
1676 
1677 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1678 {
1679 	int fr;
1680 
1681 	if (a->curframe != b->curframe)
1682 		return false;
1683 
1684 	for (fr = a->curframe; fr >= 0; fr--)
1685 		if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1686 			return false;
1687 
1688 	return true;
1689 }
1690 
1691 
1692 void bpf_free_backedges(struct bpf_scc_visit *visit)
1693 {
1694 	struct bpf_scc_backedge *backedge, *next;
1695 
1696 	for (backedge = visit->backedges; backedge; backedge = next) {
1697 		bpf_free_verifier_state(&backedge->state, false);
1698 		next = backedge->next;
1699 		kfree(backedge);
1700 	}
1701 	visit->backedges = NULL;
1702 }
1703 
1704 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1705 		     int *insn_idx, bool pop_log)
1706 {
1707 	struct bpf_verifier_state *cur = env->cur_state;
1708 	struct bpf_verifier_stack_elem *elem, *head = env->head;
1709 	int err;
1710 
1711 	if (env->head == NULL)
1712 		return -ENOENT;
1713 
1714 	if (cur) {
1715 		err = bpf_copy_verifier_state(cur, &head->st);
1716 		if (err)
1717 			return err;
1718 	}
1719 	if (pop_log)
1720 		bpf_vlog_reset(&env->log, head->log_pos);
1721 	if (insn_idx)
1722 		*insn_idx = head->insn_idx;
1723 	if (prev_insn_idx)
1724 		*prev_insn_idx = head->prev_insn_idx;
1725 	elem = head->next;
1726 	bpf_free_verifier_state(&head->st, false);
1727 	kfree(head);
1728 	env->head = elem;
1729 	env->stack_size--;
1730 	return 0;
1731 }
1732 
/* Tell whether @err is a non-fatal error that is allowed to occur during
 * speculative verification. For those a nospec can be inserted and the
 * program might still be accepted.
 *
 * Only -EPERM, -EACCES and -EINVAL qualify. Something like ENOMEM is left out
 * on purpose: it is likely to re-occur for the next architectural path once
 * it has been recovered from in all speculative paths.
 */
static bool error_recoverable_with_nospec(int err)
{
	switch (err) {
	case -EPERM:
	case -EACCES:
	case -EINVAL:
		return true;
	default:
		return false;
	}
}
1744 
1745 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1746 					     int insn_idx, int prev_insn_idx,
1747 					     bool speculative)
1748 {
1749 	struct bpf_verifier_state *cur = env->cur_state;
1750 	struct bpf_verifier_stack_elem *elem;
1751 	int err;
1752 
1753 	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
1754 	if (!elem)
1755 		return ERR_PTR(-ENOMEM);
1756 
1757 	elem->insn_idx = insn_idx;
1758 	elem->prev_insn_idx = prev_insn_idx;
1759 	elem->next = env->head;
1760 	elem->log_pos = env->log.end_pos;
1761 	env->head = elem;
1762 	env->stack_size++;
1763 	err = bpf_copy_verifier_state(&elem->st, cur);
1764 	if (err)
1765 		return ERR_PTR(-ENOMEM);
1766 	elem->st.speculative |= speculative;
1767 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1768 		verbose(env, "The sequence of %d jumps is too complex.\n",
1769 			env->stack_size);
1770 		return ERR_PTR(-E2BIG);
1771 	}
1772 	if (elem->st.parent) {
1773 		++elem->st.parent->branches;
1774 		/* WARN_ON(branches > 2) technically makes sense here,
1775 		 * but
1776 		 * 1. speculative states will bump 'branches' for non-branch
1777 		 * instructions
1778 		 * 2. is_state_visited() heuristics may decide not to create
1779 		 * a new state for a sequence of branches and all such current
1780 		 * and cloned states will be pointing to a single parent state
1781 		 * which might have large 'branches' count.
1782 		 */
1783 	}
1784 	return &elem->st;
1785 }
1786 
1787 static const char *reg_arg_name(struct bpf_verifier_env *env, argno_t argno)
1788 {
1789 	char *buf = env->tmp_arg_name;
1790 	int len = sizeof(env->tmp_arg_name);
1791 	int arg, regno = reg_from_argno(argno);
1792 
1793 	if (regno >= 0) {
1794 		snprintf(buf, len, "R%d", regno);
1795 	} else {
1796 		arg = arg_from_argno(argno);
1797 		snprintf(buf, len, "*(R11-%u)", (arg - MAX_BPF_FUNC_REG_ARGS) * BPF_REG_SIZE);
1798 	}
1799 
1800 	return buf;
1801 }
1802 
/* Table of the registers BPF_REG_0 through BPF_REG_5; its length is
 * CALLER_SAVED_REGS.
 */
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
1806 
1807 /* This helper doesn't clear reg->id */
1808 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1809 {
1810 	reg->var_off = tnum_const(imm);
1811 	reg->r64 = cnum64_from_urange(imm, imm);
1812 	reg->r32 = cnum32_from_urange((u32)imm, (u32)imm);
1813 }
1814 
1815 /* Mark the unknown part of a register (variable offset or scalar value) as
1816  * known to have the value @imm.
1817  */
1818 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1819 {
1820 	/* Clear off and union(map_ptr, range) */
1821 	memset(((u8 *)reg) + sizeof(reg->type), 0,
1822 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1823 	reg->id = 0;
1824 	reg->parent_id = 0;
1825 	___mark_reg_known(reg, imm);
1826 }
1827 
1828 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1829 {
1830 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1831 	reg->r32 = cnum32_from_urange((u32)imm, (u32)imm);
1832 }
1833 
/* Mark the 'variable offset' part of a register as zero.  This should be
 * used only on registers holding a pointer type.
 *
 * Implemented as __mark_reg_known() with an immediate of 0.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}
1841 
1842 static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1843 {
1844 	__mark_reg_known(reg, 0);
1845 	reg->type = SCALAR_VALUE;
1846 	/* all scalars are assumed imprecise initially (unless unprivileged,
1847 	 * in which case everything is forced to be precise)
1848 	 */
1849 	reg->precise = !env->bpf_capable;
1850 }
1851 
1852 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1853 				struct bpf_reg_state *regs, u32 regno)
1854 {
1855 	__mark_reg_known_zero(regs + regno);
1856 }
1857 
1858 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
1859 			      bool first_slot, int id, int parent_id)
1860 {
1861 	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1862 	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1863 	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1864 	 */
1865 	__mark_reg_known_zero(reg);
1866 	reg->type = CONST_PTR_TO_DYNPTR;
1867 	/* Give each dynptr a unique id to uniquely associate slices to it. */
1868 	reg->id = id;
1869 	reg->parent_id = parent_id;
1870 	reg->dynptr.type = type;
1871 	reg->dynptr.first_slot = first_slot;
1872 }
1873 
1874 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1875 {
1876 	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1877 		const struct bpf_map *map = reg->map_ptr;
1878 
1879 		if (map->inner_map_meta) {
1880 			reg->type = CONST_PTR_TO_MAP;
1881 			reg->map_ptr = map->inner_map_meta;
1882 			/* transfer reg's id which is unique for every map_lookup_elem
1883 			 * as UID of the inner map.
1884 			 */
1885 			if (btf_record_has_field(map->inner_map_meta->record,
1886 						 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
1887 				reg->map_uid = reg->id;
1888 			}
1889 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1890 			reg->type = PTR_TO_XDP_SOCK;
1891 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1892 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1893 			reg->type = PTR_TO_SOCKET;
1894 		} else {
1895 			reg->type = PTR_TO_MAP_VALUE;
1896 		}
1897 		return;
1898 	}
1899 
1900 	reg->type &= ~PTR_MAYBE_NULL;
1901 }
1902 
1903 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1904 				struct btf_field_graph_root *ds_head)
1905 {
1906 	__mark_reg_known(&regs[regno], ds_head->node_offset);
1907 	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1908 	regs[regno].btf = ds_head->btf;
1909 	regs[regno].btf_id = ds_head->value_btf_id;
1910 }
1911 
/* Return the result of type_is_pkt_pointer() for the type of @reg. */
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}
1916 
1917 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1918 {
1919 	return reg_is_pkt_pointer(reg) ||
1920 	       reg->type == PTR_TO_PACKET_END;
1921 }
1922 
1923 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1924 {
1925 	return base_type(reg->type) == PTR_TO_MEM &&
1926 	       (reg->type &
1927 		(DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
1928 }
1929 
1930 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1931 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1932 				    enum bpf_reg_type which)
1933 {
1934 	/* The register can already have a range from prior markings.
1935 	 * This is fine as long as it hasn't been advanced from its
1936 	 * origin.
1937 	 */
1938 	return reg->type == which &&
1939 	       reg->id == 0 &&
1940 	       tnum_equals_const(reg->var_off, 0);
1941 }
1942 
1943 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1944 {
1945 	reg->r32 = CNUM32_UNBOUNDED;
1946 }
1947 
1948 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1949 {
1950 	reg->r64 = CNUM64_UNBOUNDED;
1951 }
1952 
/* Reset both the 32-bit and the 64-bit range of @reg to unbounded.
 * var_off is left untouched.
 */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	/* The two helpers write disjoint fields, so their order is free. */
	__mark_reg32_unbounded(reg);
	__mark_reg64_unbounded(reg);
}
1959 
1960 static void reset_reg64_and_tnum(struct bpf_reg_state *reg)
1961 {
1962 	__mark_reg64_unbounded(reg);
1963 	reg->var_off = tnum_unknown;
1964 }
1965 
1966 static void reset_reg32_and_tnum(struct bpf_reg_state *reg)
1967 {
1968 	__mark_reg32_unbounded(reg);
1969 	reg->var_off = tnum_unknown;
1970 }
1971 
1972 static struct cnum32 cnum32_from_tnum(struct tnum tnum)
1973 {
1974 	tnum = tnum_subreg(tnum);
1975 	if ((tnum.mask & S32_MIN) || (tnum.value & S32_MIN))
1976 		/* min signed is max(sign bit) | min(other bits) */
1977 		/* max signed is min(sign bit) | max(other bits) */
1978 		return cnum32_from_srange(tnum.value | (tnum.mask & S32_MIN),
1979 					  tnum.value | (tnum.mask & S32_MAX));
1980 	else
1981 		return cnum32_from_urange(tnum.value, (tnum.value | tnum.mask));
1982 }
1983 
1984 static struct cnum64 cnum64_from_tnum(struct tnum tnum)
1985 {
1986 	if ((tnum.mask & S64_MIN) || (tnum.value & S64_MIN))
1987 		/* min signed is max(sign bit) | min(other bits) */
1988 		/* max signed is min(sign bit) | max(other bits) */
1989 		return cnum64_from_srange(tnum.value | (tnum.mask & S64_MIN),
1990 					  tnum.value | (tnum.mask & S64_MAX));
1991 	else
1992 		return cnum64_from_urange(tnum.value, (tnum.value | tnum.mask));
1993 }
1994 
1995 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1996 {
1997 	cnum32_intersect_with(&reg->r32, cnum32_from_tnum(reg->var_off));
1998 }
1999 
/* Narrow the 64-bit range of @reg with what var_off implies and, when the
 * unsigned range and the tnum turn out to share exactly one value, mark the
 * register as a known constant.
 *
 * NOTE(review): tnum_step(t, x) is assumed to return the smallest value
 * representable by @t that is strictly greater than @x - confirm against
 * its definition.
 */
static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
	u64 tnum_next, tmax;
	bool umin_in_tnum;

	cnum64_intersect_with(&reg->r64, cnum64_from_tnum(reg->var_off));

	/* Check if u64 and tnum overlap in a single value */
	tnum_next = tnum_step(reg->var_off, reg_umin(reg));
	/* umin is a member of the tnum iff its known bits match var_off.value */
	umin_in_tnum = (reg_umin(reg) & ~reg->var_off.mask) == reg->var_off.value;
	/* largest value the tnum can represent: every unknown bit set */
	tmax = reg->var_off.value | reg->var_off.mask;
	if (umin_in_tnum && tnum_next > reg_umax(reg)) {
		/* The u64 range and the tnum only overlap in umin.
		 * u64:  ---[xxxxxx]-----
		 * tnum: --xx----------x-
		 */
		___mark_reg_known(reg, reg_umin(reg));
	} else if (!umin_in_tnum && tnum_next == tmax) {
		/* The u64 range and the tnum only overlap in the maximum value
		 * represented by the tnum, called tmax.
		 * u64:  ---[xxxxxx]-----
		 * tnum: xx-----x--------
		 */
		___mark_reg_known(reg, tmax);
	} else if (!umin_in_tnum && tnum_next <= reg_umax(reg) &&
		   tnum_step(reg->var_off, tnum_next) > reg_umax(reg)) {
		/* The u64 range and the tnum only overlap in between umin
		 * (excluded) and umax.
		 * u64:  ---[xxxxxx]-----
		 * tnum: xx----x-------x-
		 */
		___mark_reg_known(reg, tnum_next);
	}
}
2034 
2035 static void __update_reg_bounds(struct bpf_reg_state *reg)
2036 {
2037 	__update_reg32_bounds(reg);
2038 	__update_reg64_bounds(reg);
2039 }
2040 
2041 static void deduce_bounds_32_from_64(struct bpf_reg_state *reg)
2042 {
2043 	cnum32_intersect_with(&reg->r32, cnum32_from_cnum64(reg->r64));
2044 }
2045 
2046 static void deduce_bounds_64_from_32(struct bpf_reg_state *reg)
2047 {
2048 	reg->r64 = cnum64_cnum32_intersect(reg->r64, reg->r32);
2049 }
2050 
2051 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2052 {
2053 	deduce_bounds_32_from_64(reg);
2054 	deduce_bounds_64_from_32(reg);
2055 }
2056 
2057 /* Attempts to improve var_off based on unsigned min/max information */
2058 static void __reg_bound_offset(struct bpf_reg_state *reg)
2059 {
2060 	struct tnum var64_off = tnum_intersect(reg->var_off,
2061 					       tnum_range(reg_umin(reg),
2062 							  reg_umax(reg)));
2063 	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2064 					       tnum_range(reg_u32_min(reg),
2065 							  reg_u32_max(reg)));
2066 
2067 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2068 }
2069 
2070 static bool range_bounds_violation(struct bpf_reg_state *reg);
2071 
/* Propagate knowledge between var_off and the 32/64-bit ranges of @reg.
 * Does nothing when the incoming ranges are already invalid (empty).
 */
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	if (!range_bounds_violation(reg)) {
		/* var_off may tell us something new about the bounds. */
		__update_reg_bounds(reg);
		/* The sign bit may have become known; deduce twice. */
		__reg_deduce_bounds(reg);
		__reg_deduce_bounds(reg);
		/* The bounds may in turn pin down some bits. */
		__reg_bound_offset(reg);
		/* Intersecting with the old var_off might have improved our
		 * bounds slightly, e.g. if umax was 0x7f...f and var_off was
		 * (0; 0xf...fc), then new var_off is (0; 0x7f...fc) which
		 * improves our umax.
		 */
		__update_reg_bounds(reg);
	}
}
2090 
2091 static bool const_tnum_range_mismatch(struct bpf_reg_state *reg)
2092 {
2093 	if (!tnum_is_const(reg->var_off))
2094 		return false;
2095 
2096 	return !cnum64_is_const(reg->r64) || reg->r64.base != reg->var_off.value;
2097 }
2098 
2099 static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg)
2100 {
2101 	if (!tnum_subreg_is_const(reg->var_off))
2102 		return false;
2103 
2104 	return !cnum32_is_const(reg->r32) || reg->r32.base != tnum_subreg(reg->var_off).value;
2105 }
2106 
2107 static bool range_bounds_violation(struct bpf_reg_state *reg)
2108 {
2109 	return cnum32_is_empty(reg->r32) || cnum64_is_empty(reg->r64);
2110 }
2111 
2112 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2113 				   struct bpf_reg_state *reg, const char *ctx)
2114 {
2115 	const char *msg;
2116 
2117 	if (range_bounds_violation(reg)) {
2118 		msg = "range bounds violation";
2119 		goto out;
2120 	}
2121 
2122 	if (const_tnum_range_mismatch(reg)) {
2123 		msg = "const tnum out of sync with range bounds";
2124 		goto out;
2125 	}
2126 
2127 	if (const_tnum_range_mismatch_32(reg)) {
2128 		msg = "const subreg tnum out of sync with range bounds";
2129 		goto out;
2130 	}
2131 
2132 	return 0;
2133 out:
2134 	verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s r64={.base=%#llx, .size=%#llx} "
2135 		     "r32={.base=%#x, .size=%#x} var_off=(%#llx, %#llx)",
2136 		     ctx, msg,
2137 		     reg->r64.base, reg->r64.size,
2138 		     reg->r32.base, reg->r32.size,
2139 		     reg->var_off.value, reg->var_off.mask);
2140 	if (env->test_reg_invariants)
2141 		return -EFAULT;
2142 	__mark_reg_unbounded(reg);
2143 	return 0;
2144 }
2145 
2146 /* Mark a register as having a completely unknown (scalar) value. */
2147 void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2148 {
2149 	s32 subreg_def = reg->subreg_def;
2150 
2151 	memset(reg, 0, sizeof(*reg));
2152 	reg->type = SCALAR_VALUE;
2153 	reg->var_off = tnum_unknown;
2154 	reg->subreg_def = subreg_def;
2155 	__mark_reg_unbounded(reg);
2156 }
2157 
2158 /* Mark a register as having a completely unknown (scalar) value,
2159  * initialize .precise as true when not bpf capable.
2160  */
2161 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2162 			       struct bpf_reg_state *reg)
2163 {
2164 	bpf_mark_reg_unknown_imprecise(reg);
2165 	reg->precise = !env->bpf_capable;
2166 }
2167 
2168 static void mark_reg_unknown(struct bpf_verifier_env *env,
2169 			     struct bpf_reg_state *regs, u32 regno)
2170 {
2171 	__mark_reg_unknown(env, regs + regno);
2172 }
2173 
2174 static int __mark_reg_s32_range(struct bpf_verifier_env *env,
2175 				struct bpf_reg_state *regs,
2176 				u32 regno,
2177 				s32 s32_min,
2178 				s32 s32_max)
2179 {
2180 	struct bpf_reg_state *reg = regs + regno;
2181 
2182 	reg_set_srange32(reg,
2183 			 max_t(s32, reg_s32_min(reg), s32_min),
2184 			 min_t(s32, reg_s32_max(reg), s32_max));
2185 	reg_set_srange64(reg,
2186 			 max_t(s64, reg_smin(reg), s32_min),
2187 			 min_t(s64, reg_smax(reg), s32_max));
2188 
2189 	reg_bounds_sync(reg);
2190 
2191 	return reg_bounds_sanity_check(env, reg, "s32_range");
2192 }
2193 
2194 void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
2195 			   struct bpf_reg_state *reg)
2196 {
2197 	__mark_reg_unknown(env, reg);
2198 	reg->type = NOT_INIT;
2199 }
2200 
2201 static int mark_btf_ld_reg(struct bpf_verifier_env *env,
2202 			   struct bpf_reg_state *regs, u32 regno,
2203 			   enum bpf_reg_type reg_type,
2204 			   struct btf *btf, u32 btf_id,
2205 			   enum bpf_type_flag flag)
2206 {
2207 	switch (reg_type) {
2208 	case SCALAR_VALUE:
2209 		mark_reg_unknown(env, regs, regno);
2210 		return 0;
2211 	case PTR_TO_BTF_ID:
2212 		mark_reg_known_zero(env, regs, regno);
2213 		regs[regno].type = PTR_TO_BTF_ID | flag;
2214 		regs[regno].btf = btf;
2215 		regs[regno].btf_id = btf_id;
2216 		if (type_may_be_null(flag))
2217 			regs[regno].id = ++env->id_gen;
2218 		return 0;
2219 	case PTR_TO_MEM:
2220 		mark_reg_known_zero(env, regs, regno);
2221 		regs[regno].type = PTR_TO_MEM | flag;
2222 		regs[regno].mem_size = 0;
2223 		return 0;
2224 	default:
2225 		verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__);
2226 		return -EFAULT;
2227 	}
2228 }
2229 
2230 #define DEF_NOT_SUBREG	(0)
2231 static void init_reg_state(struct bpf_verifier_env *env,
2232 			   struct bpf_func_state *state)
2233 {
2234 	struct bpf_reg_state *regs = state->regs;
2235 	int i;
2236 
2237 	for (i = 0; i < MAX_BPF_REG; i++) {
2238 		bpf_mark_reg_not_init(env, &regs[i]);
2239 		regs[i].subreg_def = DEF_NOT_SUBREG;
2240 	}
2241 
2242 	/* frame pointer */
2243 	regs[BPF_REG_FP].type = PTR_TO_STACK;
2244 	mark_reg_known_zero(env, regs, BPF_REG_FP);
2245 	regs[BPF_REG_FP].frameno = state->frameno;
2246 }
2247 
2248 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2249 {
2250 	/*
2251 	 * return_32bit is set to false by default and set explicitly
2252 	 * by the caller when necessary.
2253 	 */
2254 	return (struct bpf_retval_range){ minval, maxval, false };
2255 }
2256 
2257 static void init_func_state(struct bpf_verifier_env *env,
2258 			    struct bpf_func_state *state,
2259 			    int callsite, int frameno, int subprogno)
2260 {
2261 	state->callsite = callsite;
2262 	state->frameno = frameno;
2263 	state->subprogno = subprogno;
2264 	state->callback_ret_range = retval_range(0, 0);
2265 	init_reg_state(env, state);
2266 	mark_verifier_state_scratched(env);
2267 }
2268 
2269 /* Similar to push_stack(), but for async callbacks */
2270 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2271 						int insn_idx, int prev_insn_idx,
2272 						int subprog, bool is_sleepable)
2273 {
2274 	struct bpf_verifier_stack_elem *elem;
2275 	struct bpf_func_state *frame;
2276 
2277 	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
2278 	if (!elem)
2279 		return ERR_PTR(-ENOMEM);
2280 
2281 	elem->insn_idx = insn_idx;
2282 	elem->prev_insn_idx = prev_insn_idx;
2283 	elem->next = env->head;
2284 	elem->log_pos = env->log.end_pos;
2285 	env->head = elem;
2286 	env->stack_size++;
2287 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2288 		verbose(env,
2289 			"The sequence of %d jumps is too complex for async cb.\n",
2290 			env->stack_size);
2291 		return ERR_PTR(-E2BIG);
2292 	}
2293 	/* Unlike push_stack() do not bpf_copy_verifier_state().
2294 	 * The caller state doesn't matter.
2295 	 * This is async callback. It starts in a fresh stack.
2296 	 * Initialize it similar to do_check_common().
2297 	 */
2298 	elem->st.branches = 1;
2299 	elem->st.in_sleepable = is_sleepable;
2300 	frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT);
2301 	if (!frame)
2302 		return ERR_PTR(-ENOMEM);
2303 	init_func_state(env, frame,
2304 			BPF_MAIN_FUNC /* callsite */,
2305 			0 /* frameno within this callchain */,
2306 			subprog /* subprog number within this prog */);
2307 	elem->st.frame[0] = frame;
2308 	return &elem->st;
2309 }
2310 
2311 
2312 static int cmp_subprogs(const void *a, const void *b)
2313 {
2314 	return ((struct bpf_subprog_info *)a)->start -
2315 	       ((struct bpf_subprog_info *)b)->start;
2316 }
2317 
2318 /* Find subprogram that contains instruction at 'off' */
2319 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
2320 {
2321 	struct bpf_subprog_info *vals = env->subprog_info;
2322 	int l, r, m;
2323 
2324 	if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
2325 		return NULL;
2326 
2327 	l = 0;
2328 	r = env->subprog_cnt - 1;
2329 	while (l < r) {
2330 		m = l + (r - l + 1) / 2;
2331 		if (vals[m].start <= off)
2332 			l = m;
2333 		else
2334 			r = m - 1;
2335 	}
2336 	return &vals[l];
2337 }
2338 
2339 /* Find subprogram that starts exactly at 'off' */
2340 int bpf_find_subprog(struct bpf_verifier_env *env, int off)
2341 {
2342 	struct bpf_subprog_info *p;
2343 
2344 	p = bpf_find_containing_subprog(env, off);
2345 	if (!p || p->start != off)
2346 		return -ENOENT;
2347 	return p - env->subprog_info;
2348 }
2349 
2350 static int add_subprog(struct bpf_verifier_env *env, int off)
2351 {
2352 	int insn_cnt = env->prog->len;
2353 	int ret;
2354 
2355 	if (off >= insn_cnt || off < 0) {
2356 		verbose(env, "call to invalid destination\n");
2357 		return -EINVAL;
2358 	}
2359 	ret = bpf_find_subprog(env, off);
2360 	if (ret >= 0)
2361 		return ret;
2362 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2363 		verbose(env, "too many subprograms\n");
2364 		return -E2BIG;
2365 	}
2366 	/* determine subprog starts. The end is one before the next starts */
2367 	env->subprog_info[env->subprog_cnt++].start = off;
2368 	sort(env->subprog_info, env->subprog_cnt,
2369 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2370 	return env->subprog_cnt - 1;
2371 }
2372 
2373 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2374 {
2375 	struct bpf_prog_aux *aux = env->prog->aux;
2376 	struct btf *btf = aux->btf;
2377 	const struct btf_type *t;
2378 	u32 main_btf_id, id;
2379 	const char *name;
2380 	int ret, i;
2381 
2382 	/* Non-zero func_info_cnt implies valid btf */
2383 	if (!aux->func_info_cnt)
2384 		return 0;
2385 	main_btf_id = aux->func_info[0].type_id;
2386 
2387 	t = btf_type_by_id(btf, main_btf_id);
2388 	if (!t) {
2389 		verbose(env, "invalid btf id for main subprog in func_info\n");
2390 		return -EINVAL;
2391 	}
2392 
2393 	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
2394 	if (IS_ERR(name)) {
2395 		ret = PTR_ERR(name);
2396 		/* If there is no tag present, there is no exception callback */
2397 		if (ret == -ENOENT)
2398 			ret = 0;
2399 		else if (ret == -EEXIST)
2400 			verbose(env, "multiple exception callback tags for main subprog\n");
2401 		return ret;
2402 	}
2403 
2404 	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
2405 	if (ret < 0) {
2406 		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
2407 		return ret;
2408 	}
2409 	id = ret;
2410 	t = btf_type_by_id(btf, id);
2411 	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2412 		verbose(env, "exception callback '%s' must have global linkage\n", name);
2413 		return -EINVAL;
2414 	}
2415 	ret = 0;
2416 	for (i = 0; i < aux->func_info_cnt; i++) {
2417 		if (aux->func_info[i].type_id != id)
2418 			continue;
2419 		ret = aux->func_info[i].insn_off;
2420 		/* Further func_info and subprog checks will also happen
2421 		 * later, so assume this is the right insn_off for now.
2422 		 */
2423 		if (!ret) {
2424 			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
2425 			ret = -EINVAL;
2426 		}
2427 	}
2428 	if (!ret) {
2429 		verbose(env, "exception callback type id not found in func_info\n");
2430 		ret = -EINVAL;
2431 	}
2432 	return ret;
2433 }
2434 
/* Capacity of bpf_kfunc_btf_tab::descs, i.e. how many distinct module BTFs
 * the kfunc calls of one program may reference.
 */
#define MAX_KFUNC_BTFS	256

/* One module BTF referenced by a kfunc call. */
struct bpf_kfunc_btf {
	/* BTF object obtained from the fd; dropped with btf_put() */
	struct btf *btf;
	/* module returned by btf_try_get_module(); dropped with module_put() */
	struct module *module;
	/* index into fd_array the BTF fd was read from; sort and search key */
	u16 offset;
};

/* Table of module BTFs, kept sorted by offset so that bsearch() works. */
struct bpf_kfunc_btf_tab {
	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
	/* number of valid entries at the start of descs[] */
	u32 nr_descs;
};
2447 
2448 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2449 {
2450 	const struct bpf_kfunc_desc *d0 = a;
2451 	const struct bpf_kfunc_desc *d1 = b;
2452 
2453 	/* func_id is not greater than BTF_MAX_TYPE */
2454 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2455 }
2456 
2457 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2458 {
2459 	const struct bpf_kfunc_btf *d0 = a;
2460 	const struct bpf_kfunc_btf *d1 = b;
2461 
2462 	return d0->offset - d1->offset;
2463 }
2464 
2465 static struct bpf_kfunc_desc *
2466 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2467 {
2468 	struct bpf_kfunc_desc desc = {
2469 		.func_id = func_id,
2470 		.offset = offset,
2471 	};
2472 	struct bpf_kfunc_desc_tab *tab;
2473 
2474 	tab = prog->aux->kfunc_tab;
2475 	return bsearch(&desc, tab->descs, tab->nr_descs,
2476 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2477 }
2478 
2479 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2480 		       u16 btf_fd_idx, u8 **func_addr)
2481 {
2482 	const struct bpf_kfunc_desc *desc;
2483 
2484 	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2485 	if (!desc)
2486 		return -EFAULT;
2487 
2488 	*func_addr = (u8 *)desc->addr;
2489 	return 0;
2490 }
2491 
2492 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2493 					 s16 offset)
2494 {
2495 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
2496 	struct bpf_kfunc_btf_tab *tab;
2497 	struct bpf_kfunc_btf *b;
2498 	struct module *mod;
2499 	struct btf *btf;
2500 	int btf_fd;
2501 
2502 	tab = env->prog->aux->kfunc_btf_tab;
2503 	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2504 		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2505 	if (!b) {
2506 		if (tab->nr_descs == MAX_KFUNC_BTFS) {
2507 			verbose(env, "too many different module BTFs\n");
2508 			return ERR_PTR(-E2BIG);
2509 		}
2510 
2511 		if (bpfptr_is_null(env->fd_array)) {
2512 			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2513 			return ERR_PTR(-EPROTO);
2514 		}
2515 
2516 		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2517 					    offset * sizeof(btf_fd),
2518 					    sizeof(btf_fd)))
2519 			return ERR_PTR(-EFAULT);
2520 
2521 		btf = btf_get_by_fd(btf_fd);
2522 		if (IS_ERR(btf)) {
2523 			verbose(env, "invalid module BTF fd specified\n");
2524 			return btf;
2525 		}
2526 
2527 		if (!btf_is_module(btf)) {
2528 			verbose(env, "BTF fd for kfunc is not a module BTF\n");
2529 			btf_put(btf);
2530 			return ERR_PTR(-EINVAL);
2531 		}
2532 
2533 		mod = btf_try_get_module(btf);
2534 		if (!mod) {
2535 			btf_put(btf);
2536 			return ERR_PTR(-ENXIO);
2537 		}
2538 
2539 		b = &tab->descs[tab->nr_descs++];
2540 		b->btf = btf;
2541 		b->module = mod;
2542 		b->offset = offset;
2543 
2544 		/* sort() reorders entries by value, so b may no longer point
2545 		 * to the right entry after this
2546 		 */
2547 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2548 		     kfunc_btf_cmp_by_off, NULL);
2549 	} else {
2550 		btf = b->btf;
2551 	}
2552 
2553 	return btf;
2554 }
2555 
2556 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2557 {
2558 	if (!tab)
2559 		return;
2560 
2561 	while (tab->nr_descs--) {
2562 		module_put(tab->descs[tab->nr_descs].module);
2563 		btf_put(tab->descs[tab->nr_descs].btf);
2564 	}
2565 	kfree(tab);
2566 }
2567 
2568 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2569 {
2570 	if (offset) {
2571 		if (offset < 0) {
2572 			/* In the future, this can be allowed to increase limit
2573 			 * of fd index into fd_array, interpreted as u16.
2574 			 */
2575 			verbose(env, "negative offset disallowed for kernel module function call\n");
2576 			return ERR_PTR(-EINVAL);
2577 		}
2578 
2579 		return __find_kfunc_desc_btf(env, offset);
2580 	}
2581 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
2582 }
2583 
2584 #define KF_IMPL_SUFFIX "_impl"
2585 
2586 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env,
2587 						    struct btf *btf,
2588 						    const char *func_name)
2589 {
2590 	char *buf = env->tmp_str_buf;
2591 	const struct btf_type *func;
2592 	s32 impl_id;
2593 	int len;
2594 
2595 	len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX);
2596 	if (len < 0 || len >= TMP_STR_BUF_LEN) {
2597 		verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX);
2598 		return NULL;
2599 	}
2600 
2601 	impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC);
2602 	if (impl_id <= 0) {
2603 		verbose(env, "cannot find function %s in BTF\n", buf);
2604 		return NULL;
2605 	}
2606 
2607 	func = btf_type_by_id(btf, impl_id);
2608 
2609 	return btf_type_by_id(btf, func->type);
2610 }
2611 
2612 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
2613 			    s32 func_id,
2614 			    s16 offset,
2615 			    struct bpf_kfunc_meta *kfunc)
2616 {
2617 	const struct btf_type *func, *func_proto;
2618 	const char *func_name;
2619 	u32 *kfunc_flags;
2620 	struct btf *btf;
2621 
2622 	if (func_id <= 0) {
2623 		verbose(env, "invalid kernel function btf_id %d\n", func_id);
2624 		return -EINVAL;
2625 	}
2626 
2627 	btf = find_kfunc_desc_btf(env, offset);
2628 	if (IS_ERR(btf)) {
2629 		verbose(env, "failed to find BTF for kernel function\n");
2630 		return PTR_ERR(btf);
2631 	}
2632 
2633 	/*
2634 	 * Note that kfunc_flags may be NULL at this point, which
2635 	 * means that we couldn't find func_id in any relevant
2636 	 * kfunc_id_set. This most likely indicates an invalid kfunc
2637 	 * call.  However we don't fail with an error here,
2638 	 * and let the caller decide what to do with NULL kfunc->flags.
2639 	 */
2640 	kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog);
2641 
2642 	func = btf_type_by_id(btf, func_id);
2643 	if (!func || !btf_type_is_func(func)) {
2644 		verbose(env, "kernel btf_id %d is not a function\n", func_id);
2645 		return -EINVAL;
2646 	}
2647 
2648 	func_name = btf_name_by_offset(btf, func->name_off);
2649 
2650 	/*
2651 	 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag
2652 	 * can be found through the counterpart _impl kfunc.
2653 	 */
2654 	if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS))
2655 		func_proto = find_kfunc_impl_proto(env, btf, func_name);
2656 	else
2657 		func_proto = btf_type_by_id(btf, func->type);
2658 
2659 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2660 		verbose(env, "kernel function btf_id %d does not have a valid func_proto\n",
2661 			func_id);
2662 		return -EINVAL;
2663 	}
2664 
2665 	memset(kfunc, 0, sizeof(*kfunc));
2666 	kfunc->btf = btf;
2667 	kfunc->id = func_id;
2668 	kfunc->name = func_name;
2669 	kfunc->proto = func_proto;
2670 	kfunc->flags = kfunc_flags;
2671 
2672 	return 0;
2673 }
2674 
/* Register the kfunc (@func_id, @offset) in the program's kfunc table.
 *
 * The descriptor table is allocated on first use, after checking that
 * kfunc calls are possible at all (vmlinux BTF present, JIT requested and
 * capable, GPL-compatible program).  The module BTF table is allocated
 * lazily as well, the first time a non-zero @offset is seen.
 *
 * Returns 0 when the descriptor was added, already existed, or the call
 * is the deliberately tolerated func_id == 0 && offset == 0 case.
 * Otherwise returns -ENOTSUPP, -EINVAL, -ENOMEM, -E2BIG or an error
 * propagated from one of the helpers called below.
 */
int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset)
{
	struct bpf_kfunc_btf_tab *btf_tab;
	struct btf_func_model func_model;
	struct bpf_kfunc_desc_tab *tab;
	struct bpf_prog_aux *prog_aux;
	struct bpf_kfunc_meta kfunc;
	struct bpf_kfunc_desc *desc;
	unsigned long addr;
	int err;

	prog_aux = env->prog->aux;
	tab = prog_aux->kfunc_tab;
	btf_tab = prog_aux->kfunc_btf_tab;
	if (!tab) {
		/* First kfunc call of this program: check the prerequisites
		 * once, then allocate the descriptor table.
		 */
		if (!btf_vmlinux) {
			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
			return -ENOTSUPP;
		}

		if (!env->prog->jit_requested) {
			verbose(env, "JIT is required for calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!bpf_jit_supports_kfunc_call()) {
			verbose(env, "JIT does not support calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!env->prog->gpl_compatible) {
			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
			return -EINVAL;
		}

		tab = kzalloc_obj(*tab, GFP_KERNEL_ACCOUNT);
		if (!tab)
			return -ENOMEM;
		prog_aux->kfunc_tab = tab;
	}

	/* func_id == 0 is always invalid, but instead of returning an error, be
	 * conservative and wait until the code elimination pass before returning
	 * error, so that invalid calls that get pruned out can be in BPF programs
	 * loaded from userspace.  It is also required that offset be untouched
	 * for such calls.
	 */
	if (!func_id && !offset)
		return 0;

	/* A non-zero offset refers to a module BTF; make sure its table exists. */
	if (!btf_tab && offset) {
		btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT);
		if (!btf_tab)
			return -ENOMEM;
		prog_aux->kfunc_btf_tab = btf_tab;
	}

	/* Already registered: nothing to do. */
	if (find_kfunc_desc(env->prog, func_id, offset))
		return 0;

	if (tab->nr_descs == MAX_KFUNC_DESCS) {
		verbose(env, "too many different kernel function calls\n");
		return -E2BIG;
	}

	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
	if (err)
		return err;

	addr = kallsyms_lookup_name(kfunc.name);
	if (!addr) {
		verbose(env, "cannot find address for kernel function %s\n", kfunc.name);
		return -EINVAL;
	}

	if (bpf_dev_bound_kfunc_id(func_id)) {
		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
		if (err)
			return err;
	}

	err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model);
	if (err)
		return err;

	/* Append the new descriptor and re-sort so that find_kfunc_desc()
	 * can keep using bsearch().
	 */
	desc = &tab->descs[tab->nr_descs++];
	desc->func_id = func_id;
	desc->offset = offset;
	desc->addr = addr;
	desc->func_model = func_model;
	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
	     kfunc_desc_cmp_by_id_off, NULL);
	return 0;
}
2769 
2770 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2771 {
2772 	return !!prog->aux->kfunc_tab;
2773 }
2774 
2775 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2776 {
2777 	struct bpf_subprog_info *subprog = env->subprog_info;
2778 	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
2779 	struct bpf_insn *insn = env->prog->insnsi;
2780 
2781 	/* Add entry function. */
2782 	ret = add_subprog(env, 0);
2783 	if (ret)
2784 		return ret;
2785 
2786 	for (i = 0; i < insn_cnt; i++, insn++) {
2787 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2788 		    !bpf_pseudo_kfunc_call(insn))
2789 			continue;
2790 
2791 		if (!env->bpf_capable) {
2792 			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2793 			return -EPERM;
2794 		}
2795 
2796 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2797 			ret = add_subprog(env, i + insn->imm + 1);
2798 		else
2799 			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
2800 
2801 		if (ret < 0)
2802 			return ret;
2803 	}
2804 
2805 	ret = bpf_find_exception_callback_insn_off(env);
2806 	if (ret < 0)
2807 		return ret;
2808 	ex_cb_insn = ret;
2809 
2810 	/* If ex_cb_insn > 0, this means that the main program has a subprog
2811 	 * marked using BTF decl tag to serve as the exception callback.
2812 	 */
2813 	if (ex_cb_insn) {
2814 		ret = add_subprog(env, ex_cb_insn);
2815 		if (ret < 0)
2816 			return ret;
2817 		for (i = 1; i < env->subprog_cnt; i++) {
2818 			if (env->subprog_info[i].start != ex_cb_insn)
2819 				continue;
2820 			env->exception_callback_subprog = i;
2821 			bpf_mark_subprog_exc_cb(env, i);
2822 			break;
2823 		}
2824 	}
2825 
2826 	/* Add a fake 'exit' subprog which could simplify subprog iteration
2827 	 * logic. 'subprog_cnt' should not be increased.
2828 	 */
2829 	subprog[env->subprog_cnt].start = insn_cnt;
2830 
2831 	if (env->log.level & BPF_LOG_LEVEL2)
2832 		for (i = 0; i < env->subprog_cnt; i++)
2833 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
2834 
2835 	return 0;
2836 }
2837 
/* Walk all instructions once, in order, and per subprogram:
 *  - record has_tail_call / tail_call_reachable, has_ld_abs and exit_idx;
 *  - reject jumps whose target lies outside the current subprogram;
 *  - require the last instruction to be an exit or an unconditional jump.
 *
 * Relies on subprog[i + 1].start being readable for the last real
 * subprogram as well.
 *
 * Returns 0 on success, -EINVAL (after logging) on the first violation.
 */
static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		/* helper call to bpf_tail_call */
		if (code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == 0 &&
		    insn[i].imm == BPF_FUNC_tail_call) {
			subprog[cur_subprog].has_tail_call = true;
			subprog[cur_subprog].tail_call_reachable = true;
		}
		if (BPF_CLASS(code) == BPF_LD &&
		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
			subprog[cur_subprog].has_ld_abs = true;
		/* only jump-class instructions need a target check */
		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
			goto next;
		if (BPF_OP(code) == BPF_CALL)
			goto next;
		if (BPF_OP(code) == BPF_EXIT) {
			/* a later exit overwrites an earlier one */
			subprog[cur_subprog].exit_idx = i;
			goto next;
		}
		off = i + bpf_jmp_offset(&insn[i]) + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back or bpf_throw call
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP32 | BPF_JA) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			/* advance to the next subprogram's [start, end) window */
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}
2893 
/*
 * Sort subprogs in topological order so that leaf subprogs come first and
 * their callers come later. This is a DFS post-order traversal of the call
 * graph. Scan only reachable instructions (those in the computed postorder) of
 * the current subprog to discover callees (direct subprogs and sync
 * callbacks).
 *
 * The result is written to env->subprog_topo_order[].
 *
 * Returns 0 on success, -ENOMEM if the scratch arrays cannot be allocated,
 * -EFAULT if a call target is not the start of a known subprog, and -EINVAL
 * when a direct call targets a subprog that is still on the DFS stack.
 */
static int sort_subprogs_topo(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *si = env->subprog_info;
	int *insn_postorder = env->cfg.insn_postorder;
	struct bpf_insn *insn = env->prog->insnsi;
	int cnt = env->subprog_cnt;
	int *dfs_stack = NULL;
	int top = 0, order = 0;
	int i, ret = 0;
	u8 *color = NULL;

	color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT);
	dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT);
	if (!color || !dfs_stack) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * DFS post-order traversal.
	 * Color values: 0 = unvisited, 1 = on stack, 2 = done.
	 */
	for (i = 0; i < cnt; i++) {
		if (color[i])
			continue;
		color[i] = 1;
		dfs_stack[top++] = i;

		while (top > 0) {
			int cur = dfs_stack[top - 1];
			/* postorder slice of cur; the end is taken from the
			 * next subprog's postorder_start
			 */
			int po_start = si[cur].postorder_start;
			int po_end = si[cur + 1].postorder_start;
			bool pushed = false;
			int j;

			/* The scan restarts from po_start every time cur is
			 * back on top; callees already done are skipped.
			 */
			for (j = po_start; j < po_end; j++) {
				int idx = insn_postorder[j];
				int callee;

				if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
					continue;
				callee = bpf_find_subprog(env, idx + insn[idx].imm + 1);
				if (callee < 0) {
					ret = -EFAULT;
					goto out;
				}
				if (color[callee] == 2)
					continue;
				if (color[callee] == 1) {
					/* A pseudo-func reference to a subprog
					 * that is still on the stack is skipped;
					 * only a pseudo call is rejected as
					 * recursion.
					 */
					if (bpf_pseudo_func(&insn[idx]))
						continue;
					verbose(env, "recursive call from %s() to %s()\n",
						subprog_name(env, cur),
						subprog_name(env, callee));
					ret = -EINVAL;
					goto out;
				}
				/* descend into the first unvisited callee */
				color[callee] = 1;
				dfs_stack[top++] = callee;
				pushed = true;
				break;
			}

			if (!pushed) {
				/* every callee is done: emit cur and pop it */
				color[cur] = 2;
				env->subprog_topo_order[order++] = cur;
				top--;
			}
		}
	}

	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < cnt; i++)
			verbose(env, "topo_order[%d] = %s\n",
				i, subprog_name(env, env->subprog_topo_order[i]));
out:
	kvfree(dfs_stack);
	kvfree(color);
	return ret;
}
2981 
/* Mark 'nr_slots' consecutive stack slots as scratched, starting at slot
 * index 'spi' and moving towards lower slot indices.
 */
static void mark_stack_slots_scratched(struct bpf_verifier_env *env,
				       int spi, int nr_slots)
{
	int slot = spi;
	int remaining;

	for (remaining = nr_slots; remaining > 0; remaining--, slot--)
		mark_stack_slot_scratched(env, slot);
}
2990 
2991 /* This function is supposed to be used by the following 32-bit optimization
2992  * code only. It returns TRUE if the source or destination register operates
2993  * on 64-bit, otherwise return FALSE.
2994  */
2995 bool bpf_is_reg64(struct bpf_insn *insn,
2996 	      u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t)
2997 {
2998 	u8 code, class, op;
2999 
3000 	code = insn->code;
3001 	class = BPF_CLASS(code);
3002 	op = BPF_OP(code);
3003 	if (class == BPF_JMP) {
3004 		/* BPF_EXIT for "main" will reach here. Return TRUE
3005 		 * conservatively.
3006 		 */
3007 		if (op == BPF_EXIT)
3008 			return true;
3009 		if (op == BPF_CALL) {
3010 			/* BPF to BPF call will reach here because of marking
3011 			 * caller saved clobber with DST_OP_NO_MARK for which we
3012 			 * don't care the register def because they are anyway
3013 			 * marked as NOT_INIT already.
3014 			 */
3015 			if (insn->src_reg == BPF_PSEUDO_CALL)
3016 				return false;
3017 			/* Helper call will reach here because of arg type
3018 			 * check, conservatively return TRUE.
3019 			 */
3020 			if (t == SRC_OP)
3021 				return true;
3022 
3023 			return false;
3024 		}
3025 	}
3026 
3027 	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3028 		return false;
3029 
3030 	if (class == BPF_ALU64 || class == BPF_JMP ||
3031 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3032 		return true;
3033 
3034 	if (class == BPF_ALU || class == BPF_JMP32)
3035 		return false;
3036 
3037 	if (class == BPF_LDX) {
3038 		if (t != SRC_OP)
3039 			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3040 		/* LDX source must be ptr. */
3041 		return true;
3042 	}
3043 
3044 	if (class == BPF_STX) {
3045 		/* BPF_STX (including atomic variants) has one or more source
3046 		 * operands, one of which is a ptr. Check whether the caller is
3047 		 * asking about it.
3048 		 */
3049 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
3050 			return true;
3051 		return BPF_SIZE(code) == BPF_DW;
3052 	}
3053 
3054 	if (class == BPF_LD) {
3055 		u8 mode = BPF_MODE(code);
3056 
3057 		/* LD_IMM64 */
3058 		if (mode == BPF_IMM)
3059 			return true;
3060 
3061 		/* Both LD_IND and LD_ABS return 32-bit data. */
3062 		if (t != SRC_OP)
3063 			return  false;
3064 
3065 		/* Implicit ctx ptr. */
3066 		if (regno == BPF_REG_6)
3067 			return true;
3068 
3069 		/* Explicit source could be any width. */
3070 		return true;
3071 	}
3072 
3073 	if (class == BPF_ST)
3074 		/* The only source register for BPF_ST is a ptr. */
3075 		return true;
3076 
3077 	/* Conservatively return true at default. */
3078 	return true;
3079 }
3080 
3081 static void mark_insn_zext(struct bpf_verifier_env *env,
3082 			   struct bpf_reg_state *reg)
3083 {
3084 	s32 def_idx = reg->subreg_def;
3085 
3086 	if (def_idx == DEF_NOT_SUBREG)
3087 		return;
3088 
3089 	env->insn_aux_data[def_idx - 1].zext_dst = true;
3090 	/* The dst will be zero extended, so won't be sub-register anymore. */
3091 	reg->subreg_def = DEF_NOT_SUBREG;
3092 }
3093 
3094 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3095 			   enum bpf_reg_arg_type t)
3096 {
3097 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3098 	struct bpf_reg_state *reg;
3099 	bool rw64;
3100 
3101 	mark_reg_scratched(env, regno);
3102 
3103 	reg = &regs[regno];
3104 	rw64 = bpf_is_reg64(insn, regno, reg, t);
3105 	if (t == SRC_OP) {
3106 		/* check whether register used as source operand can be read */
3107 		if (reg->type == NOT_INIT) {
3108 			verbose(env, "R%d !read_ok\n", regno);
3109 			return -EACCES;
3110 		}
3111 		/* We don't need to worry about FP liveness because it's read-only */
3112 		if (regno == BPF_REG_FP)
3113 			return 0;
3114 
3115 		if (rw64)
3116 			mark_insn_zext(env, reg);
3117 
3118 		return 0;
3119 	} else {
3120 		/* check whether register used as dest operand can be written to */
3121 		if (regno == BPF_REG_FP) {
3122 			verbose(env, "frame pointer is read only\n");
3123 			return -EACCES;
3124 		}
3125 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3126 		if (t == DST_OP)
3127 			mark_reg_unknown(env, regs, regno);
3128 	}
3129 	return 0;
3130 }
3131 
3132 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3133 			 enum bpf_reg_arg_type t)
3134 {
3135 	struct bpf_verifier_state *vstate = env->cur_state;
3136 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3137 
3138 	return __check_reg_arg(env, state->regs, regno, t);
3139 }
3140 
3141 static void mark_indirect_target(struct bpf_verifier_env *env, int idx)
3142 {
3143 	env->insn_aux_data[idx].indirect_target = true;
3144 }
3145 
/* Bit layout used by linked_regs_pack() / linked_regs_unpack().
 *
 * A set of linked registers is stored in one u64. The low LR_SIZE_BITS bits
 * hold the number of entries, followed by up to LINKED_REGS_MAX entries of
 * LR_ENTRY_BITS bits each (4 + 6 * 10 = 64 bits). Within an entry:
 *   bits [0, LR_FRAMENO_BITS)            frame number
 *   bits [LR_SPI_OFF, LR_IS_REG_OFF)     stack slot index or register number
 *   bit  LR_IS_REG_OFF                   set if the entry is a register
 */
#define LR_FRAMENO_BITS	3
#define LR_SPI_BITS	6
#define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
#define LR_SIZE_BITS	4
#define LR_FRAMENO_MASK	((1ull << LR_FRAMENO_BITS) - 1)
#define LR_SPI_MASK	((1ull << LR_SPI_BITS)     - 1)
#define LR_SIZE_MASK	((1ull << LR_SIZE_BITS)    - 1)
#define LR_SPI_OFF	LR_FRAMENO_BITS
#define LR_IS_REG_OFF	(LR_SPI_BITS + LR_FRAMENO_BITS)
/* Maximum number of entries that fit in one packed u64. */
#define LINKED_REGS_MAX	6
3156 
/* One member of a set of linked registers: either a register or a stack slot
 * of frame 'frameno'.
 */
struct linked_reg {
	u8 frameno;
	/* 'is_reg' selects which union member is meaningful */
	union {
		u8 spi;
		u8 regno;
	};
	bool is_reg;
};
3165 
/* Unpacked set of linked registers; the first 'cnt' entries are in use. */
struct linked_regs {
	int cnt;
	struct linked_reg entries[LINKED_REGS_MAX];
};
3170 
3171 static struct linked_reg *linked_regs_push(struct linked_regs *s)
3172 {
3173 	if (s->cnt < LINKED_REGS_MAX)
3174 		return &s->entries[s->cnt++];
3175 
3176 	return NULL;
3177 }
3178 
3179 /* Use u64 as a vector of 6 10-bit values, use first 4-bits to track
3180  * number of elements currently in stack.
3181  * Pack one history entry for linked registers as 10 bits in the following format:
3182  * - 3-bits frameno
3183  * - 6-bits spi_or_reg
3184  * - 1-bit  is_reg
3185  */
3186 static u64 linked_regs_pack(struct linked_regs *s)
3187 {
3188 	u64 val = 0;
3189 	int i;
3190 
3191 	for (i = 0; i < s->cnt; ++i) {
3192 		struct linked_reg *e = &s->entries[i];
3193 		u64 tmp = 0;
3194 
3195 		tmp |= e->frameno;
3196 		tmp |= e->spi << LR_SPI_OFF;
3197 		tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
3198 
3199 		val <<= LR_ENTRY_BITS;
3200 		val |= tmp;
3201 	}
3202 	val <<= LR_SIZE_BITS;
3203 	val |= s->cnt;
3204 	return val;
3205 }
3206 
3207 static void linked_regs_unpack(u64 val, struct linked_regs *s)
3208 {
3209 	int i;
3210 
3211 	s->cnt = val & LR_SIZE_MASK;
3212 	val >>= LR_SIZE_BITS;
3213 
3214 	for (i = 0; i < s->cnt; ++i) {
3215 		struct linked_reg *e = &s->entries[i];
3216 
3217 		e->frameno =  val & LR_FRAMENO_MASK;
3218 		e->spi     = (val >> LR_SPI_OFF) & LR_SPI_MASK;
3219 		e->is_reg  = (val >> LR_IS_REG_OFF) & 0x1;
3220 		val >>= LR_ENTRY_BITS;
3221 	}
3222 }
3223 
3224 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3225 {
3226 	const struct btf_type *func;
3227 	struct btf *desc_btf;
3228 
3229 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3230 		return NULL;
3231 
3232 	desc_btf = find_kfunc_desc_btf(data, insn->off);
3233 	if (IS_ERR(desc_btf))
3234 		return "<error>";
3235 
3236 	func = btf_type_by_id(desc_btf, insn->imm);
3237 	return btf_name_by_offset(desc_btf, func->name_off);
3238 }
3239 
3240 void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
3241 {
3242 	const struct bpf_insn_cbs cbs = {
3243 		.cb_call	= disasm_kfunc_name,
3244 		.cb_print	= verbose,
3245 		.private_data	= env,
3246 	};
3247 
3248 	print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3249 }
3250 
3251 /* If any register R in hist->linked_regs is marked as precise in bt,
3252  * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
3253  */
3254 void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
3255 {
3256 	struct linked_regs linked_regs;
3257 	bool some_precise = false;
3258 	int i;
3259 
3260 	if (!hist || hist->linked_regs == 0)
3261 		return;
3262 
3263 	linked_regs_unpack(hist->linked_regs, &linked_regs);
3264 	for (i = 0; i < linked_regs.cnt; ++i) {
3265 		struct linked_reg *e = &linked_regs.entries[i];
3266 
3267 		if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
3268 		    (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
3269 			some_precise = true;
3270 			break;
3271 		}
3272 	}
3273 
3274 	if (!some_precise)
3275 		return;
3276 
3277 	for (i = 0; i < linked_regs.cnt; ++i) {
3278 		struct linked_reg *e = &linked_regs.entries[i];
3279 
3280 		if (e->is_reg)
3281 			bpf_bt_set_frame_reg(bt, e->frameno, e->regno);
3282 		else
3283 			bpf_bt_set_frame_slot(bt, e->frameno, e->spi);
3284 	}
3285 }
3286 
3287 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
3288 {
3289 	return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
3290 }
3291 
/* Batch variant of mark_chain_precision(): no single register is named (regno
 * is passed as -1). The caller must have set env->bt to the desired reg and
 * stack masks across all relevant frames beforehand.
 */
static int mark_chain_precision_batch(struct bpf_verifier_env *env,
				      struct bpf_verifier_state *starting_state)
{
	int no_regno = -1;

	return bpf_mark_chain_precision(env, starting_state, no_regno, NULL);
}
3300 
3301 static bool is_spillable_regtype(enum bpf_reg_type type)
3302 {
3303 	switch (base_type(type)) {
3304 	case PTR_TO_MAP_VALUE:
3305 	case PTR_TO_STACK:
3306 	case PTR_TO_CTX:
3307 	case PTR_TO_PACKET:
3308 	case PTR_TO_PACKET_META:
3309 	case PTR_TO_PACKET_END:
3310 	case PTR_TO_FLOW_KEYS:
3311 	case CONST_PTR_TO_MAP:
3312 	case PTR_TO_SOCKET:
3313 	case PTR_TO_SOCK_COMMON:
3314 	case PTR_TO_TCP_SOCK:
3315 	case PTR_TO_XDP_SOCK:
3316 	case PTR_TO_BTF_ID:
3317 	case PTR_TO_BUF:
3318 	case PTR_TO_MEM:
3319 	case PTR_TO_FUNC:
3320 	case PTR_TO_MAP_KEY:
3321 	case PTR_TO_ARENA:
3322 		return true;
3323 	default:
3324 		return false;
3325 	}
3326 }
3327 
3328 
3329 /* check if register is a constant scalar value */
3330 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
3331 {
3332 	return reg->type == SCALAR_VALUE &&
3333 	       tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
3334 }
3335 
3336 /* assuming is_reg_const() is true, return constant value of a register */
3337 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
3338 {
3339 	return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
3340 }
3341 
3342 static bool __is_pointer_value(bool allow_ptr_leaks,
3343 			       const struct bpf_reg_state *reg)
3344 {
3345 	if (allow_ptr_leaks)
3346 		return false;
3347 
3348 	return reg->type != SCALAR_VALUE;
3349 }
3350 
3351 static void clear_scalar_id(struct bpf_reg_state *reg)
3352 {
3353 	reg->id = 0;
3354 	reg->delta = 0;
3355 }
3356 
3357 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
3358 					struct bpf_reg_state *src_reg)
3359 {
3360 	if (src_reg->type != SCALAR_VALUE)
3361 		return;
3362 	/*
3363 	 * The verifier is processing rX = rY insn and
3364 	 * rY->id has special linked register already.
3365 	 * Cleared it, since multiple rX += const are not supported.
3366 	 */
3367 	if (src_reg->id & BPF_ADD_CONST)
3368 		clear_scalar_id(src_reg);
3369 	/*
3370 	 * Ensure that src_reg has a valid ID that will be copied to
3371 	 * dst_reg and then will be used by sync_linked_regs() to
3372 	 * propagate min/max range.
3373 	 */
3374 	if (!src_reg->id && !tnum_is_const(src_reg->var_off))
3375 		src_reg->id = ++env->id_gen;
3376 }
3377 
3378 static void save_register_state(struct bpf_verifier_env *env,
3379 				struct bpf_func_state *state,
3380 				int spi, struct bpf_reg_state *reg,
3381 				int size)
3382 {
3383 	int i;
3384 
3385 	state->stack[spi].spilled_ptr = *reg;
3386 
3387 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
3388 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
3389 
3390 	/* size < 8 bytes spill */
3391 	for (; i; i--)
3392 		mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
3393 }
3394 
3395 static bool is_bpf_st_mem(struct bpf_insn *insn)
3396 {
3397 	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3398 }
3399 
/* Width of the value range of 'reg' in bits: fls64() of the register's
 * unsigned maximum as reported by reg_umax(). Callers compare the result
 * against the size of a spill or fill to detect narrowing.
 */
static int get_reg_width(struct bpf_reg_state *reg)
{
	return fls64(reg_umax(reg));
}
3404 
3405 /* See comment for mark_fastcall_pattern_for_call() */
3406 static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
3407 					  struct bpf_func_state *state, int insn_idx, int off)
3408 {
3409 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
3410 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
3411 	int i;
3412 
3413 	if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
3414 		return;
3415 	/* access to the region [max_stack_depth .. fastcall_stack_off)
3416 	 * from something that is not a part of the fastcall pattern,
3417 	 * disable fastcall rewrites for current subprogram by setting
3418 	 * fastcall_stack_off to a value smaller than any possible offset.
3419 	 */
3420 	subprog->fastcall_stack_off = S16_MIN;
3421 	/* reset fastcall aux flags within subprogram,
3422 	 * happens at most once per subprogram
3423 	 */
3424 	for (i = subprog->start; i < (subprog + 1)->start; ++i) {
3425 		aux[i].fastcall_spills_num = 0;
3426 		aux[i].fastcall_pattern = 0;
3427 	}
3428 }
3429 
3430 static void scrub_special_slot(struct bpf_func_state *state, int spi)
3431 {
3432 	int i;
3433 
3434 	/* regular write of data into stack destroys any spilled ptr */
3435 	state->stack[spi].spilled_ptr.type = NOT_INIT;
3436 	/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
3437 	if (is_stack_slot_special(&state->stack[spi]))
3438 		for (i = 0; i < BPF_REG_SIZE; i++)
3439 			scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3440 }
3441 
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 *
 * Track a write of 'size' bytes at fixed stack offset 'off' in frame 'state',
 * performed by the instruction at 'insn_idx'. 'value_regno' is the register
 * of the current frame that is being stored, or negative if there is none.
 *
 * Depending on what is written, the slot is recorded as a register spill
 * (scalar, BPF_ST immediate or spillable pointer) or its bytes are typed
 * STACK_MISC / STACK_ZERO. Register spills are also pushed to the jump
 * history.
 *
 * Returns 0 on success, -EACCES or -EINVAL if the write is rejected, or an
 * error propagated from a helper.
 */
static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
				       /* stack frame we're writing to */
				       struct bpf_func_state *state,
				       int off, int size, int value_regno,
				       int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	struct bpf_reg_state *reg = NULL;
	int insn_flags = INSN_F_STACK_ACCESS;
	int hist_spi = spi, hist_frame = state->frameno;

	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	/* Without allow_ptr_leaks a partial overwrite of a spilled non-scalar
	 * register is refused.
	 */
	if (!env->allow_ptr_leaks &&
	    bpf_is_spilled_reg(&state->stack[spi]) &&
	    !bpf_is_spilled_scalar_reg(&state->stack[spi]) &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0)
		reg = &cur->regs[value_regno];
	if (!env->bypass_spec_v4) {
		/* Request nospec_result for this insn when a spillable pointer
		 * is stored or when any inspected byte of the slot is neither
		 * STACK_MISC nor STACK_ZERO.
		 */
		bool sanitize = reg && is_spillable_regtype(reg->type);

		for (i = 0; i < size; i++) {
			u8 type = state->stack[spi].slot_type[i];

			if (type != STACK_MISC && type != STACK_ZERO) {
				sanitize = true;
				break;
			}
		}

		if (sanitize)
			env->insn_aux_data[insn_idx].nospec_result = true;
	}

	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;

	check_fastcall_stack_contract(env, state, insn_idx, off);
	mark_stack_slot_scratched(env, spi);
	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
		/* slot-aligned spill of a scalar register */
		bool reg_value_fits;

		reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
		/* Make sure that reg had an ID to build a relation on spill. */
		if (reg_value_fits)
			assign_scalar_id_before_mov(env, reg);
		save_register_state(env, state, spi, reg, size);
		/* Break the relation on a narrowing spill. */
		if (!reg_value_fits)
			state->stack[spi].spilled_ptr.id = 0;
	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
		   env->bpf_capable) {
		/* slot-aligned store of an immediate: record it as a spill of
		 * a known scalar built in a scratch register
		 */
		struct bpf_reg_state *tmp_reg = &env->fake_reg[0];

		memset(tmp_reg, 0, sizeof(*tmp_reg));
		__mark_reg_known(tmp_reg, insn->imm);
		tmp_reg->type = SCALAR_VALUE;
		save_register_state(env, state, spi, tmp_reg, size);
	} else if (reg && is_spillable_regtype(reg->type)) {
		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose_linfo(env, insn_idx, "; ");
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		if (state != cur && reg->type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}
		save_register_state(env, state, spi, reg, size);
	} else {
		/* plain data write: no register state is kept for the slot */
		u8 type = STACK_MISC;

		scrub_special_slot(state, spi);

		/* when we zero initialize stack slots mark them as such */
		if ((reg && bpf_register_is_null(reg)) ||
		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
			/* STACK_ZERO case happened because register spill
			 * wasn't properly aligned at the stack slot boundary,
			 * so it's not a register spill anymore; force
			 * originating register to be precise to make
			 * STACK_ZERO correct for subsequent states
			 */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
			type = STACK_ZERO;
		}

		/* Mark slots affected by this stack write. */
		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
		insn_flags = 0; /* not a register spill */
	}

	/* only register spills are recorded in the jump history */
	if (insn_flags)
		return bpf_push_jmp_history(env, env->cur_state, insn_flags,
					    hist_spi, hist_frame, 0);
	return 0;
}
3556 
3557 /* Write the stack: 'stack[ptr_reg + off] = value_regno'. 'ptr_reg' is
3558  * known to contain a variable offset.
3559  * This function checks whether the write is permitted and conservatively
3560  * tracks the effects of the write, considering that each stack slot in the
3561  * dynamic range is potentially written to.
3562  *
3563  * 'value_regno' can be -1, meaning that an unknown value is being written to
3564  * the stack.
3565  *
3566  * Spilled pointers in range are not marked as written because we don't know
3567  * what's going to be actually written. This means that read propagation for
3568  * future reads cannot be terminated by this write.
3569  *
3570  * For privileged programs, uninitialized stack slots are considered
3571  * initialized by this write (even though we don't know exactly what offsets
3572  * are going to be written to). The idea is that we don't want the verifier to
3573  * reject future reads that access slots written to through variable offsets.
3574  */
3575 static int check_stack_write_var_off(struct bpf_verifier_env *env,
3576 				     /* func where register points to */
3577 				     struct bpf_func_state *state,
3578 				     struct bpf_reg_state *ptr_reg, int off, int size,
3579 				     int value_regno, int insn_idx)
3580 {
3581 	struct bpf_func_state *cur; /* state of the current function */
3582 	int min_off, max_off;
3583 	int i, err;
3584 	struct bpf_reg_state *value_reg = NULL;
3585 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3586 	bool writing_zero = false;
3587 	/* set if the fact that we're writing a zero is used to let any
3588 	 * stack slots remain STACK_ZERO
3589 	 */
3590 	bool zero_used = false;
3591 
3592 	cur = env->cur_state->frame[env->cur_state->curframe];
3593 	min_off = reg_smin(ptr_reg) + off;
3594 	max_off = reg_smax(ptr_reg) + off + size;
3595 	if (value_regno >= 0)
3596 		value_reg = &cur->regs[value_regno];
3597 	if ((value_reg && bpf_register_is_null(value_reg)) ||
3598 	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
3599 		writing_zero = true;
3600 
3601 	for (i = min_off; i < max_off; i++) {
3602 		int spi;
3603 
3604 		spi = bpf_get_spi(i);
3605 		err = destroy_if_dynptr_stack_slot(env, state, spi);
3606 		if (err)
3607 			return err;
3608 	}
3609 
3610 	check_fastcall_stack_contract(env, state, insn_idx, min_off);
3611 	/* Variable offset writes destroy any spilled pointers in range. */
3612 	for (i = min_off; i < max_off; i++) {
3613 		u8 new_type, *stype;
3614 		int slot, spi;
3615 
3616 		slot = -i - 1;
3617 		spi = slot / BPF_REG_SIZE;
3618 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3619 		mark_stack_slot_scratched(env, spi);
3620 
3621 		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
3622 			/* Reject the write if range we may write to has not
3623 			 * been initialized beforehand. If we didn't reject
3624 			 * here, the ptr status would be erased below (even
3625 			 * though not all slots are actually overwritten),
3626 			 * possibly opening the door to leaks.
3627 			 *
3628 			 * We do however catch STACK_INVALID case below, and
3629 			 * only allow reading possibly uninitialized memory
3630 			 * later for CAP_PERFMON, as the write may not happen to
3631 			 * that slot.
3632 			 */
3633 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3634 				insn_idx, i);
3635 			return -EINVAL;
3636 		}
3637 
3638 		/* If writing_zero and the spi slot contains a spill of value 0,
3639 		 * maintain the spill type.
3640 		 */
3641 		if (writing_zero && *stype == STACK_SPILL &&
3642 		    bpf_is_spilled_scalar_reg(&state->stack[spi])) {
3643 			struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
3644 
3645 			if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
3646 				zero_used = true;
3647 				continue;
3648 			}
3649 		}
3650 
3651 		/*
3652 		 * Scrub slots if variable-offset stack write goes over spilled pointers.
3653 		 * Otherwise bpf_is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT
3654 		 * and valid program is rejected by check_stack_read_fixed_off()
3655 		 * with obscure "invalid size of register fill" message.
3656 		 */
3657 		scrub_special_slot(state, spi);
3658 
3659 		/* Update the slot type. */
3660 		new_type = STACK_MISC;
3661 		if (writing_zero && *stype == STACK_ZERO) {
3662 			new_type = STACK_ZERO;
3663 			zero_used = true;
3664 		}
3665 		/* If the slot is STACK_INVALID, we check whether it's OK to
3666 		 * pretend that it will be initialized by this write. The slot
3667 		 * might not actually be written to, and so if we mark it as
3668 		 * initialized future reads might leak uninitialized memory.
3669 		 * For privileged programs, we will accept such reads to slots
3670 		 * that may or may not be written because, if we're reject
3671 		 * them, the error would be too confusing.
3672 		 * Conservatively, treat STACK_POISON in a similar way.
3673 		 */
3674 		if ((*stype == STACK_INVALID || *stype == STACK_POISON) &&
3675 		    !env->allow_uninit_stack) {
3676 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3677 					insn_idx, i);
3678 			return -EINVAL;
3679 		}
3680 		*stype = new_type;
3681 	}
3682 	if (zero_used) {
3683 		/* backtracking doesn't work for STACK_ZERO yet. */
3684 		err = mark_chain_precision(env, value_regno);
3685 		if (err)
3686 			return err;
3687 	}
3688 	return 0;
3689 }
3690 
3691 /* When register 'dst_regno' is assigned some values from stack[min_off,
3692  * max_off), we set the register's type according to the types of the
3693  * respective stack slots. If all the stack values are known to be zeros, then
3694  * so is the destination reg. Otherwise, the register is considered to be
3695  * SCALAR. This function does not deal with register filling; the caller must
3696  * ensure that all spilled registers in the stack range have been marked as
3697  * read.
3698  */
3699 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3700 				/* func where src register points to */
3701 				struct bpf_func_state *ptr_state,
3702 				int min_off, int max_off, int dst_regno)
3703 {
3704 	struct bpf_verifier_state *vstate = env->cur_state;
3705 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3706 	int i, slot, spi;
3707 	u8 *stype;
3708 	int zeros = 0;
3709 
3710 	for (i = min_off; i < max_off; i++) {
3711 		slot = -i - 1;
3712 		spi = slot / BPF_REG_SIZE;
3713 		mark_stack_slot_scratched(env, spi);
3714 		stype = ptr_state->stack[spi].slot_type;
3715 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3716 			break;
3717 		zeros++;
3718 	}
3719 	if (zeros == max_off - min_off) {
3720 		/* Any access_size read into register is zero extended,
3721 		 * so the whole register == const_zero.
3722 		 */
3723 		__mark_reg_const_zero(env, &state->regs[dst_regno]);
3724 	} else {
3725 		/* have read misc data from the stack */
3726 		mark_reg_unknown(env, state->regs, dst_regno);
3727 	}
3728 }
3729 
/* Read the stack at 'off' and put the results into the register indicated by
 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
 * spilled reg.
 *
 * 'reg_state' is the frame whose stack is read and 'size' the width of the
 * read in bytes.
 *
 * 'dst_regno' can be -1, meaning that the read value is not going to a
 * register.
 *
 * The access is assumed to be within the current stack bounds.
 *
 * Returns 0 on success, -EACCES if the read is rejected, or an error
 * propagated from bpf_push_jmp_history(). Reads that restore a spilled
 * register are recorded in the jump history.
 */
static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
				      /* func where src register points to */
				      struct bpf_func_state *reg_state,
				      int off, int size, int dst_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	struct bpf_reg_state *reg;
	u8 *stype, type;
	int insn_flags = INSN_F_STACK_ACCESS;
	int hist_spi = spi, hist_frame = reg_state->frameno;

	stype = reg_state->stack[spi].slot_type;
	reg = &reg_state->stack[spi].spilled_ptr;

	mark_stack_slot_scratched(env, spi);
	check_fastcall_stack_contract(env, state, env->insn_idx, off);

	if (bpf_is_spilled_reg(&reg_state->stack[spi])) {
		u8 spill_size = 1;

		/* count the STACK_SPILL bytes from the top of the slot down */
		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
			spill_size++;

		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
			/* narrow read and/or narrow spill: scalars only */
			if (reg->type != SCALAR_VALUE) {
				verbose_linfo(env, env->insn_idx, "; ");
				verbose(env, "invalid size of register fill\n");
				return -EACCES;
			}

			if (dst_regno < 0)
				return 0;

			if (size <= spill_size &&
			    bpf_stack_narrow_access_ok(off, size, spill_size)) {
				/* The earlier check_reg_arg() has decided the
				 * subreg_def for this insn.  Save it first.
				 */
				s32 subreg_def = state->regs[dst_regno].subreg_def;

				if (env->bpf_capable && size == 4 && spill_size == 4 &&
				    get_reg_width(reg) <= 32)
					/* Ensure stack slot has an ID to build a relation
					 * with the destination register on fill.
					 */
					assign_scalar_id_before_mov(env, reg);
				state->regs[dst_regno] = *reg;
				state->regs[dst_regno].subreg_def = subreg_def;

				/* Break the relation on a narrowing fill.
				 * coerce_reg_to_size will adjust the boundaries.
				 */
				if (get_reg_width(reg) > size * BITS_PER_BYTE)
					clear_scalar_id(&state->regs[dst_regno]);
			} else {
				/* The read does not map onto the spilled
				 * value: classify the bytes it covers.
				 */
				int spill_cnt = 0, zero_cnt = 0;

				for (i = 0; i < size; i++) {
					type = stype[(slot - i) % BPF_REG_SIZE];
					if (type == STACK_SPILL) {
						spill_cnt++;
						continue;
					}
					if (type == STACK_MISC)
						continue;
					if (type == STACK_ZERO) {
						zero_cnt++;
						continue;
					}
					if (type == STACK_INVALID && env->allow_uninit_stack)
						continue;
					if (type == STACK_POISON) {
						verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
							off, i, size);
					} else {
						verbose(env, "invalid read from stack off %d+%d size %d\n",
							off, i, size);
					}
					return -EACCES;
				}

				if (spill_cnt == size &&
				    tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
					__mark_reg_const_zero(env, &state->regs[dst_regno]);
					/* this IS register fill, so keep insn_flags */
				} else if (zero_cnt == size) {
					/* similarly to mark_reg_stack_read(), preserve zeroes */
					__mark_reg_const_zero(env, &state->regs[dst_regno]);
					insn_flags = 0; /* not restoring original register state */
				} else {
					mark_reg_unknown(env, state->regs, dst_regno);
					insn_flags = 0; /* not restoring original register state */
				}
			}
		} else if (dst_regno >= 0) {
			/* restore register state from stack */
			if (env->bpf_capable)
				/* Ensure stack slot has an ID to build a relation
				 * with the destination register on fill.
				 */
				assign_scalar_id_before_mov(env, reg);
			state->regs[dst_regno] = *reg;
			/* mark reg as written since spilled pointer state likely
			 * has its liveness marks cleared by is_state_visited()
			 * which resets stack/reg liveness for state transitions
			 *
			 * NOTE(review): no statement follows this comment in
			 * the current code; confirm whether it is stale.
			 */
		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
			/* If dst_regno==-1, the caller is asking us whether
			 * it is acceptable to use this value as a SCALAR_VALUE
			 * (e.g. for XADD).
			 * We must not allow unprivileged callers to do that
			 * with spilled pointers.
			 */
			verbose(env, "leaking pointer from stack off %d\n",
				off);
			return -EACCES;
		}
	} else {
		/* no spilled register in the slot: every byte read must be
		 * STACK_MISC, STACK_ZERO, or STACK_INVALID with
		 * allow_uninit_stack
		 */
		for (i = 0; i < size; i++) {
			type = stype[(slot - i) % BPF_REG_SIZE];
			if (type == STACK_MISC)
				continue;
			if (type == STACK_ZERO)
				continue;
			if (type == STACK_INVALID && env->allow_uninit_stack)
				continue;
			if (type == STACK_POISON) {
				verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
					off, i, size);
			} else {
				verbose(env, "invalid read from stack off %d+%d size %d\n",
					off, i, size);
			}
			return -EACCES;
		}
		if (dst_regno >= 0)
			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
		insn_flags = 0; /* we are not restoring spilled register */
	}
	/* only register fills are recorded in the jump history */
	if (insn_flags)
		return bpf_push_jmp_history(env, env->cur_state, insn_flags,
					    hist_spi, hist_frame, 0);
	return 0;
}
3885 
/* Identifies whether a memory access being checked is performed by an
 * instruction or by a helper.
 */
enum bpf_access_src {
	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
	ACCESS_HELPER = 2,  /* the access is performed by a helper */
};
3890 
/* Declared ahead of its use in check_stack_read_var_off() below. */
static int check_stack_range_initialized(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
					 argno_t argno, int off, int access_size,
					 bool zero_size_allowed,
					 enum bpf_access_type type,
					 struct bpf_call_arg_meta *meta);
3896 
3897 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3898 {
3899 	return cur_regs(env) + regno;
3900 }
3901 
3902 /* Read the stack at 'reg + off' and put the result into the register
3903  * 'dst_regno'.
3904  * 'off' includes the pointer register's fixed offset(i.e. 'reg->off'),
3905  * but not its variable offset.
3906  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3907  *
3908  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3909  * filling registers (i.e. reads of spilled register cannot be detected when
3910  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3911  * SCALAR_VALUE. That's why we assert that the 'reg' has a variable
3912  * offset; for a fixed offset check_stack_read_fixed_off should be used
3913  * instead.
3914  */
3915 static int check_stack_read_var_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3916 				    argno_t ptr_argno, int off, int size, int dst_regno)
3917 {
3918 	struct bpf_func_state *ptr_state = bpf_func(env, reg);
3919 	int err;
3920 	int min_off, max_off;
3921 
3922 	/* Note that we pass a NULL meta, so raw access will not be permitted.
3923 	 */
3924 	err = check_stack_range_initialized(env, reg, ptr_argno, off, size,
3925 					    false, BPF_READ, NULL);
3926 	if (err)
3927 		return err;
3928 
3929 	min_off = reg_smin(reg) + off;
3930 	max_off = reg_smax(reg) + off;
3931 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3932 	check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
3933 	return 0;
3934 }
3935 
3936 /* check_stack_read dispatches to check_stack_read_fixed_off or
3937  * check_stack_read_var_off.
3938  *
3939  * The caller must ensure that the offset falls within the allocated stack
3940  * bounds.
3941  *
3942  * 'dst_regno' is a register which will receive the value from the stack. It
3943  * can be -1, meaning that the read value is not going to a register.
3944  */
3945 static int check_stack_read(struct bpf_verifier_env *env,
3946 			    struct bpf_reg_state *reg, argno_t ptr_argno, int off, int size,
3947 			    int dst_regno)
3948 {
3949 	struct bpf_func_state *state = bpf_func(env, reg);
3950 	int err;
3951 	/* Some accesses are only permitted with a static offset. */
3952 	bool var_off = !tnum_is_const(reg->var_off);
3953 
3954 	/* The offset is required to be static when reads don't go to a
3955 	 * register, in order to not leak pointers (see
3956 	 * check_stack_read_fixed_off).
3957 	 */
3958 	if (dst_regno < 0 && var_off) {
3959 		char tn_buf[48];
3960 
3961 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3962 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3963 			tn_buf, off, size);
3964 		return -EACCES;
3965 	}
3966 	/* Variable offset is prohibited for unprivileged mode for simplicity
3967 	 * since it requires corresponding support in Spectre masking for stack
3968 	 * ALU. See also retrieve_ptr_limit(). The check in
3969 	 * check_stack_access_for_ptr_arithmetic() called by
3970 	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
3971 	 * with variable offsets, therefore no check is required here. Further,
3972 	 * just checking it here would be insufficient as speculative stack
3973 	 * writes could still lead to unsafe speculative behaviour.
3974 	 */
3975 	if (!var_off) {
3976 		off += reg->var_off.value;
3977 		err = check_stack_read_fixed_off(env, state, off, size,
3978 						 dst_regno);
3979 	} else {
3980 		/* Variable offset stack reads need more conservative handling
3981 		 * than fixed offset ones. Note that dst_regno >= 0 on this
3982 		 * branch.
3983 		 */
3984 		err = check_stack_read_var_off(env, reg, ptr_argno, off, size,
3985 					       dst_regno);
3986 	}
3987 	return err;
3988 }
3989 
3990 
3991 /* check_stack_write dispatches to check_stack_write_fixed_off or
3992  * check_stack_write_var_off.
3993  *
3994  * 'reg' is the register used as a pointer into the stack.
3995  * 'value_regno' is the register whose value we're writing to the stack. It can
3996  * be -1, meaning that we're not writing from a register.
3997  *
3998  * The caller must ensure that the offset falls within the maximum stack size.
3999  */
4000 static int check_stack_write(struct bpf_verifier_env *env,
4001 			     struct bpf_reg_state *reg, int off, int size,
4002 			     int value_regno, int insn_idx)
4003 {
4004 	struct bpf_func_state *state = bpf_func(env, reg);
4005 	int err;
4006 
4007 	if (tnum_is_const(reg->var_off)) {
4008 		off += reg->var_off.value;
4009 		err = check_stack_write_fixed_off(env, state, off, size,
4010 						  value_regno, insn_idx);
4011 	} else {
4012 		/* Variable offset stack reads need more conservative handling
4013 		 * than fixed offset ones.
4014 		 */
4015 		err = check_stack_write_var_off(env, state,
4016 						reg, off, size,
4017 						value_regno, insn_idx);
4018 	}
4019 	return err;
4020 }
4021 
4022 /*
4023  * Write a value to the outgoing stack arg area.
4024  * off is a negative offset from r11 (e.g. -8 for arg6, -16 for arg7).
4025  */
4026 static int check_stack_arg_write(struct bpf_verifier_env *env, struct bpf_func_state *state,
4027 				 int off, struct bpf_reg_state *value_reg)
4028 {
4029 	int max_stack_arg_regs = MAX_BPF_FUNC_ARGS - MAX_BPF_FUNC_REG_ARGS;
4030 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4031 	int spi = -off / BPF_REG_SIZE - 1;
4032 	struct bpf_reg_state *arg;
4033 	int err;
4034 
4035 	if (spi >= max_stack_arg_regs) {
4036 		verbose(env, "stack arg write offset %d exceeds max %d stack args\n",
4037 			off, max_stack_arg_regs);
4038 		return -EINVAL;
4039 	}
4040 
4041 	err = grow_stack_arg_slots(env, state, spi + 1);
4042 	if (err)
4043 		return err;
4044 
4045 	/* Track the max outgoing stack arg slot count. */
4046 	if (spi + 1 > subprog->max_out_stack_arg_cnt)
4047 		subprog->max_out_stack_arg_cnt = spi + 1;
4048 
4049 	if (value_reg) {
4050 		state->stack_arg_regs[spi] = *value_reg;
4051 	} else {
4052 		/* BPF_ST: store immediate, treat as scalar */
4053 		arg = &state->stack_arg_regs[spi];
4054 		arg->type = SCALAR_VALUE;
4055 		__mark_reg_known(arg, env->prog->insnsi[env->insn_idx].imm);
4056 	}
4057 	state->no_stack_arg_load = true;
4058 	return bpf_push_jmp_history(env, env->cur_state,
4059 				    INSN_F_STACK_ARG_ACCESS, spi, 0, 0);
4060 }
4061 
4062 /*
4063  * Read a value from the incoming stack arg area.
4064  * off is a positive offset from r11 (e.g. +8 for arg6, +16 for arg7).
4065  */
4066 static int check_stack_arg_read(struct bpf_verifier_env *env, struct bpf_func_state *state,
4067 				int off, int dst_regno)
4068 {
4069 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4070 	struct bpf_verifier_state *vstate = env->cur_state;
4071 	int spi = off / BPF_REG_SIZE - 1;
4072 	struct bpf_func_state *caller, *cur;
4073 	struct bpf_reg_state *arg;
4074 
4075 	if (state->no_stack_arg_load) {
4076 		verbose(env, "r11 load must be before any r11 store or call insn\n");
4077 		return -EINVAL;
4078 	}
4079 
4080 	if (spi + 1 > bpf_in_stack_arg_cnt(subprog)) {
4081 		verbose(env, "invalid read from stack arg off %d depth %d\n",
4082 			off, bpf_in_stack_arg_cnt(subprog) * BPF_REG_SIZE);
4083 		return -EACCES;
4084 	}
4085 
4086 	caller = vstate->frame[vstate->curframe - 1];
4087 	arg = &caller->stack_arg_regs[spi];
4088 	cur = vstate->frame[vstate->curframe];
4089 	cur->regs[dst_regno] = *arg;
4090 	return bpf_push_jmp_history(env, env->cur_state,
4091 				    INSN_F_STACK_ARG_ACCESS, spi, 0, 0);
4092 }
4093 
4094 static int mark_stack_arg_precision(struct bpf_verifier_env *env, int arg_idx)
4095 {
4096 	struct bpf_func_state *caller = cur_func(env);
4097 	int spi = arg_idx - MAX_BPF_FUNC_REG_ARGS;
4098 
4099 	bt_set_frame_stack_arg_slot(&env->bt, caller->frameno, spi);
4100 	return mark_chain_precision_batch(env, env->cur_state);
4101 }
4102 
4103 static int check_outgoing_stack_args(struct bpf_verifier_env *env, struct bpf_func_state *caller,
4104 				     int nargs)
4105 {
4106 	int i, spi;
4107 
4108 	for (i = MAX_BPF_FUNC_REG_ARGS; i < nargs; i++) {
4109 		spi = i - MAX_BPF_FUNC_REG_ARGS;
4110 		if (spi >= caller->out_stack_arg_cnt ||
4111 		    caller->stack_arg_regs[spi].type == NOT_INIT) {
4112 			verbose(env, "callee expects %d args, stack arg%d is not initialized\n",
4113 				nargs, spi + 1);
4114 			return -EFAULT;
4115 		}
4116 	}
4117 
4118 	return 0;
4119 }
4120 
4121 static struct bpf_reg_state *get_func_arg_reg(struct bpf_func_state *caller,
4122 					      struct bpf_reg_state *regs, int arg)
4123 {
4124 	if (arg < MAX_BPF_FUNC_REG_ARGS)
4125 		return &regs[arg + 1];
4126 
4127 	return &caller->stack_arg_regs[arg - MAX_BPF_FUNC_REG_ARGS];
4128 }
4129 
4130 static int check_map_access_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4131 				 int off, int size, enum bpf_access_type type)
4132 {
4133 	struct bpf_map *map = reg->map_ptr;
4134 	u32 cap = bpf_map_flags_to_cap(map);
4135 
4136 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4137 		verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n",
4138 			map->value_size, reg_smin(reg) + off, size);
4139 		return -EACCES;
4140 	}
4141 
4142 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4143 		verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n",
4144 			map->value_size, reg_smin(reg) + off, size);
4145 		return -EACCES;
4146 	}
4147 
4148 	return 0;
4149 }
4150 
4151 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
4152 static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4153 			      int off, int size, u32 mem_size,
4154 			      bool zero_size_allowed)
4155 {
4156 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
4157 
4158 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
4159 		return 0;
4160 
4161 	switch (reg->type) {
4162 	case PTR_TO_MAP_KEY:
4163 		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
4164 			mem_size, off, size);
4165 		break;
4166 	case PTR_TO_MAP_VALUE:
4167 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
4168 			mem_size, off, size);
4169 		break;
4170 	case PTR_TO_PACKET:
4171 	case PTR_TO_PACKET_META:
4172 	case PTR_TO_PACKET_END:
4173 		verbose(env, "invalid access to packet, off=%d size=%d, %s(id=%d,off=%d,r=%d)\n",
4174 			off, size, reg_arg_name(env, argno), reg->id, off, mem_size);
4175 		break;
4176 	case PTR_TO_CTX:
4177 		verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n",
4178 			mem_size, off, size);
4179 		break;
4180 	case PTR_TO_MEM:
4181 	default:
4182 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
4183 			mem_size, off, size);
4184 	}
4185 
4186 	return -EACCES;
4187 }
4188 
/* check read/write into a memory region with possible variable offset
 *
 * The access of 'size' bytes is validated twice through
 * __check_mem_access(): once at reg_smin(reg) + off and once at
 * reg_umax(reg) + off, against a region of 'mem_size' bytes.
 *
 * Returns 0 if both ends are acceptable, -EACCES if the minimum is negative
 * or the maximum is unbounded, otherwise the error from
 * __check_mem_access().
 */
static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
				   int off, int size, u32 mem_size,
				   bool zero_size_allowed)
{
	int err;

	/* We may have adjusted the register pointing to memory region, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 *
	 * The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * index'es we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	/* A negative minimum is rejected when it is S64_MIN, when
	 * off + minimum does not survive truncation to s32, or when the sum
	 * is itself negative.
	 */
	if (reg_smin(reg) < 0 &&
	    (reg_smin(reg) == S64_MIN ||
	     (off + reg_smin(reg) != (s64)(s32)(off + reg_smin(reg))) ||
	      reg_smin(reg) + off < 0)) {
		verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			reg_arg_name(env, argno));
		return -EACCES;
	}
	/* Lower end of the possible access range. */
	err = __check_mem_access(env, reg, argno, reg_smin(reg) + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "%s min value is outside of the allowed memory range\n",
			reg_arg_name(env, argno));
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg_umax(reg) + off could overflow, treat that as unbounded too.
	 */
	if (reg_umax(reg) >= BPF_MAX_VAR_OFF) {
		verbose(env, "%s unbounded memory access, make sure to bounds check any such access\n",
			reg_arg_name(env, argno));
		return -EACCES;
	}
	/* Upper end of the possible access range. */
	err = __check_mem_access(env, reg, argno, reg_umax(reg) + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "%s max value is outside of the allowed memory range\n",
			reg_arg_name(env, argno));
		return err;
	}

	return 0;
}
4241 
4242 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4243 			       const struct bpf_reg_state *reg, argno_t argno,
4244 			       bool fixed_off_ok)
4245 {
4246 	/* Access to this pointer-typed register or passing it to a helper
4247 	 * is only allowed in its original, unmodified form.
4248 	 */
4249 
4250 	if (!tnum_is_const(reg->var_off)) {
4251 		char tn_buf[48];
4252 
4253 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4254 		verbose(env, "variable %s access var_off=%s disallowed\n",
4255 			reg_type_str(env, reg->type), tn_buf);
4256 		return -EACCES;
4257 	}
4258 
4259 	if (reg_smin(reg) < 0) {
4260 		verbose(env, "negative offset %s ptr %s off=%lld disallowed\n",
4261 			reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value);
4262 		return -EACCES;
4263 	}
4264 
4265 	if (!fixed_off_ok && reg->var_off.value != 0) {
4266 		verbose(env, "dereference of modified %s ptr %s off=%lld disallowed\n",
4267 			reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value);
4268 		return -EACCES;
4269 	}
4270 
4271 	return 0;
4272 }
4273 
4274 static int check_ptr_off_reg(struct bpf_verifier_env *env,
4275 		             const struct bpf_reg_state *reg, int regno)
4276 {
4277 	return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false);
4278 }
4279 
/* Check that register 'reg' (number 'regno') holds a pointer that may be
 * stored into the map kptr field 'kptr_field'.
 *
 * Returns 0 on a type match, -EACCES if __check_ptr_off_reg() rejects the
 * register's offset, and -EINVAL (after logging "invalid kptr access") when
 * the register's base type, type flags or BTF type do not match.
 */
static int map_kptr_match_type(struct bpf_verifier_env *env,
			       struct btf_field *kptr_field,
			       struct bpf_reg_state *reg, u32 regno)
{
	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
	int perm_flags;
	/* Stays empty for failures that happen before reg->btf is validated. */
	const char *reg_name = "";

	if (base_type(reg->type) != PTR_TO_BTF_ID)
		goto bad_type;

	/* Build the set of type flags the register is allowed to carry. */
	if (btf_is_kernel(reg->btf)) {
		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;

		/* Only unreferenced case accepts untrusted pointers */
		if (kptr_field->type == BPF_KPTR_UNREF)
			perm_flags |= PTR_UNTRUSTED;
	} else {
		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
		if (kptr_field->type == BPF_KPTR_PERCPU)
			perm_flags |= MEM_PERCPU;
	}

	/* Any flag outside the permitted set is a mismatch. */
	if (type_flag(reg->type) & ~perm_flags)
		goto bad_type;

	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
	reg_name = btf_type_name(reg->btf, reg->btf_id);

	/* For ref_ptr case, release function check should ensure we get one
	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
	 * normal store of unreferenced kptr, we must ensure var_off is zero.
	 * Since ref_ptr cannot be accessed directly by BPF insns, check for
	 * reg->id is not needed here.
	 *
	 * NOTE(review): with fixed_off_ok == true, __check_ptr_off_reg() as
	 * written in this file only rejects a non-constant or negative
	 * var_off; a non-zero constant passes. The "var_off is zero" wording
	 * above looks stale - confirm.
	 */
	if (__check_ptr_off_reg(env, reg, argno_from_reg(regno), true))
		return -EACCES;

	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
	 * we also need to take into account the reg->var_off.
	 *
	 * We want to support cases like:
	 *
	 * struct foo {
	 *         struct bar br;
	 *         struct baz bz;
	 * };
	 *
	 * struct foo *v;
	 * v = func();	      // PTR_TO_BTF_ID
	 * val->foo = v;      // reg->var_off is zero, btf and btf_id match type
	 * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with
	 *                    // first member type of struct after comparison fails
	 * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked
	 *                    // to match type
	 *
	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off
	 * is zero. We must also ensure that btf_struct_ids_match does not walk
	 * the struct to match type against first member of struct, i.e. reject
	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
	 * strict mode to true for type match.
	 */
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value,
				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				  kptr_field->type != BPF_KPTR_UNREF))
		goto bad_type;
	return 0;
bad_type:
	/* Log the actual type followed by the expected one(s). */
	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
		reg_type_str(env, reg->type), reg_name);
	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
	if (kptr_field->type == BPF_KPTR_UNREF)
		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
			targ_name);
	else
		verbose(env, "\n");
	return -EINVAL;
}
4358 
4359 static bool in_sleepable(struct bpf_verifier_env *env)
4360 {
4361 	return env->cur_state->in_sleepable;
4362 }
4363 
4364 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
4365  * can dereference RCU protected pointers and result is PTR_TRUSTED.
4366  */
4367 static bool in_rcu_cs(struct bpf_verifier_env *env)
4368 {
4369 	return env->cur_state->active_rcu_locks ||
4370 	       env->cur_state->active_locks ||
4371 	       !in_sleepable(env);
4372 }
4373 
/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
/* Set of struct BTF IDs consulted by rcu_protected_object() below. Entries
 * wrapped in #ifdef are present only when the corresponding CONFIG_ option
 * is enabled; task_struct is always included.
 */
BTF_SET_START(rcu_protected_types)
#ifdef CONFIG_NET
BTF_ID(struct, prog_test_ref_kfunc)
#endif
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
#endif
BTF_ID(struct, task_struct)
#ifdef CONFIG_CRYPTO
BTF_ID(struct, bpf_crypto_ctx)
#endif
BTF_SET_END(rcu_protected_types)
4390 
4391 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4392 {
4393 	if (!btf_is_kernel(btf))
4394 		return true;
4395 	return btf_id_set_contains(&rcu_protected_types, btf_id);
4396 }
4397 
4398 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
4399 {
4400 	struct btf_struct_meta *meta;
4401 
4402 	if (btf_is_kernel(kptr_field->kptr.btf))
4403 		return NULL;
4404 
4405 	meta = btf_find_struct_meta(kptr_field->kptr.btf,
4406 				    kptr_field->kptr.btf_id);
4407 
4408 	return meta ? meta->record : NULL;
4409 }
4410 
4411 static bool rcu_safe_kptr(const struct btf_field *field)
4412 {
4413 	const struct btf_field_kptr *kptr = &field->kptr;
4414 
4415 	return field->type == BPF_KPTR_PERCPU ||
4416 	       (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
4417 }
4418 
4419 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
4420 {
4421 	struct btf_record *rec;
4422 	u32 ret;
4423 
4424 	ret = PTR_MAYBE_NULL;
4425 	if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
4426 		ret |= MEM_RCU;
4427 		if (kptr_field->type == BPF_KPTR_PERCPU)
4428 			ret |= MEM_PERCPU;
4429 		else if (!btf_is_kernel(kptr_field->kptr.btf))
4430 			ret |= MEM_ALLOC;
4431 
4432 		rec = kptr_pointee_btf_record(kptr_field);
4433 		if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
4434 			ret |= NON_OWN_REF;
4435 	} else {
4436 		ret |= PTR_UNTRUSTED;
4437 	}
4438 
4439 	return ret;
4440 }
4441 
4442 static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
4443 			    struct btf_field *field)
4444 {
4445 	struct bpf_reg_state *reg;
4446 	const struct btf_type *t;
4447 
4448 	t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
4449 	mark_reg_known_zero(env, cur_regs(env), regno);
4450 	reg = reg_state(env, regno);
4451 	reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
4452 	reg->mem_size = t->size;
4453 	reg->id = ++env->id_gen;
4454 
4455 	return 0;
4456 }
4457 
4458 static int check_map_kptr_access(struct bpf_verifier_env *env,
4459 				 int value_regno, int insn_idx,
4460 				 struct btf_field *kptr_field)
4461 {
4462 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4463 	int class = BPF_CLASS(insn->code);
4464 	struct bpf_reg_state *val_reg;
4465 	int ret;
4466 
4467 	/* Things we already checked for in check_map_access and caller:
4468 	 *  - Reject cases where variable offset may touch kptr
4469 	 *  - size of access (must be BPF_DW)
4470 	 *  - tnum_is_const(reg->var_off)
4471 	 *  - kptr_field->offset == off + reg->var_off.value
4472 	 */
4473 	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
4474 	if (BPF_MODE(insn->code) != BPF_MEM) {
4475 		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
4476 		return -EACCES;
4477 	}
4478 
4479 	/* We only allow loading referenced kptr, since it will be marked as
4480 	 * untrusted, similar to unreferenced kptr.
4481 	 */
4482 	if (class != BPF_LDX &&
4483 	    (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
4484 		verbose(env, "store to referenced kptr disallowed\n");
4485 		return -EACCES;
4486 	}
4487 	if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
4488 		verbose(env, "store to uptr disallowed\n");
4489 		return -EACCES;
4490 	}
4491 
4492 	if (class == BPF_LDX) {
4493 		if (kptr_field->type == BPF_UPTR)
4494 			return mark_uptr_ld_reg(env, value_regno, kptr_field);
4495 
4496 		/* We can simply mark the value_regno receiving the pointer
4497 		 * value from map as PTR_TO_BTF_ID, with the correct type.
4498 		 */
4499 		ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID,
4500 				      kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4501 				      btf_ld_kptr_type(env, kptr_field));
4502 		if (ret < 0)
4503 			return ret;
4504 	} else if (class == BPF_STX) {
4505 		val_reg = reg_state(env, value_regno);
4506 		if (!bpf_register_is_null(val_reg) &&
4507 		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
4508 			return -EACCES;
4509 	} else if (class == BPF_ST) {
4510 		if (insn->imm) {
4511 			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
4512 				kptr_field->offset);
4513 			return -EACCES;
4514 		}
4515 	} else {
4516 		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
4517 		return -EACCES;
4518 	}
4519 	return 0;
4520 }
4521 
4522 /*
4523  * Return the size of the memory region accessible from a pointer to map value.
4524  * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible.
4525  */
4526 static u32 map_mem_size(const struct bpf_map *map)
4527 {
4528 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4529 		return map->max_entries * sizeof(long);
4530 
4531 	return map->value_size;
4532 }
4533 
4534 /* check read/write into a map element with possible variable offset */
4535 static int check_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4536 			    int off, int size, bool zero_size_allowed,
4537 			    enum bpf_access_src src)
4538 {
4539 	struct bpf_map *map = reg->map_ptr;
4540 	u32 mem_size = map_mem_size(map);
4541 	struct btf_record *rec;
4542 	int err, i;
4543 
4544 	err = check_mem_region_access(env, reg, argno, off, size, mem_size, zero_size_allowed);
4545 	if (err)
4546 		return err;
4547 
4548 	if (IS_ERR_OR_NULL(map->record))
4549 		return 0;
4550 	rec = map->record;
4551 	for (i = 0; i < rec->cnt; i++) {
4552 		struct btf_field *field = &rec->fields[i];
4553 		u32 p = field->offset;
4554 
4555 		/* If any part of a field  can be touched by load/store, reject
4556 		 * this program. To check that [x1, x2) overlaps with [y1, y2),
4557 		 * it is sufficient to check x1 < y2 && y1 < x2.
4558 		 */
4559 		if (reg_smin(reg) + off < p + field->size &&
4560 		    p < reg_umax(reg) + off + size) {
4561 			switch (field->type) {
4562 			case BPF_KPTR_UNREF:
4563 			case BPF_KPTR_REF:
4564 			case BPF_KPTR_PERCPU:
4565 			case BPF_UPTR:
4566 				if (src != ACCESS_DIRECT) {
4567 					verbose(env, "%s cannot be accessed indirectly by helper\n",
4568 						btf_field_type_name(field->type));
4569 					return -EACCES;
4570 				}
4571 				if (!tnum_is_const(reg->var_off)) {
4572 					verbose(env, "%s access cannot have variable offset\n",
4573 						btf_field_type_name(field->type));
4574 					return -EACCES;
4575 				}
4576 				if (p != off + reg->var_off.value) {
4577 					verbose(env, "%s access misaligned expected=%u off=%llu\n",
4578 						btf_field_type_name(field->type),
4579 						p, off + reg->var_off.value);
4580 					return -EACCES;
4581 				}
4582 				if (size != bpf_size_to_bytes(BPF_DW)) {
4583 					verbose(env, "%s access size must be BPF_DW\n",
4584 						btf_field_type_name(field->type));
4585 					return -EACCES;
4586 				}
4587 				break;
4588 			default:
4589 				verbose(env, "%s cannot be accessed directly by load/store\n",
4590 					btf_field_type_name(field->type));
4591 				return -EACCES;
4592 			}
4593 		}
4594 	}
4595 	return 0;
4596 }
4597 
4598 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
4599 			       const struct bpf_call_arg_meta *meta,
4600 			       enum bpf_access_type t)
4601 {
4602 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
4603 
4604 	switch (prog_type) {
4605 	/* Program types only with direct read access go here! */
4606 	case BPF_PROG_TYPE_LWT_IN:
4607 	case BPF_PROG_TYPE_LWT_OUT:
4608 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
4609 	case BPF_PROG_TYPE_SK_REUSEPORT:
4610 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
4611 	case BPF_PROG_TYPE_CGROUP_SKB:
4612 		if (t == BPF_WRITE)
4613 			return false;
4614 		fallthrough;
4615 
4616 	/* Program types with direct read + write access go here! */
4617 	case BPF_PROG_TYPE_SCHED_CLS:
4618 	case BPF_PROG_TYPE_SCHED_ACT:
4619 	case BPF_PROG_TYPE_XDP:
4620 	case BPF_PROG_TYPE_LWT_XMIT:
4621 	case BPF_PROG_TYPE_SK_SKB:
4622 	case BPF_PROG_TYPE_SK_MSG:
4623 		if (meta)
4624 			return meta->pkt_access;
4625 
4626 		env->seen_direct_write = true;
4627 		return true;
4628 
4629 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4630 		if (t == BPF_WRITE)
4631 			env->seen_direct_write = true;
4632 
4633 		return true;
4634 
4635 	default:
4636 		return false;
4637 	}
4638 }
4639 
4640 static int check_packet_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off,
4641 			       int size, bool zero_size_allowed)
4642 {
4643 	int err;
4644 
4645 	if (reg->range < 0) {
4646 		verbose(env, "%s offset is outside of the packet\n", reg_arg_name(env, argno));
4647 		return -EINVAL;
4648 	}
4649 
4650 	err = check_mem_region_access(env, reg, argno, off, size, reg->range, zero_size_allowed);
4651 	if (err)
4652 		return err;
4653 
4654 	/* __check_mem_access has made sure "off + size - 1" is within u16.
4655 	 * reg_umax(reg) can't be bigger than MAX_PACKET_OFF which is 0xffff,
4656 	 * otherwise find_good_pkt_pointers would have refused to set range info
4657 	 * that __check_mem_access would have rejected this pkt access.
4658 	 * Therefore, "off + reg_umax(reg) + size - 1" won't overflow u32.
4659 	 */
4660 	env->prog->aux->max_pkt_offset =
4661 		max_t(u32, env->prog->aux->max_pkt_offset,
4662 		      off + reg_umax(reg) + size - 1);
4663 
4664 	return 0;
4665 }
4666 
4667 static bool is_var_ctx_off_allowed(struct bpf_prog *prog)
4668 {
4669 	return resolve_prog_type(prog) == BPF_PROG_TYPE_SYSCALL;
4670 }
4671 
4672 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
4673 static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
4674 			      enum bpf_access_type t, struct bpf_insn_access_aux *info)
4675 {
4676 	if (env->ops->is_valid_access &&
4677 	    env->ops->is_valid_access(off, size, t, env->prog, info)) {
4678 		/* A non zero info.ctx_field_size indicates that this field is a
4679 		 * candidate for later verifier transformation to load the whole
4680 		 * field and then apply a mask when accessed with a narrower
4681 		 * access than actual ctx access size. A zero info.ctx_field_size
4682 		 * will only allow for whole field access and rejects any other
4683 		 * type of narrower access.
4684 		 */
4685 		if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
4686 			if (info->ref_id &&
4687 			    !find_reference_state(env->cur_state, info->ref_id)) {
4688 				verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
4689 					off);
4690 				return -EACCES;
4691 			}
4692 		} else {
4693 			env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
4694 		}
4695 		/* remember the offset of last byte accessed in ctx */
4696 		if (env->prog->aux->max_ctx_offset < off + size)
4697 			env->prog->aux->max_ctx_offset = off + size;
4698 		return 0;
4699 	}
4700 
4701 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
4702 	return -EACCES;
4703 }
4704 
4705 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno,
4706 			    int off, int access_size, enum bpf_access_type t,
4707 			    struct bpf_insn_access_aux *info)
4708 {
4709 	/*
4710 	 * Program types that don't rewrite ctx accesses can safely
4711 	 * dereference ctx pointers with fixed offsets.
4712 	 */
4713 	bool var_off_ok = is_var_ctx_off_allowed(env->prog);
4714 	bool fixed_off_ok = !env->ops->convert_ctx_access;
4715 	int err;
4716 
4717 	if (var_off_ok)
4718 		err = check_mem_region_access(env, reg, argno, off, access_size, U16_MAX, false);
4719 	else
4720 		err = __check_ptr_off_reg(env, reg, argno, fixed_off_ok);
4721 	if (err)
4722 		return err;
4723 	off += reg_umax(reg);
4724 
4725 	err = __check_ctx_access(env, insn_idx, off, access_size, t, info);
4726 	if (err)
4727 		verbose_linfo(env, insn_idx, "; ");
4728 	return err;
4729 }
4730 
4731 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
4732 				  int size)
4733 {
4734 	if (size < 0 || off < 0 ||
4735 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
4736 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
4737 			off, size);
4738 		return -EACCES;
4739 	}
4740 	return 0;
4741 }
4742 
4743 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
4744 			     struct bpf_reg_state *reg, argno_t argno, int off, int size,
4745 			     enum bpf_access_type t)
4746 {
4747 	struct bpf_insn_access_aux info = {};
4748 	bool valid;
4749 
4750 	if (reg_smin(reg) < 0) {
4751 		verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4752 			reg_arg_name(env, argno));
4753 		return -EACCES;
4754 	}
4755 
4756 	switch (reg->type) {
4757 	case PTR_TO_SOCK_COMMON:
4758 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4759 		break;
4760 	case PTR_TO_SOCKET:
4761 		valid = bpf_sock_is_valid_access(off, size, t, &info);
4762 		break;
4763 	case PTR_TO_TCP_SOCK:
4764 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4765 		break;
4766 	case PTR_TO_XDP_SOCK:
4767 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4768 		break;
4769 	default:
4770 		valid = false;
4771 	}
4772 
4773 
4774 	if (valid) {
4775 		env->insn_aux_data[insn_idx].ctx_field_size =
4776 			info.ctx_field_size;
4777 		return 0;
4778 	}
4779 
4780 	verbose(env, "%s invalid %s access off=%d size=%d\n",
4781 		reg_arg_name(env, argno), reg_type_str(env, reg->type), off, size);
4782 
4783 	return -EACCES;
4784 }
4785 
4786 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4787 {
4788 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4789 }
4790 
4791 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4792 {
4793 	const struct bpf_reg_state *reg = reg_state(env, regno);
4794 
4795 	return reg->type == PTR_TO_CTX;
4796 }
4797 
4798 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4799 {
4800 	const struct bpf_reg_state *reg = reg_state(env, regno);
4801 
4802 	return type_is_sk_pointer(reg->type);
4803 }
4804 
4805 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4806 {
4807 	const struct bpf_reg_state *reg = reg_state(env, regno);
4808 
4809 	return type_is_pkt_pointer(reg->type);
4810 }
4811 
4812 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4813 {
4814 	const struct bpf_reg_state *reg = reg_state(env, regno);
4815 
4816 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
4817 	return reg->type == PTR_TO_FLOW_KEYS;
4818 }
4819 
4820 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
4821 {
4822 	const struct bpf_reg_state *reg = reg_state(env, regno);
4823 
4824 	return reg->type == PTR_TO_ARENA;
4825 }
4826 
4827 /* Return false if @regno contains a pointer whose type isn't supported for
4828  * atomic instruction @insn.
4829  */
4830 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
4831 			       struct bpf_insn *insn)
4832 {
4833 	if (is_ctx_reg(env, regno))
4834 		return false;
4835 	if (is_pkt_reg(env, regno))
4836 		return false;
4837 	if (is_flow_key_reg(env, regno))
4838 		return false;
4839 	if (is_sk_reg(env, regno))
4840 		return false;
4841 	if (is_arena_reg(env, regno))
4842 		return bpf_jit_supports_insn(insn, true);
4843 
4844 	return true;
4845 }
4846 
/* Table indexed by base register type giving a pointer to a BTF type ID.
 * Only the listed register types have an entry; the socket entries exist
 * only when CONFIG_NET is enabled. All other slots are NULL.
 * is_trusted_reg() treats a non-NULL slot as marking the type trusted.
 */
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
#endif
	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
};
4855 
4856 static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
4857 {
4858 	/* A referenced register is always trusted. */
4859 	if (reg_is_referenced(env, reg))
4860 		return true;
4861 
4862 	/* Types listed in the reg2btf_ids are always trusted */
4863 	if (reg2btf_ids[base_type(reg->type)] &&
4864 	    !bpf_type_has_unsafe_modifiers(reg->type))
4865 		return true;
4866 
4867 	/* If a register is not referenced, it is trusted if it has the
4868 	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
4869 	 * other type modifiers may be safe, but we elect to take an opt-in
4870 	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4871 	 * not.
4872 	 *
4873 	 * Eventually, we should make PTR_TRUSTED the single source of truth
4874 	 * for whether a register is trusted.
4875 	 */
4876 	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4877 	       !bpf_type_has_unsafe_modifiers(reg->type);
4878 }
4879 
4880 static bool is_rcu_reg(const struct bpf_reg_state *reg)
4881 {
4882 	return reg->type & MEM_RCU;
4883 }
4884 
4885 static void clear_trusted_flags(enum bpf_type_flag *flag)
4886 {
4887 	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
4888 }
4889 
4890 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4891 				   const struct bpf_reg_state *reg,
4892 				   int off, int size, bool strict)
4893 {
4894 	struct tnum reg_off;
4895 	int ip_align;
4896 
4897 	/* Byte size accesses are always allowed. */
4898 	if (!strict || size == 1)
4899 		return 0;
4900 
4901 	/* For platforms that do not have a Kconfig enabling
4902 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4903 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
4904 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4905 	 * to this code only in strict mode where we want to emulate
4906 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
4907 	 * unconditional IP align value of '2'.
4908 	 */
4909 	ip_align = 2;
4910 
4911 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off));
4912 	if (!tnum_is_aligned(reg_off, size)) {
4913 		char tn_buf[48];
4914 
4915 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4916 		verbose(env,
4917 			"misaligned packet access off %d+%s+%d size %d\n",
4918 			ip_align, tn_buf, off, size);
4919 		return -EACCES;
4920 	}
4921 
4922 	return 0;
4923 }
4924 
4925 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4926 				       const struct bpf_reg_state *reg,
4927 				       const char *pointer_desc,
4928 				       int off, int size, bool strict)
4929 {
4930 	struct tnum reg_off;
4931 
4932 	/* Byte size accesses are always allowed. */
4933 	if (!strict || size == 1)
4934 		return 0;
4935 
4936 	reg_off = tnum_add(reg->var_off, tnum_const(off));
4937 	if (!tnum_is_aligned(reg_off, size)) {
4938 		char tn_buf[48];
4939 
4940 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4941 		verbose(env, "misaligned %saccess off %s+%d size %d\n",
4942 			pointer_desc, tn_buf, off, size);
4943 		return -EACCES;
4944 	}
4945 
4946 	return 0;
4947 }
4948 
4949 static int check_ptr_alignment(struct bpf_verifier_env *env,
4950 			       const struct bpf_reg_state *reg, int off,
4951 			       int size, bool strict_alignment_once)
4952 {
4953 	bool strict = env->strict_alignment || strict_alignment_once;
4954 	const char *pointer_desc = "";
4955 
4956 	switch (reg->type) {
4957 	case PTR_TO_PACKET:
4958 	case PTR_TO_PACKET_META:
4959 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
4960 		 * right in front, treat it the very same way.
4961 		 */
4962 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
4963 	case PTR_TO_FLOW_KEYS:
4964 		pointer_desc = "flow keys ";
4965 		break;
4966 	case PTR_TO_MAP_KEY:
4967 		pointer_desc = "key ";
4968 		break;
4969 	case PTR_TO_MAP_VALUE:
4970 		pointer_desc = "value ";
4971 		if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4972 			strict = true;
4973 		break;
4974 	case PTR_TO_CTX:
4975 		pointer_desc = "context ";
4976 		break;
4977 	case PTR_TO_STACK:
4978 		pointer_desc = "stack ";
4979 		/* The stack spill tracking logic in check_stack_write_fixed_off()
4980 		 * and check_stack_read_fixed_off() relies on stack accesses being
4981 		 * aligned.
4982 		 */
4983 		strict = true;
4984 		break;
4985 	case PTR_TO_SOCKET:
4986 		pointer_desc = "sock ";
4987 		break;
4988 	case PTR_TO_SOCK_COMMON:
4989 		pointer_desc = "sock_common ";
4990 		break;
4991 	case PTR_TO_TCP_SOCK:
4992 		pointer_desc = "tcp_sock ";
4993 		break;
4994 	case PTR_TO_XDP_SOCK:
4995 		pointer_desc = "xdp_sock ";
4996 		break;
4997 	case PTR_TO_ARENA:
4998 		return 0;
4999 	default:
5000 		break;
5001 	}
5002 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5003 					   strict);
5004 }
5005 
5006 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
5007 {
5008 	if (!bpf_jit_supports_private_stack())
5009 		return NO_PRIV_STACK;
5010 
5011 	/* bpf_prog_check_recur() checks all prog types that use bpf trampoline
5012 	 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
5013 	 * explicitly.
5014 	 */
5015 	switch (prog->type) {
5016 	case BPF_PROG_TYPE_KPROBE:
5017 	case BPF_PROG_TYPE_TRACEPOINT:
5018 	case BPF_PROG_TYPE_PERF_EVENT:
5019 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
5020 		return PRIV_STACK_ADAPTIVE;
5021 	case BPF_PROG_TYPE_TRACING:
5022 	case BPF_PROG_TYPE_LSM:
5023 	case BPF_PROG_TYPE_STRUCT_OPS:
5024 		if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
5025 			return PRIV_STACK_ADAPTIVE;
5026 		fallthrough;
5027 	default:
5028 		break;
5029 	}
5030 
5031 	return NO_PRIV_STACK;
5032 }
5033 
5034 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
5035 {
5036 	if (env->prog->jit_requested)
5037 		return round_up(stack_depth, 16);
5038 
5039 	/* round up to 32-bytes, since this is granularity
5040 	 * of interpreter stack size
5041 	 */
5042 	return round_up(max_t(u32, stack_depth, 1), 32);
5043 }
5044 
/* temporary state used for call frame depth calculation
 *
 * One entry per subprog. check_max_stack_depth_subprog() fills in an entry
 * when it descends from a caller into a callee and reads it back when the
 * callee has been fully walked.
 */
struct bpf_subprog_call_depth_info {
	int ret_insn; /* caller instruction where we return to. */
	int caller; /* caller subprogram idx, -1 for the root of the walk */
	int frame; /* # of consecutive static call stack frames on top of stack */
};
5051 
5052 /* starting from main bpf function walk all instructions of the function
5053  * and recursively walk all callees that given function can call.
5054  * Ignore jump and exit insns.
5055  */
5056 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
5057 					 struct bpf_subprog_call_depth_info *dinfo,
5058 					 bool priv_stack_supported)
5059 {
5060 	struct bpf_subprog_info *subprog = env->subprog_info;
5061 	struct bpf_insn *insn = env->prog->insnsi;
5062 	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
5063 	bool tail_call_reachable = false;
5064 	int total;
5065 	int tmp;
5066 
5067 	/* no caller idx */
5068 	dinfo[idx].caller = -1;
5069 
5070 	i = subprog[idx].start;
5071 	if (!priv_stack_supported)
5072 		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5073 process_func:
5074 	/* protect against potential stack overflow that might happen when
5075 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5076 	 * depth for such case down to 256 so that the worst case scenario
5077 	 * would result in 8k stack size (32 which is tailcall limit * 256 =
5078 	 * 8k).
5079 	 *
5080 	 * To get the idea what might happen, see an example:
5081 	 * func1 -> sub rsp, 128
5082 	 *  subfunc1 -> sub rsp, 256
5083 	 *  tailcall1 -> add rsp, 256
5084 	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5085 	 *   subfunc2 -> sub rsp, 64
5086 	 *   subfunc22 -> sub rsp, 128
5087 	 *   tailcall2 -> add rsp, 128
5088 	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5089 	 *
5090 	 * tailcall will unwind the current stack frame but it will not get rid
5091 	 * of caller's stack as shown on the example above.
5092 	 */
5093 	if (idx && subprog[idx].has_tail_call && depth >= 256) {
5094 		verbose(env,
5095 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
5096 			depth);
5097 		return -EACCES;
5098 	}
5099 
5100 	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
5101 	if (IS_ENABLED(CONFIG_X86_64) && subprog[idx].stack_arg_cnt) {
5102 		/* x86-64 uses R9 for both private stack frame pointer and arg6. */
5103 		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5104 	} else if (priv_stack_supported) {
5105 		/* Request private stack support only if the subprog stack
5106 		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
5107 		 * avoid jit penalty if the stack usage is small.
5108 		 */
5109 		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
5110 		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
5111 			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
5112 	}
5113 
5114 	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
5115 		if (subprog_depth > env->max_stack_depth)
5116 			env->max_stack_depth = subprog_depth;
5117 		if (subprog_depth > MAX_BPF_STACK) {
5118 			verbose(env, "stack size of subprog %d is %d. Too large\n",
5119 				idx, subprog_depth);
5120 			return -EACCES;
5121 		}
5122 	} else {
5123 		depth += subprog_depth;
5124 		if (depth > env->max_stack_depth)
5125 			env->max_stack_depth = depth;
5126 		if (depth > MAX_BPF_STACK) {
5127 			total = 0;
5128 			for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller)
5129 				total++;
5130 
5131 			verbose(env, "combined stack size of %d calls is %d. Too large\n",
5132 				total, depth);
5133 			return -EACCES;
5134 		}
5135 	}
5136 continue_func:
5137 	subprog_end = subprog[idx + 1].start;
5138 	for (; i < subprog_end; i++) {
5139 		int next_insn, sidx;
5140 
5141 		if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
5142 			bool err = false;
5143 
5144 			if (!bpf_is_throw_kfunc(insn + i))
5145 				continue;
5146 			for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) {
5147 				if (subprog[tmp].is_cb) {
5148 					err = true;
5149 					break;
5150 				}
5151 			}
5152 			if (!err)
5153 				continue;
5154 			verbose(env,
5155 				"bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
5156 				i, idx);
5157 			return -EINVAL;
5158 		}
5159 
5160 		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
5161 			continue;
5162 		/* remember insn and function to return to */
5163 
5164 		/* find the callee */
5165 		next_insn = i + insn[i].imm + 1;
5166 		sidx = bpf_find_subprog(env, next_insn);
5167 		if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
5168 			return -EFAULT;
5169 		if (subprog[sidx].is_async_cb) {
5170 			if (subprog[sidx].has_tail_call) {
5171 				verifier_bug(env, "subprog has tail_call and async cb");
5172 				return -EFAULT;
5173 			}
5174 			/* async callbacks don't increase bpf prog stack size unless called directly */
5175 			if (!bpf_pseudo_call(insn + i))
5176 				continue;
5177 			if (subprog[sidx].is_exception_cb) {
5178 				verbose(env, "insn %d cannot call exception cb directly", i);
5179 				return -EINVAL;
5180 			}
5181 		}
5182 
5183 		/* store caller info for after we return from callee */
5184 		dinfo[idx].frame = frame;
5185 		dinfo[idx].ret_insn = i + 1;
5186 
5187 		/* push caller idx into callee's dinfo */
5188 		dinfo[sidx].caller = idx;
5189 
5190 		i = next_insn;
5191 
5192 		idx = sidx;
5193 		if (!priv_stack_supported)
5194 			subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5195 
5196 		if (subprog[idx].has_tail_call)
5197 			tail_call_reachable = true;
5198 
5199 		frame = bpf_subprog_is_global(env, idx) ? 0 : frame + 1;
5200 		if (frame >= MAX_CALL_FRAMES) {
5201 			verbose(env, "the call stack of %d frames is too deep !\n",
5202 				frame);
5203 			return -E2BIG;
5204 		}
5205 		goto process_func;
5206 	}
5207 	/* if tail call got detected across bpf2bpf calls then mark each of the
5208 	 * currently present subprog frames as tail call reachable subprogs;
5209 	 * this info will be utilized by JIT so that we will be preserving the
5210 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
5211 	 */
5212 	if (tail_call_reachable) {
5213 		for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) {
5214 			if (subprog[tmp].is_exception_cb) {
5215 				verbose(env, "cannot tail call within exception cb\n");
5216 				return -EINVAL;
5217 			}
5218 			if (subprog[tmp].stack_arg_cnt) {
5219 				verbose(env, "tail_calls are not allowed in programs with stack args\n");
5220 				return -EINVAL;
5221 			}
5222 			subprog[tmp].tail_call_reachable = true;
5223 		}
5224 	} else if (!idx && subprog[0].has_tail_call && subprog[0].stack_arg_cnt) {
5225 		verbose(env, "tail_calls are not allowed in programs with stack args\n");
5226 		return -EINVAL;
5227 	}
5228 
5229 	if (subprog[0].tail_call_reachable)
5230 		env->prog->aux->tail_call_reachable = true;
5231 
5232 	/* end of for() loop means the last insn of the 'subprog'
5233 	 * was reached. Doesn't matter whether it was JA or EXIT
5234 	 */
5235 	if (frame == 0 && dinfo[idx].caller < 0)
5236 		return 0;
5237 	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
5238 		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
5239 
5240 	/* pop caller idx from callee */
5241 	idx = dinfo[idx].caller;
5242 
5243 	/* retrieve caller state from its frame */
5244 	frame = dinfo[idx].frame;
5245 	i = dinfo[idx].ret_insn;
5246 
5247 	/* reset tail_call_reachable to the parent's actual state */
5248 	tail_call_reachable = subprog[idx].tail_call_reachable;
5249 
5250 	goto continue_func;
5251 }
5252 
5253 static int check_max_stack_depth(struct bpf_verifier_env *env)
5254 {
5255 	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
5256 	struct bpf_subprog_call_depth_info *dinfo;
5257 	struct bpf_subprog_info *si = env->subprog_info;
5258 	bool priv_stack_supported;
5259 	int ret;
5260 
5261 	dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT);
5262 	if (!dinfo)
5263 		return -ENOMEM;
5264 
5265 	for (int i = 0; i < env->subprog_cnt; i++) {
5266 		if (si[i].has_tail_call) {
5267 			priv_stack_mode = NO_PRIV_STACK;
5268 			break;
5269 		}
5270 	}
5271 
5272 	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
5273 		priv_stack_mode = bpf_enable_priv_stack(env->prog);
5274 
5275 	/* All async_cb subprogs use normal kernel stack. If a particular
5276 	 * subprog appears in both main prog and async_cb subtree, that
5277 	 * subprog will use normal kernel stack to avoid potential nesting.
5278 	 * The reverse subprog traversal ensures when main prog subtree is
5279 	 * checked, the subprogs appearing in async_cb subtrees are already
5280 	 * marked as using normal kernel stack, so stack size checking can
5281 	 * be done properly.
5282 	 */
5283 	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
5284 		if (!i || si[i].is_async_cb) {
5285 			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
5286 			ret = check_max_stack_depth_subprog(env, i, dinfo,
5287 					priv_stack_supported);
5288 			if (ret < 0) {
5289 				kvfree(dinfo);
5290 				return ret;
5291 			}
5292 		}
5293 	}
5294 
5295 	for (int i = 0; i < env->subprog_cnt; i++) {
5296 		if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
5297 			env->prog->aux->jits_use_priv_stack = true;
5298 			break;
5299 		}
5300 	}
5301 
5302 	kvfree(dinfo);
5303 
5304 	return 0;
5305 }
5306 
5307 static int __check_buffer_access(struct bpf_verifier_env *env,
5308 				 const char *buf_info,
5309 				 const struct bpf_reg_state *reg,
5310 				 argno_t argno, int off, int size)
5311 {
5312 	if (off < 0) {
5313 		verbose(env,
5314 			"%s invalid %s buffer access: off=%d, size=%d\n",
5315 			reg_arg_name(env, argno), buf_info, off, size);
5316 		return -EACCES;
5317 	}
5318 	if (!tnum_is_const(reg->var_off)) {
5319 		char tn_buf[48];
5320 
5321 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5322 		verbose(env,
5323 			"%s invalid variable buffer offset: off=%d, var_off=%s\n",
5324 			reg_arg_name(env, argno), off, tn_buf);
5325 		return -EACCES;
5326 	}
5327 
5328 	return 0;
5329 }
5330 
5331 static int check_tp_buffer_access(struct bpf_verifier_env *env,
5332 				  const struct bpf_reg_state *reg,
5333 				  argno_t argno, int off, int size)
5334 {
5335 	int err;
5336 
5337 	err = __check_buffer_access(env, "tracepoint", reg, argno, off, size);
5338 	if (err)
5339 		return err;
5340 
5341 	env->prog->aux->max_tp_access = max(reg->var_off.value + off + size,
5342 					    env->prog->aux->max_tp_access);
5343 
5344 	return 0;
5345 }
5346 
5347 static int check_buffer_access(struct bpf_verifier_env *env,
5348 			       const struct bpf_reg_state *reg,
5349 			       argno_t argno, int off, int size,
5350 			       bool zero_size_allowed,
5351 			       u32 *max_access)
5352 {
5353 	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
5354 	int err;
5355 
5356 	err = __check_buffer_access(env, buf_info, reg, argno, off, size);
5357 	if (err)
5358 		return err;
5359 
5360 	*max_access = max(reg->var_off.value + off + size, *max_access);
5361 
5362 	return 0;
5363 }
5364 
5365 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
5366 static void zext_32_to_64(struct bpf_reg_state *reg)
5367 {
5368 	reg->var_off = tnum_subreg(reg->var_off);
5369 	reg_set_urange64(reg, reg_u32_min(reg), reg_u32_max(reg));
5370 }
5371 
5372 /* truncate register to smaller size (in bytes)
5373  * must be called with size < BPF_REG_SIZE
5374  */
5375 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5376 {
5377 	u64 mask;
5378 
5379 	/* clear high bits in bit representation */
5380 	reg->var_off = tnum_cast(reg->var_off, size);
5381 
5382 	/* fix arithmetic bounds */
5383 	mask = ((u64)1 << (size * 8)) - 1;
5384 	if ((reg_umin(reg) & ~mask) == (reg_umax(reg) & ~mask))
5385 		reg_set_urange64(reg, reg_umin(reg) & mask, reg_umax(reg) & mask);
5386 	else
5387 		reg_set_urange64(reg, 0, mask);
5388 
5389 	/* If size is smaller than 32bit register the 32bit register
5390 	 * values are also truncated so we push 64-bit bounds into
5391 	 * 32-bit bounds. Above were truncated < 32-bits already.
5392 	 */
5393 	if (size < 4)
5394 		__mark_reg32_unbounded(reg);
5395 
5396 	reg_bounds_sync(reg);
5397 }
5398 
5399 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
5400 {
5401 	if (size == 1) {
5402 		reg_set_srange64(reg, S8_MIN, S8_MAX);
5403 		reg_set_srange32(reg, S8_MIN, S8_MAX);
5404 	} else if (size == 2) {
5405 		reg_set_srange64(reg, S16_MIN, S16_MAX);
5406 		reg_set_srange32(reg, S16_MIN, S16_MAX);
5407 	} else {
5408 		/* size == 4 */
5409 		reg_set_srange64(reg, S32_MIN, S32_MAX);
5410 		reg_set_srange32(reg, S32_MIN, S32_MAX);
5411 	}
5412 	reg->var_off = tnum_unknown;
5413 }
5414 
5415 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
5416 {
5417 	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
5418 	u64 top_smax_value, top_smin_value;
5419 	u64 num_bits = size * 8;
5420 
5421 	if (tnum_is_const(reg->var_off)) {
5422 		u64_cval = reg->var_off.value;
5423 		if (size == 1)
5424 			reg->var_off = tnum_const((s8)u64_cval);
5425 		else if (size == 2)
5426 			reg->var_off = tnum_const((s16)u64_cval);
5427 		else
5428 			/* size == 4 */
5429 			reg->var_off = tnum_const((s32)u64_cval);
5430 
5431 		u64_cval = reg->var_off.value;
5432 		reg->r64 = cnum64_from_urange(u64_cval, u64_cval);
5433 		reg->r32 = cnum32_from_urange((u32)u64_cval, (u32)u64_cval);
5434 		return;
5435 	}
5436 
5437 	top_smax_value = ((u64)reg_smax(reg) >> num_bits) << num_bits;
5438 	top_smin_value = ((u64)reg_smin(reg) >> num_bits) << num_bits;
5439 
5440 	if (top_smax_value != top_smin_value)
5441 		goto out;
5442 
5443 	/* find the s64_min and s64_min after sign extension */
5444 	if (size == 1) {
5445 		init_s64_max = (s8)reg_smax(reg);
5446 		init_s64_min = (s8)reg_smin(reg);
5447 	} else if (size == 2) {
5448 		init_s64_max = (s16)reg_smax(reg);
5449 		init_s64_min = (s16)reg_smin(reg);
5450 	} else {
5451 		init_s64_max = (s32)reg_smax(reg);
5452 		init_s64_min = (s32)reg_smin(reg);
5453 	}
5454 
5455 	s64_max = max(init_s64_max, init_s64_min);
5456 	s64_min = min(init_s64_max, init_s64_min);
5457 
5458 	/* both of s64_max/s64_min positive or negative */
5459 	if ((s64_max >= 0) == (s64_min >= 0)) {
5460 		reg_set_srange64(reg, s64_min, s64_max);
5461 		reg_set_srange32(reg, s64_min, s64_max);
5462 		reg->var_off = tnum_range(s64_min, s64_max);
5463 		return;
5464 	}
5465 
5466 out:
5467 	set_sext64_default_val(reg, size);
5468 }
5469 
5470 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
5471 {
5472 	if (size == 1)
5473 		reg_set_srange32(reg, S8_MIN, S8_MAX);
5474 	else
5475 		/* size == 2 */
5476 		reg_set_srange32(reg, S16_MIN, S16_MAX);
5477 	reg->var_off = tnum_subreg(tnum_unknown);
5478 }
5479 
5480 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
5481 {
5482 	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
5483 	u32 top_smax_value, top_smin_value;
5484 	u32 num_bits = size * 8;
5485 
5486 	if (tnum_is_const(reg->var_off)) {
5487 		u32_val = reg->var_off.value;
5488 		if (size == 1)
5489 			reg->var_off = tnum_const((s8)u32_val);
5490 		else
5491 			reg->var_off = tnum_const((s16)u32_val);
5492 
5493 		u32_val = reg->var_off.value;
5494 		reg_set_srange32(reg, u32_val, u32_val);
5495 		return;
5496 	}
5497 
5498 	top_smax_value = ((u32)reg_s32_max(reg) >> num_bits) << num_bits;
5499 	top_smin_value = ((u32)reg_s32_min(reg) >> num_bits) << num_bits;
5500 
5501 	if (top_smax_value != top_smin_value)
5502 		goto out;
5503 
5504 	/* find the s32_min and s32_min after sign extension */
5505 	if (size == 1) {
5506 		init_s32_max = (s8)reg_s32_max(reg);
5507 		init_s32_min = (s8)reg_s32_min(reg);
5508 	} else {
5509 		/* size == 2 */
5510 		init_s32_max = (s16)reg_s32_max(reg);
5511 		init_s32_min = (s16)reg_s32_min(reg);
5512 	}
5513 	s32_max = max(init_s32_max, init_s32_min);
5514 	s32_min = min(init_s32_max, init_s32_min);
5515 
5516 	if ((s32_min >= 0) == (s32_max >= 0)) {
5517 		reg_set_srange32(reg, s32_min, s32_max);
5518 		reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
5519 		return;
5520 	}
5521 
5522 out:
5523 	set_sext32_default_val(reg, size);
5524 }
5525 
5526 bool bpf_map_is_rdonly(const struct bpf_map *map)
5527 {
5528 	/* A map is considered read-only if the following condition are true:
5529 	 *
5530 	 * 1) BPF program side cannot change any of the map content. The
5531 	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
5532 	 *    and was set at map creation time.
5533 	 * 2) The map value(s) have been initialized from user space by a
5534 	 *    loader and then "frozen", such that no new map update/delete
5535 	 *    operations from syscall side are possible for the rest of
5536 	 *    the map's lifetime from that point onwards.
5537 	 * 3) Any parallel/pending map update/delete operations from syscall
5538 	 *    side have been completed. Only after that point, it's safe to
5539 	 *    assume that map value(s) are immutable.
5540 	 */
5541 	return (map->map_flags & BPF_F_RDONLY_PROG) &&
5542 	       READ_ONCE(map->frozen) &&
5543 	       !bpf_map_write_active(map);
5544 }
5545 
5546 int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
5547 			bool is_ldsx)
5548 {
5549 	void *ptr;
5550 	u64 addr;
5551 	int err;
5552 
5553 	err = map->ops->map_direct_value_addr(map, &addr, off);
5554 	if (err)
5555 		return err;
5556 	ptr = (void *)(long)addr + off;
5557 
5558 	switch (size) {
5559 	case sizeof(u8):
5560 		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
5561 		break;
5562 	case sizeof(u16):
5563 		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
5564 		break;
5565 	case sizeof(u32):
5566 		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
5567 		break;
5568 	case sizeof(u64):
5569 		*val = *(u64 *)ptr;
5570 		break;
5571 	default:
5572 		return -EINVAL;
5573 	}
5574 	return 0;
5575 }
5576 
/* Build the name of a shadow type by pasting a trust-category suffix onto
 * @__type. The suffixes match the strings that type_is_rcu(),
 * type_is_rcu_or_null(), type_is_trusted() and type_is_trusted_or_null()
 * pass to btf_nested_type_is_trusted().
 */
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type)  __PASTE(__type, __safe_trusted_or_null)
5581 
/*
 * Allowlist a few fields as RCU trusted or fully trusted.
 * This logic doesn't allow mixed tagging and will be removed once GCC
 * supports btf_type_tag.
 */
5587 
/* RCU trusted: these fields are trusted in RCU CS and never NULL.
 * Each definition lists only the allowlisted fields of the named type.
 */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
	struct cgroup *cgroup;
};
5608 
/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
#ifdef CONFIG_MEMCG
	/* only listed when CONFIG_MEMCG is enabled */
	struct task_struct __rcu *owner;
#endif
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};
5627 
/* Fully trusted: these fields are trusted even outside of RCU CS and are
 * never NULL.
 */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};
5645 
/* Shadow types using the __safe_trusted_or_null suffix; each lists only the
 * allowlisted fields of the named type.
 */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
	struct inode *d_inode;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
	struct sock *sk;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) {
	struct mm_struct *vm_mm;
	struct file *vm_file;
};
5658 
5659 static bool type_is_rcu(struct bpf_verifier_env *env,
5660 			struct bpf_reg_state *reg,
5661 			const char *field_name, u32 btf_id)
5662 {
5663 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
5664 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
5665 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
5666 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));
5667 
5668 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
5669 }
5670 
5671 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
5672 				struct bpf_reg_state *reg,
5673 				const char *field_name, u32 btf_id)
5674 {
5675 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
5676 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
5677 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
5678 
5679 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
5680 }
5681 
5682 static bool type_is_trusted(struct bpf_verifier_env *env,
5683 			    struct bpf_reg_state *reg,
5684 			    const char *field_name, u32 btf_id)
5685 {
5686 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
5687 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
5688 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
5689 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
5690 
5691 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
5692 }
5693 
5694 static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
5695 				    struct bpf_reg_state *reg,
5696 				    const char *field_name, u32 btf_id)
5697 {
5698 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
5699 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
5700 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct));
5701 
5702 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
5703 					  "__safe_trusted_or_null");
5704 }
5705 
/* Validate a @size-byte access of kind @atype at @off through @reg, a
 * PTR_TO_BTF_ID based pointer, and, for reads with value_regno >= 0, describe
 * the loaded value in regs[value_regno] via mark_btf_ld_reg().
 *
 * The access is rejected when pointer leaks are not allowed for this program,
 * when a non-GPL-compatible program touches kernel BTF, when the register
 * offset is not a constant or the folded offset is negative, and when the
 * pointer is tagged MEM_USER or MEM_PERCPU.
 *
 * When the struct walk yields another PTR_TO_BTF_ID, the flags of the result
 * are derived from the parent register's flags and the type_is_*()
 * allow-lists (see the comments in the body).
 *
 * @argno is only used to name the register in diagnostics.
 * Returns 0 on success or a negative error code.
 */
5706 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
5707 				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
5708 				   argno_t argno, int off, int size,
5709 				   enum bpf_access_type atype,
5710 				   int value_regno)
5711 {
5712 	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
5713 	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
5714 	const char *field_name = NULL;
5715 	enum bpf_type_flag flag = 0;
5716 	u32 btf_id = 0;
5717 	int ret;
5718 
5719 	if (!env->allow_ptr_leaks) {
5720 		verbose(env,
5721 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5722 			tname);
5723 		return -EPERM;
5724 	}
5725 	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
5726 		verbose(env,
5727 			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
5728 			tname);
5729 		return -EINVAL;
5730 	}
5731 
5732 	if (!tnum_is_const(reg->var_off)) {
5733 		char tn_buf[48];
5734 
5735 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5736 		verbose(env,
5737 			"%s is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
5738 			reg_arg_name(env, argno), tname, off, tn_buf);
5739 		return -EACCES;
5740 	}
5741 
	/* var_off is a known constant here; fold it into the access offset. */
5742 	off += reg->var_off.value;
5743 
5744 	if (off < 0) {
5745 		verbose(env,
5746 			"%s is ptr_%s invalid negative access: off=%d\n",
5747 			reg_arg_name(env, argno), tname, off);
5748 		return -EACCES;
5749 	}
5750 
5751 	if (reg->type & MEM_USER) {
5752 		verbose(env,
5753 			"%s is ptr_%s access user memory: off=%d\n",
5754 			reg_arg_name(env, argno), tname, off);
5755 		return -EACCES;
5756 	}
5757 
5758 	if (reg->type & MEM_PERCPU) {
5759 		verbose(env,
5760 			"%s is ptr_%s access percpu memory: off=%d\n",
5761 			reg_arg_name(env, argno), tname, off);
5762 		return -EACCES;
5763 	}
5764 
	/* Writes to objects that are not program-allocated are delegated to
	 * the program type's btf_struct_access callback when it has one; on
	 * that path btf_id, flag and field_name keep their initial values.
	 */
5765 	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
5766 		if (!btf_is_kernel(reg->btf)) {
5767 			verifier_bug(env, "reg->btf must be kernel btf");
5768 			return -EFAULT;
5769 		}
5770 		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
5771 	} else {
5772 		/* Writes are permitted with default btf_struct_access for
5773 		 * program allocated objects (which always have id > 0),
5774 		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
5775 		 */
5776 		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
5777 			verbose(env, "only read is supported\n");
5778 			return -EACCES;
5779 		}
5780 
5781 		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
5782 		    !(reg->type & MEM_RCU) && !reg_is_referenced(env, reg)) {
5783 			verifier_bug(env, "allocated object must have a referenced id");
5784 			return -EFAULT;
5785 		}
5786 
5787 		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
5788 	}
5789 
5790 	if (ret < 0)
5791 		return ret;
5792 
	/* From here on 'ret' is the register type of the accessed field and
	 * 'flag' is adjusted according to how much the parent is trusted.
	 */
5793 	if (ret != PTR_TO_BTF_ID) {
5794 		/* just mark; */
5795 
5796 	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
5797 		/* If this is an untrusted pointer, all pointers formed by walking it
5798 		 * also inherit the untrusted flag.
5799 		 */
5800 		flag = PTR_UNTRUSTED;
5801 
5802 	} else if (is_trusted_reg(env, reg) || is_rcu_reg(reg)) {
5803 		/* By default any pointer obtained from walking a trusted pointer is no
5804 		 * longer trusted, unless the field being accessed has explicitly been
5805 		 * marked as inheriting its parent's state of trust (either full or RCU).
5806 		 * For example:
5807 		 * 'cgroups' pointer is untrusted if task->cgroups dereference
5808 		 * happened in a sleepable program outside of bpf_rcu_read_lock()
5809 		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
5810 		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
5811 		 *
5812 		 * A regular RCU-protected pointer with __rcu tag can also be deemed
5813 		 * trusted if we are in an RCU CS. Such pointer can be NULL.
5814 		 */
5815 		if (type_is_trusted(env, reg, field_name, btf_id)) {
5816 			flag |= PTR_TRUSTED;
5817 		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
5818 			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
5819 		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
5820 			if (type_is_rcu(env, reg, field_name, btf_id)) {
5821 				/* ignore __rcu tag and mark it MEM_RCU */
5822 				flag |= MEM_RCU;
5823 			} else if (flag & MEM_RCU ||
5824 				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
5825 				/* __rcu tagged pointers can be NULL */
5826 				flag |= MEM_RCU | PTR_MAYBE_NULL;
5827 
5828 				/* We always trust them */
5829 				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
5830 				    flag & PTR_UNTRUSTED)
5831 					flag &= ~PTR_UNTRUSTED;
5832 			} else if (flag & (MEM_PERCPU | MEM_USER)) {
5833 				/* keep as-is */
5834 			} else {
5835 				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
5836 				clear_trusted_flags(&flag);
5837 			}
5838 		} else {
5839 			/*
5840 			 * If not in RCU CS or MEM_RCU pointer can be NULL then
5841 			 * aggressively mark as untrusted otherwise such
5842 			 * pointers will be plain PTR_TO_BTF_ID without flags
5843 			 * and will be allowed to be passed into helpers for
5844 			 * compat reasons.
5845 			 */
5846 			flag = PTR_UNTRUSTED;
5847 		}
5848 	} else {
5849 		/* Old compat. Deprecated */
5850 		clear_trusted_flags(&flag);
5851 	}
5852 
	/* Only a read into a real destination register records a result. */
5853 	if (atype == BPF_READ && value_regno >= 0) {
5854 		ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
5855 		if (ret < 0)
5856 			return ret;
5857 	}
5858 
5859 	return 0;
5860 }
5861 
5862 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
5863 				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
5864 				   argno_t argno, int off, int size,
5865 				   enum bpf_access_type atype,
5866 				   int value_regno)
5867 {
5868 	struct bpf_map *map = reg->map_ptr;
5869 	struct bpf_reg_state map_reg;
5870 	enum bpf_type_flag flag = 0;
5871 	const struct btf_type *t;
5872 	const char *tname;
5873 	u32 btf_id;
5874 	int ret;
5875 
5876 	if (!btf_vmlinux) {
5877 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
5878 		return -ENOTSUPP;
5879 	}
5880 
5881 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
5882 		verbose(env, "map_ptr access not supported for map type %d\n",
5883 			map->map_type);
5884 		return -ENOTSUPP;
5885 	}
5886 
5887 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
5888 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5889 
5890 	if (!env->allow_ptr_leaks) {
5891 		verbose(env,
5892 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5893 			tname);
5894 		return -EPERM;
5895 	}
5896 
5897 	if (off < 0) {
5898 		verbose(env, "%s is %s invalid negative access: off=%d\n",
5899 			reg_arg_name(env, argno), tname, off);
5900 		return -EACCES;
5901 	}
5902 
5903 	if (atype != BPF_READ) {
5904 		verbose(env, "only read from %s is supported\n", tname);
5905 		return -EACCES;
5906 	}
5907 
5908 	/* Simulate access to a PTR_TO_BTF_ID */
5909 	memset(&map_reg, 0, sizeof(map_reg));
5910 	ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
5911 			      btf_vmlinux, *map->ops->map_btf_id, 0);
5912 	if (ret < 0)
5913 		return ret;
5914 	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
5915 	if (ret < 0)
5916 		return ret;
5917 
5918 	if (value_regno >= 0) {
5919 		ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
5920 		if (ret < 0)
5921 			return ret;
5922 	}
5923 
5924 	return 0;
5925 }
5926 
5927 /* Check that the stack access at the given offset is within bounds. The
5928  * maximum valid offset is -1.
5929  *
5930  * The minimum valid offset is -MAX_BPF_STACK for writes, and
5931  * -state->allocated_stack for reads.
5932  */
5933 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
5934                                           s64 off,
5935                                           struct bpf_func_state *state,
5936                                           enum bpf_access_type t)
5937 {
5938 	int min_valid_off;
5939 
5940 	if (t == BPF_WRITE || env->allow_uninit_stack)
5941 		min_valid_off = -MAX_BPF_STACK;
5942 	else
5943 		min_valid_off = -state->allocated_stack;
5944 
5945 	if (off < min_valid_off || off > -1)
5946 		return -EACCES;
5947 	return 0;
5948 }
5949 
5950 /* Check that the stack access at 'regno + off' falls within the maximum stack
5951  * bounds.
5952  *
5953  * 'off' includes `regno->offset`, but not its dynamic part (if any).
5954  */
5955 static int check_stack_access_within_bounds(
5956 		struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5957 		argno_t argno, int off, int access_size,
5958 		enum bpf_access_type type)
5959 {
5960 	struct bpf_func_state *state = bpf_func(env, reg);
5961 	s64 min_off, max_off;
5962 	int err;
5963 	char *err_extra;
5964 
5965 	if (type == BPF_READ)
5966 		err_extra = " read from";
5967 	else
5968 		err_extra = " write to";
5969 
5970 	if (tnum_is_const(reg->var_off)) {
5971 		min_off = (s64)reg->var_off.value + off;
5972 		max_off = min_off + access_size;
5973 	} else {
5974 		if (reg_smax(reg) >= BPF_MAX_VAR_OFF ||
5975 		    reg_smin(reg) <= -BPF_MAX_VAR_OFF) {
5976 			verbose(env, "invalid unbounded variable-offset%s stack %s\n",
5977 				err_extra, reg_arg_name(env, argno));
5978 			return -EACCES;
5979 		}
5980 		min_off = reg_smin(reg) + off;
5981 		max_off = reg_smax(reg) + off + access_size;
5982 	}
5983 
5984 	err = check_stack_slot_within_bounds(env, min_off, state, type);
5985 	if (!err && max_off > 0)
5986 		err = -EINVAL; /* out of stack access into non-negative offsets */
5987 	if (!err && access_size < 0)
5988 		/* access_size should not be negative (or overflow an int); others checks
5989 		 * along the way should have prevented such an access.
5990 		 */
5991 		err = -EFAULT; /* invalid negative access size; integer overflow? */
5992 
5993 	if (err) {
5994 		if (tnum_is_const(reg->var_off)) {
5995 			verbose(env, "invalid%s stack %s off=%lld size=%d\n",
5996 				err_extra, reg_arg_name(env, argno), min_off, access_size);
5997 		} else {
5998 			char tn_buf[48];
5999 
6000 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6001 			verbose(env, "invalid variable-offset%s stack %s var_off=%s off=%d size=%d\n",
6002 				err_extra, reg_arg_name(env, argno), tn_buf, off, access_size);
6003 		}
6004 		return err;
6005 	}
6006 
6007 	/* Note that there is no stack access with offset zero, so the needed stack
6008 	 * size is -min_off, not -min_off+1.
6009 	 */
6010 	return grow_stack_state(env, state, -min_off /* size */);
6011 }
6012 
6013 static bool get_func_retval_range(struct bpf_prog *prog,
6014 				  struct bpf_retval_range *range)
6015 {
6016 	if (prog->type == BPF_PROG_TYPE_LSM &&
6017 		prog->expected_attach_type == BPF_LSM_MAC &&
6018 		!bpf_lsm_get_retval_range(prog, range)) {
6019 		return true;
6020 	}
6021 	return false;
6022 }
6023 
6024 static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val)
6025 {
6026 	struct bpf_reg_state fake_reg;
6027 
6028 	if (!val)
6029 		return;
6030 
6031 	fake_reg.type = SCALAR_VALUE;
6032 	__mark_reg_known(&fake_reg, val);
6033 
6034 	scalar32_min_max_add(dst_reg, &fake_reg);
6035 	scalar_min_max_add(dst_reg, &fake_reg);
6036 	dst_reg->var_off = tnum_add(dst_reg->var_off, fake_reg.var_off);
6037 
6038 	reg_bounds_sync(dst_reg);
6039 }
6040 
6041 /* check whether memory at (regno + off) is accessible for t = (read | write)
6042  * if t==write, value_regno is a register which value is stored into memory
6043  * if t==read, value_regno is a register which will receive the value from memory
6044  * if t==write && value_regno==-1, some unknown value is stored into memory
6045  * if t==read && value_regno==-1, don't care what we read from memory
 *
 * reg is the pointer register being dereferenced; argno is only used to name
 * it in diagnostics. bpf_size is the size encoding taken from the
 * instruction (callers pass BPF_SIZE(insn->code)) and is converted to bytes
 * with bpf_size_to_bytes(). The checks applied depend on reg->type; pointer
 * types not handled below are rejected with -EACCES.
 *
 * Returns 0 on success or a negative error code.
6046  */
6047 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno,
6048 			    int off, int bpf_size, enum bpf_access_type t,
6049 			    int value_regno, bool strict_alignment_once, bool is_ldsx)
6050 {
6051 	struct bpf_reg_state *regs = cur_regs(env);
6052 	int size, err = 0;
6053 
	/* A negative result from bpf_size_to_bytes() is returned as the error. */
6054 	size = bpf_size_to_bytes(bpf_size);
6055 	if (size < 0)
6056 		return size;
6057 
6058 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
6059 	if (err)
6060 		return err;
6061 
6062 	if (reg->type == PTR_TO_MAP_KEY) {
6063 		if (t == BPF_WRITE) {
6064 			verbose(env, "write to change key %s not allowed\n",
6065 				reg_arg_name(env, argno));
6066 			return -EACCES;
6067 		}
6068 
6069 		err = check_mem_region_access(env, reg, argno, off, size,
6070 					      reg->map_ptr->key_size, false);
6071 		if (err)
6072 			return err;
6073 		if (value_regno >= 0)
6074 			mark_reg_unknown(env, regs, value_regno);
6075 	} else if (reg->type == PTR_TO_MAP_VALUE) {
6076 		struct btf_field *kptr_field = NULL;
6077 
6078 		if (t == BPF_WRITE && value_regno >= 0 &&
6079 		    is_pointer_value(env, value_regno)) {
6080 			verbose(env, "R%d leaks addr into map\n", value_regno);
6081 			return -EACCES;
6082 		}
6083 		err = check_map_access_type(env, reg, off, size, t);
6084 		if (err)
6085 			return err;
6086 		err = check_map_access(env, reg, argno, off, size, false, ACCESS_DIRECT);
6087 		if (err)
6088 			return err;
		/* A kptr/uptr field is only looked up when the offset is constant. */
6089 		if (tnum_is_const(reg->var_off))
6090 			kptr_field = btf_record_find(reg->map_ptr->record,
6091 						     off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
6092 		if (kptr_field) {
6093 			err = check_map_kptr_access(env, value_regno, insn_idx, kptr_field);
6094 		} else if (t == BPF_READ && value_regno >= 0) {
6095 			struct bpf_map *map = reg->map_ptr;
6096 
6097 			/*
6098 			 * If map is read-only, track its contents as scalars,
6099 			 * unless it is an insn array (see the special case below)
6100 			 */
6101 			if (tnum_is_const(reg->var_off) &&
6102 			    bpf_map_is_rdonly(map) &&
6103 			    map->ops->map_direct_value_addr &&
6104 			    map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
6105 				int map_off = off + reg->var_off.value;
6106 				u64 val = 0;
6107 
6108 				err = bpf_map_direct_read(map, map_off, size,
6109 							  &val, is_ldsx);
6110 				if (err)
6111 					return err;
6112 
6113 				regs[value_regno].type = SCALAR_VALUE;
6114 				__mark_reg_known(&regs[value_regno], val);
6115 			} else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
6116 				if (bpf_size != BPF_DW) {
6117 					verbose(env, "Invalid read of %d bytes from insn_array\n",
6118 						     size);
6119 					return -EACCES;
6120 				}
				/* The destination becomes a copy of the source
				 * register advanced by 'off' and retyped as
				 * PTR_TO_INSN.
				 */
6121 				regs[value_regno] = *reg;
6122 				add_scalar_to_reg(&regs[value_regno], off);
6123 				regs[value_regno].type = PTR_TO_INSN;
6124 			} else {
6125 				mark_reg_unknown(env, regs, value_regno);
6126 			}
6127 		}
6128 	} else if (base_type(reg->type) == PTR_TO_MEM) {
6129 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
6130 		bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);
6131 
6132 		if (type_may_be_null(reg->type)) {
6133 			verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno),
6134 				reg_type_str(env, reg->type));
6135 			return -EACCES;
6136 		}
6137 
6138 		if (t == BPF_WRITE && rdonly_mem) {
6139 			verbose(env, "%s cannot write into %s\n",
6140 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
6141 			return -EACCES;
6142 		}
6143 
6144 		if (t == BPF_WRITE && value_regno >= 0 &&
6145 		    is_pointer_value(env, value_regno)) {
6146 			verbose(env, "R%d leaks addr into mem\n", value_regno);
6147 			return -EACCES;
6148 		}
6149 
6150 		/*
6151 		 * Accesses to untrusted PTR_TO_MEM are done through probe
6152 		 * instructions, hence no need to check bounds in that case.
6153 		 */
6154 		if (!rdonly_untrusted)
6155 			err = check_mem_region_access(env, reg, argno, off, size,
6156 						      reg->mem_size, false);
6157 		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
6158 			mark_reg_unknown(env, regs, value_regno);
6159 	} else if (reg->type == PTR_TO_CTX) {
6160 		struct bpf_insn_access_aux info = {
6161 			.reg_type = SCALAR_VALUE,
6162 			.is_ldsx = is_ldsx,
6163 			.log = &env->log,
6164 		};
6165 		struct bpf_retval_range range;
6166 
6167 		if (t == BPF_WRITE && value_regno >= 0 &&
6168 		    is_pointer_value(env, value_regno)) {
6169 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
6170 			return -EACCES;
6171 		}
6172 
6173 		err = check_ctx_access(env, insn_idx, reg, argno, off, size, t, &info);
6174 		if (!err && t == BPF_READ && value_regno >= 0) {
6175 			/* ctx access returns either a scalar, or a
6176 			 * PTR_TO_PACKET[_META,_END]. In the latter
6177 			 * case, we know the offset is zero.
6178 			 */
6179 			if (info.reg_type == SCALAR_VALUE) {
6180 				if (info.is_retval && get_func_retval_range(env->prog, &range)) {
6181 					err = __mark_reg_s32_range(env, regs, value_regno,
6182 								   range.minval, range.maxval);
6183 					if (err)
6184 						return err;
6185 				} else {
6186 					mark_reg_unknown(env, regs, value_regno);
6187 				}
6188 			} else {
6189 				mark_reg_known_zero(env, regs,
6190 						    value_regno);
6191 				/* A load of ctx field could have different
6192 				 * actual load size with the one encoded in the
6193 				 * insn. When the dst is PTR, it is for sure not
6194 				 * a sub-register.
6195 				 */
6196 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
6197 				if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
6198 					regs[value_regno].btf = info.btf;
6199 					regs[value_regno].btf_id = info.btf_id;
6200 					regs[value_regno].id = info.ref_id;
6201 				}
				/* Nullable results without an id get a fresh one. */
6202 				if (type_may_be_null(info.reg_type) && !regs[value_regno].id)
6203 					regs[value_regno].id = ++env->id_gen;
6204 			}
6205 			regs[value_regno].type = info.reg_type;
6206 		}
6207 
6208 	} else if (reg->type == PTR_TO_STACK) {
6209 		/* Basic bounds checks. */
6210 		err = check_stack_access_within_bounds(env, reg, argno, off, size, t);
6211 		if (err)
6212 			return err;
6213 
6214 		if (t == BPF_READ)
6215 			err = check_stack_read(env, reg, argno, off, size,
6216 					       value_regno);
6217 		else
6218 			err = check_stack_write(env, reg, off, size,
6219 						value_regno, insn_idx);
6220 	} else if (reg_is_pkt_pointer(reg)) {
6221 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
6222 			verbose(env, "cannot write into packet\n");
6223 			return -EACCES;
6224 		}
6225 		if (t == BPF_WRITE && value_regno >= 0 &&
6226 		    is_pointer_value(env, value_regno)) {
6227 			verbose(env, "R%d leaks addr into packet\n",
6228 				value_regno);
6229 			return -EACCES;
6230 		}
6231 		err = check_packet_access(env, reg, argno, off, size, false);
6232 		if (!err && t == BPF_READ && value_regno >= 0)
6233 			mark_reg_unknown(env, regs, value_regno);
6234 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
6235 		if (t == BPF_WRITE && value_regno >= 0 &&
6236 		    is_pointer_value(env, value_regno)) {
6237 			verbose(env, "R%d leaks addr into flow keys\n",
6238 				value_regno);
6239 			return -EACCES;
6240 		}
6241 
6242 		err = check_flow_keys_access(env, off, size);
6243 		if (!err && t == BPF_READ && value_regno >= 0)
6244 			mark_reg_unknown(env, regs, value_regno);
6245 	} else if (type_is_sk_pointer(reg->type)) {
6246 		if (t == BPF_WRITE) {
6247 			verbose(env, "%s cannot write into %s\n",
6248 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
6249 			return -EACCES;
6250 		}
6251 		err = check_sock_access(env, insn_idx, reg, argno, off, size, t);
6252 		if (!err && value_regno >= 0)
6253 			mark_reg_unknown(env, regs, value_regno);
6254 	} else if (reg->type == PTR_TO_TP_BUFFER) {
6255 		err = check_tp_buffer_access(env, reg, argno, off, size);
6256 		if (!err && t == BPF_READ && value_regno >= 0)
6257 			mark_reg_unknown(env, regs, value_regno);
6258 	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
6259 		   !type_may_be_null(reg->type)) {
6260 		err = check_ptr_to_btf_access(env, regs, reg, argno, off, size, t,
6261 					      value_regno);
6262 	} else if (reg->type == CONST_PTR_TO_MAP) {
6263 		err = check_ptr_to_map_access(env, regs, reg, argno, off, size, t,
6264 					      value_regno);
6265 	} else if (base_type(reg->type) == PTR_TO_BUF &&
6266 		   !type_may_be_null(reg->type)) {
6267 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
6268 		u32 *max_access;
6269 
		/* Read-only and read-write buffers track their maximum access
		 * in separate prog->aux counters.
		 */
6270 		if (rdonly_mem) {
6271 			if (t == BPF_WRITE) {
6272 				verbose(env, "%s cannot write into %s\n",
6273 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6274 				return -EACCES;
6275 			}
6276 			max_access = &env->prog->aux->max_rdonly_access;
6277 		} else {
6278 			max_access = &env->prog->aux->max_rdwr_access;
6279 		}
6280 
6281 		err = check_buffer_access(env, reg, argno, off, size, false,
6282 					  max_access);
6283 
6284 		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
6285 			mark_reg_unknown(env, regs, value_regno);
6286 	} else if (reg->type == PTR_TO_ARENA) {
		/* No bounds check is performed here for arena pointers; a read
		 * only marks the destination as an unknown value.
		 */
6287 		if (t == BPF_READ && value_regno >= 0)
6288 			mark_reg_unknown(env, regs, value_regno);
6289 	} else {
6290 		verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno),
6291 			reg_type_str(env, reg->type));
6292 		return -EACCES;
6293 	}
6294 
	/* A successful load narrower than BPF_REG_SIZE that left a scalar in
	 * the destination has that scalar coerced to the access size.
	 * NOTE(review): coerce_reg_to_size_sx() is presumably the
	 * sign-extending counterpart used for is_ldsx loads - confirm.
	 */
6295 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
6296 	    regs[value_regno].type == SCALAR_VALUE) {
6297 		if (!is_ldsx)
6298 			/* b/h/w load zero-extends, mark upper bits as known 0 */
6299 			coerce_reg_to_size(&regs[value_regno], size);
6300 		else
6301 			coerce_reg_to_size_sx(&regs[value_regno], size);
6302 	}
6303 	return err;
6304 }
6305 
/* Forward declaration: the load, store and atomic checks that follow call
 * this with the pointer type of the register being dereferenced. The
 * definition lives elsewhere in this file.
 */
6306 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
6307 			     bool allow_trust_mismatch);
6308 
6309 static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
6310 			  bool strict_alignment_once, bool is_ldsx,
6311 			  bool allow_trust_mismatch, const char *ctx)
6312 {
6313 	struct bpf_verifier_state *vstate = env->cur_state;
6314 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6315 	struct bpf_reg_state *regs = cur_regs(env);
6316 	enum bpf_reg_type src_reg_type;
6317 	int err;
6318 
6319 	/* Handle stack arg read */
6320 	if (is_stack_arg_ldx(insn)) {
6321 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6322 		if (err)
6323 			return err;
6324 		return check_stack_arg_read(env, state, insn->off, insn->dst_reg);
6325 	}
6326 
6327 	/* check src operand */
6328 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6329 	if (err)
6330 		return err;
6331 
6332 	/* check dst operand */
6333 	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6334 	if (err)
6335 		return err;
6336 
6337 	src_reg_type = regs[insn->src_reg].type;
6338 
6339 	/* Check if (src_reg + off) is readable. The state of dst_reg will be
6340 	 * updated by this call.
6341 	 */
6342 	err = check_mem_access(env, env->insn_idx, regs + insn->src_reg, argno_from_reg(insn->src_reg), insn->off,
6343 			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
6344 			       strict_alignment_once, is_ldsx);
6345 	err = err ?: save_aux_ptr_type(env, src_reg_type,
6346 				       allow_trust_mismatch);
6347 	err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);
6348 
6349 	return err;
6350 }
6351 
6352 static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
6353 			   bool strict_alignment_once)
6354 {
6355 	struct bpf_verifier_state *vstate = env->cur_state;
6356 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6357 	struct bpf_reg_state *regs = cur_regs(env);
6358 	enum bpf_reg_type dst_reg_type;
6359 	int err;
6360 
6361 	/* Handle stack arg write */
6362 	if (is_stack_arg_stx(insn)) {
6363 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
6364 		if (err)
6365 			return err;
6366 		return check_stack_arg_write(env, state, insn->off, regs + insn->src_reg);
6367 	}
6368 
6369 	/* check src1 operand */
6370 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6371 	if (err)
6372 		return err;
6373 
6374 	/* check src2 operand */
6375 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6376 	if (err)
6377 		return err;
6378 
6379 	dst_reg_type = regs[insn->dst_reg].type;
6380 
6381 	/* Check if (dst_reg + off) is writeable. */
6382 	err = check_mem_access(env, env->insn_idx, regs + insn->dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6383 			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
6384 			       strict_alignment_once, false);
6385 	err = err ?: save_aux_ptr_type(env, dst_reg_type, false);
6386 
6387 	return err;
6388 }
6389 
6390 static int check_atomic_rmw(struct bpf_verifier_env *env,
6391 			    struct bpf_insn *insn)
6392 {
6393 	struct bpf_reg_state *dst_reg;
6394 	int load_reg;
6395 	int err;
6396 
6397 	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
6398 		verbose(env, "invalid atomic operand size\n");
6399 		return -EINVAL;
6400 	}
6401 
6402 	/* check src1 operand */
6403 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6404 	if (err)
6405 		return err;
6406 
6407 	/* check src2 operand */
6408 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6409 	if (err)
6410 		return err;
6411 
6412 	if (insn->imm == BPF_CMPXCHG) {
6413 		/* Check comparison of R0 with memory location */
6414 		const u32 aux_reg = BPF_REG_0;
6415 
6416 		err = check_reg_arg(env, aux_reg, SRC_OP);
6417 		if (err)
6418 			return err;
6419 
6420 		if (is_pointer_value(env, aux_reg)) {
6421 			verbose(env, "R%d leaks addr into mem\n", aux_reg);
6422 			return -EACCES;
6423 		}
6424 	}
6425 
6426 	if (is_pointer_value(env, insn->src_reg)) {
6427 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6428 		return -EACCES;
6429 	}
6430 
6431 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6432 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6433 			insn->dst_reg,
6434 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6435 		return -EACCES;
6436 	}
6437 
6438 	if (insn->imm & BPF_FETCH) {
6439 		if (insn->imm == BPF_CMPXCHG)
6440 			load_reg = BPF_REG_0;
6441 		else
6442 			load_reg = insn->src_reg;
6443 
6444 		/* check and record load of old value */
6445 		err = check_reg_arg(env, load_reg, DST_OP);
6446 		if (err)
6447 			return err;
6448 	} else {
6449 		/* This instruction accesses a memory location but doesn't
6450 		 * actually load it into a register.
6451 		 */
6452 		load_reg = -1;
6453 	}
6454 
6455 	dst_reg = cur_regs(env) + insn->dst_reg;
6456 
6457 	/* Check whether we can read the memory, with second call for fetch
6458 	 * case to simulate the register fill.
6459 	 */
6460 	err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6461 			       BPF_SIZE(insn->code), BPF_READ, -1, true, false);
6462 	if (!err && load_reg >= 0)
6463 		err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg),
6464 				       insn->off, BPF_SIZE(insn->code),
6465 				       BPF_READ, load_reg, true, false);
6466 	if (err)
6467 		return err;
6468 
6469 	if (is_arena_reg(env, insn->dst_reg)) {
6470 		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
6471 		if (err)
6472 			return err;
6473 	}
6474 	/* Check whether we can write into the same memory. */
6475 	err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6476 			       BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
6477 	if (err)
6478 		return err;
6479 	return 0;
6480 }
6481 
6482 static int check_atomic_load(struct bpf_verifier_env *env,
6483 			     struct bpf_insn *insn)
6484 {
6485 	int err;
6486 
6487 	err = check_load_mem(env, insn, true, false, false, "atomic_load");
6488 	if (err)
6489 		return err;
6490 
6491 	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
6492 		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
6493 			insn->src_reg,
6494 			reg_type_str(env, reg_state(env, insn->src_reg)->type));
6495 		return -EACCES;
6496 	}
6497 
6498 	return 0;
6499 }
6500 
6501 static int check_atomic_store(struct bpf_verifier_env *env,
6502 			      struct bpf_insn *insn)
6503 {
6504 	int err;
6505 
6506 	err = check_store_reg(env, insn, true);
6507 	if (err)
6508 		return err;
6509 
6510 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6511 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6512 			insn->dst_reg,
6513 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6514 		return -EACCES;
6515 	}
6516 
6517 	return 0;
6518 }
6519 
6520 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
6521 {
6522 	switch (insn->imm) {
6523 	case BPF_ADD:
6524 	case BPF_ADD | BPF_FETCH:
6525 	case BPF_AND:
6526 	case BPF_AND | BPF_FETCH:
6527 	case BPF_OR:
6528 	case BPF_OR | BPF_FETCH:
6529 	case BPF_XOR:
6530 	case BPF_XOR | BPF_FETCH:
6531 	case BPF_XCHG:
6532 	case BPF_CMPXCHG:
6533 		return check_atomic_rmw(env, insn);
6534 	case BPF_LOAD_ACQ:
6535 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6536 			verbose(env,
6537 				"64-bit load-acquires are only supported on 64-bit arches\n");
6538 			return -EOPNOTSUPP;
6539 		}
6540 		return check_atomic_load(env, insn);
6541 	case BPF_STORE_REL:
6542 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6543 			verbose(env,
6544 				"64-bit store-releases are only supported on 64-bit arches\n");
6545 			return -EOPNOTSUPP;
6546 		}
6547 		return check_atomic_store(env, insn);
6548 	default:
6549 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
6550 			insn->imm);
6551 		return -EINVAL;
6552 	}
6553 }
6554 
/* When the stack pointer held in 'reg' (reported to the user as argument
 * 'argno') is used to access the stack (either directly or through a helper
 * function) make sure that the access is within stack boundary and, depending
 * on the access type and privileges, that all elements of the stack are
 * initialized.
 *
 * @off:               constant added to the offset already carried by 'reg'
 * @access_size:       number of bytes accessed; a negative value is treated as
 *                     its absolute value and additionally tolerates
 *                     STACK_POISON slots (see allow_poison below)
 * @zero_size_allowed: whether an access of 0 bytes is acceptable
 * @type:              BPF_WRITE accesses may clobber the visited slots
 * @meta:              optional; when meta->raw_mode is set the initialization
 *                     checks are skipped and the access is recorded in @meta
 *
 * Returns 0 if the access is acceptable, -EACCES or -EFAULT otherwise, or the
 * error returned by check_stack_access_within_bounds().
 *
 * NOTE(review): this comment used to state that spilled registers within the
 * read offsets are marked as read. The 'mark' label below is empty, so no
 * such marking is performed by this function.
 */
static int check_stack_range_initialized(
		struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off,
		int access_size, bool zero_size_allowed,
		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int err, min_off, max_off, i, j, slot, spi;
	/* Some accesses can write anything into the stack, others are
	 * read-only.
	 */
	bool clobber = type == BPF_WRITE;
	/*
	 * Negative access_size signals global subprog/kfunc arg check where
	 * STACK_POISON slots are acceptable. static stack liveness
	 * might have determined that subprog doesn't read them,
	 * but BTF based global subprog validation isn't accurate enough.
	 */
	bool allow_poison = access_size < 0 || clobber;

	/* The sign was only a flag (captured above); use the magnitude from here on. */
	access_size = abs(access_size);

	if (access_size == 0 && !zero_size_allowed) {
		verbose(env, "invalid zero-sized read\n");
		return -EACCES;
	}

	err = check_stack_access_within_bounds(env, reg, argno, off, access_size, type);
	if (err)
		return err;


	if (tnum_is_const(reg->var_off)) {
		/* Constant offset: the accessed range starts at exactly one place. */
		min_off = max_off = reg->var_off.value + off;
	} else {
		/* Variable offset is prohibited for unprivileged mode for
		 * simplicity since it requires corresponding support in
		 * Spectre masking for stack ALU.
		 * See also retrieve_ptr_limit().
		 */
		if (!env->bypass_spec_v1) {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "%s variable offset stack access prohibited for !root, var_off=%s\n",
				reg_arg_name(env, argno), tn_buf);
			return -EACCES;
		}
		/* Only initialized buffer on stack is allowed to be accessed
		 * with variable offset. With uninitialized buffer it's hard to
		 * guarantee that whole memory is marked as initialized on
		 * helper return since specific bounds are unknown what may
		 * cause uninitialized stack leaking.
		 */
		if (meta && meta->raw_mode)
			meta = NULL;

		/* Cover every start offset the register may take. */
		min_off = reg_smin(reg) + off;
		max_off = reg_smax(reg) + off;
	}

	if (meta && meta->raw_mode) {
		/* Ensure we won't be overwriting dynptrs when simulating byte
		 * by byte access in check_helper_call using meta.access_size.
		 * This would be a problem if we have a helper in the future
		 * which takes:
		 *
		 *	helper(uninit_mem, len, dynptr)
		 *
		 * Now, uninit_mem may overlap with dynptr pointer. Hence, it
		 * may end up writing to dynptr itself when touching memory from
		 * arg 1. This can be relaxed on a case by case basis for known
		 * safe cases, but reject due to the possibility of aliasing by
		 * default.
		 */
		for (i = min_off; i < max_off + access_size; i++) {
			/* Byte index counted from the top of the stack: offset -1 maps to 0. */
			int stack_off = -i - 1;

			spi = bpf_get_spi(i);
			/* raw_mode may write past allocated_stack */
			if (state->allocated_stack <= stack_off)
				continue;
			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
				return -EACCES;
			}
		}
		/* Record the access in @meta; the per-byte checks below are skipped. */
		meta->access_size = access_size;
		meta->regno = reg_from_argno(argno);
		return 0;
	}

	/* Inspect every byte that may be touched, one slot_type entry at a time. */
	for (i = min_off; i < max_off + access_size; i++) {
		u8 *stype;

		/* Byte index counted from the top of the stack: offset -1 maps to 0. */
		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		if (state->allocated_stack <= slot) {
			verbose(env, "allocated_stack too small\n");
			return -EFAULT;
		}

		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		if (*stype == STACK_MISC)
			goto mark;
		/* Zeroed bytes are always fine; uninitialized bytes only when
		 * env->allow_uninit_stack is set.
		 */
		if ((*stype == STACK_ZERO) ||
		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
			if (clobber) {
				/* helper can write anything into the stack */
				*stype = STACK_MISC;
			}
			goto mark;
		}

		/* A spilled register is acceptable if it is a scalar, or if
		 * env->allow_ptr_leaks is set. On a clobbering access the
		 * spilled value is forgotten and the whole slot is scrubbed.
		 */
		if (bpf_is_spilled_reg(&state->stack[spi]) &&
		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
		     env->allow_ptr_leaks)) {
			if (clobber) {
				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
				for (j = 0; j < BPF_REG_SIZE; j++)
					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
			}
			goto mark;
		}

		/* Anything else is rejected; pick the most specific message. */
		if (*stype == STACK_POISON) {
			if (allow_poison)
				goto mark;
			verbose(env, "reading from stack %s off %d+%d size %d, slot poisoned by dead code elimination\n",
				reg_arg_name(env, argno), min_off, i - min_off, access_size);
		} else if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid read from stack %s off %d+%d size %d\n",
				reg_arg_name(env, argno), min_off, i - min_off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid read from stack %s var_off %s+%d size %d\n",
				reg_arg_name(env, argno), tn_buf, i - min_off, access_size);
		}
		return -EACCES;
mark:
		/* Byte accepted; nothing further is recorded here. */
		;
	}
	return 0;
}
6708 
6709 static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
6710 				   int access_size, enum bpf_access_type access_type,
6711 				   bool zero_size_allowed,
6712 				   struct bpf_call_arg_meta *meta)
6713 {
6714 	struct bpf_reg_state *regs = cur_regs(env);
6715 	u32 *max_access;
6716 
6717 	switch (base_type(reg->type)) {
6718 	case PTR_TO_PACKET:
6719 	case PTR_TO_PACKET_META:
6720 		return check_packet_access(env, reg, argno, 0, access_size,
6721 					   zero_size_allowed);
6722 	case PTR_TO_MAP_KEY:
6723 		if (access_type == BPF_WRITE) {
6724 			verbose(env, "%s cannot write into %s\n",
6725 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
6726 			return -EACCES;
6727 		}
6728 		return check_mem_region_access(env, reg, argno, 0, access_size,
6729 					       reg->map_ptr->key_size, false);
6730 	case PTR_TO_MAP_VALUE:
6731 		if (check_map_access_type(env, reg, 0, access_size, access_type))
6732 			return -EACCES;
6733 		return check_map_access(env, reg, argno, 0, access_size,
6734 					zero_size_allowed, ACCESS_HELPER);
6735 	case PTR_TO_MEM:
6736 		if (type_is_rdonly_mem(reg->type)) {
6737 			if (access_type == BPF_WRITE) {
6738 				verbose(env, "%s cannot write into %s\n",
6739 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6740 				return -EACCES;
6741 			}
6742 		}
6743 		return check_mem_region_access(env, reg, argno, 0,
6744 					       access_size, reg->mem_size,
6745 					       zero_size_allowed);
6746 	case PTR_TO_BUF:
6747 		if (type_is_rdonly_mem(reg->type)) {
6748 			if (access_type == BPF_WRITE) {
6749 				verbose(env, "%s cannot write into %s\n",
6750 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6751 				return -EACCES;
6752 			}
6753 
6754 			max_access = &env->prog->aux->max_rdonly_access;
6755 		} else {
6756 			max_access = &env->prog->aux->max_rdwr_access;
6757 		}
6758 		return check_buffer_access(env, reg, argno, 0,
6759 					   access_size, zero_size_allowed,
6760 					   max_access);
6761 	case PTR_TO_STACK:
6762 		return check_stack_range_initialized(
6763 				env, reg,
6764 				argno, 0, access_size,
6765 				zero_size_allowed, access_type, meta);
6766 	case PTR_TO_BTF_ID:
6767 		return check_ptr_to_btf_access(env, regs, reg, argno, 0,
6768 					       access_size, BPF_READ, -1);
6769 	case PTR_TO_CTX:
6770 		/* Only permit reading or writing syscall context using helper calls. */
6771 		if (is_var_ctx_off_allowed(env->prog)) {
6772 			int err = check_mem_region_access(env, reg, argno, 0, access_size, U16_MAX,
6773 							  zero_size_allowed);
6774 			if (err)
6775 				return err;
6776 			if (env->prog->aux->max_ctx_offset < reg_umax(reg) + access_size)
6777 				env->prog->aux->max_ctx_offset = reg_umax(reg) + access_size;
6778 			return 0;
6779 		}
6780 		fallthrough;
6781 	default: /* scalar_value or invalid ptr */
6782 		/* Allow zero-byte read from NULL, regardless of pointer type */
6783 		if (zero_size_allowed && access_size == 0 &&
6784 		    bpf_register_is_null(reg))
6785 			return 0;
6786 
6787 		verbose(env, "%s type=%s ", reg_arg_name(env, argno),
6788 			reg_type_str(env, reg->type));
6789 		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
6790 		return -EACCES;
6791 	}
6792 }
6793 
6794 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
6795  * size.
6796  *
6797  * @mem_reg contains the pointer, @size_reg contains the access size.
6798  */
6799 static int check_mem_size_reg(struct bpf_verifier_env *env,
6800 			      struct bpf_reg_state *mem_reg,
6801 			      struct bpf_reg_state *size_reg, argno_t mem_argno,
6802 			      argno_t size_argno, enum bpf_access_type access_type,
6803 			      bool zero_size_allowed,
6804 			      struct bpf_call_arg_meta *meta)
6805 {
6806 	int err;
6807 
6808 	/* This is used to refine r0 return value bounds for helpers
6809 	 * that enforce this value as an upper bound on return values.
6810 	 * See do_refine_retval_range() for helpers that can refine
6811 	 * the return value. C type of helper is u32 so we pull register
6812 	 * bound from umax_value however, if negative verifier errors
6813 	 * out. Only upper bounds can be learned because retval is an
6814 	 * int type and negative retvals are allowed.
6815 	 */
6816 	meta->msize_max_value = reg_umax(size_reg);
6817 
6818 	/* The register is SCALAR_VALUE; the access check happens using
6819 	 * its boundaries. For unprivileged variable accesses, disable
6820 	 * raw mode so that the program is required to initialize all
6821 	 * the memory that the helper could just partially fill up.
6822 	 */
6823 	if (!tnum_is_const(size_reg->var_off))
6824 		meta = NULL;
6825 
6826 	if (reg_smin(size_reg) < 0) {
6827 		verbose(env, "%s min value is negative, either use unsigned or 'var &= const'\n",
6828 			reg_arg_name(env, size_argno));
6829 		return -EACCES;
6830 	}
6831 
6832 	if (reg_umin(size_reg) == 0 && !zero_size_allowed) {
6833 		verbose(env, "%s invalid zero-sized read: u64=[%lld,%lld]\n",
6834 			reg_arg_name(env, size_argno), reg_umin(size_reg), reg_umax(size_reg));
6835 		return -EACCES;
6836 	}
6837 
6838 	if (reg_umax(size_reg) >= BPF_MAX_VAR_SIZ) {
6839 		verbose(env, "%s unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
6840 			reg_arg_name(env, size_argno));
6841 		return -EACCES;
6842 	}
6843 	err = check_helper_mem_access(env, mem_reg, mem_argno, reg_umax(size_reg),
6844 				      access_type, zero_size_allowed, meta);
6845 	if (!err) {
6846 		int regno = reg_from_argno(size_argno);
6847 
6848 		if (regno >= 0)
6849 			err = mark_chain_precision(env, regno);
6850 		else
6851 			err = mark_stack_arg_precision(env, arg_idx_from_argno(size_argno));
6852 	}
6853 	return err;
6854 }
6855 
6856 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6857 			 argno_t argno, u32 mem_size)
6858 {
6859 	bool may_be_null = type_may_be_null(reg->type);
6860 	struct bpf_reg_state saved_reg;
6861 	int err;
6862 
6863 	if (bpf_register_is_null(reg))
6864 		return 0;
6865 
6866 	if (mem_size > S32_MAX) {
6867 		verbose(env, "%s memory size %u is too large\n",
6868 			reg_arg_name(env, argno), mem_size);
6869 		return -EACCES;
6870 	}
6871 
6872 	/* Assuming that the register contains a value check if the memory
6873 	 * access is safe. Temporarily save and restore the register's state as
6874 	 * the conversion shouldn't be visible to a caller.
6875 	 */
6876 	if (may_be_null) {
6877 		saved_reg = *reg;
6878 		mark_ptr_not_null_reg(reg);
6879 	}
6880 
6881 	int size = base_type(reg->type) == PTR_TO_STACK ? -(int)mem_size : mem_size;
6882 
6883 	err = check_helper_mem_access(env, reg, argno, size, BPF_READ, true, NULL);
6884 	err = err ?: check_helper_mem_access(env, reg, argno, size, BPF_WRITE, true, NULL);
6885 
6886 	if (may_be_null)
6887 		*reg = saved_reg;
6888 
6889 	return err;
6890 }
6891 
6892 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *mem_reg,
6893 				    struct bpf_reg_state *size_reg, argno_t mem_argno, argno_t size_argno)
6894 {
6895 	bool may_be_null = type_may_be_null(mem_reg->type);
6896 	struct bpf_reg_state saved_reg;
6897 	struct bpf_call_arg_meta meta;
6898 	int err;
6899 
6900 	memset(&meta, 0, sizeof(meta));
6901 
6902 	if (may_be_null) {
6903 		saved_reg = *mem_reg;
6904 		mark_ptr_not_null_reg(mem_reg);
6905 	}
6906 
6907 	err = check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_READ, true, &meta);
6908 	err = err ?: check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_WRITE, true, &meta);
6909 
6910 	if (may_be_null)
6911 		*mem_reg = saved_reg;
6912 
6913 	return err;
6914 }
6915 
/* Bit flags for the @flags argument of process_spin_lock(). */
enum {
	PROCESS_SPIN_LOCK = 0x1,	/* set: lock request, clear: unlock request */
	PROCESS_RES_LOCK  = 0x2,	/* operate on bpf_res_spin_lock, not bpf_spin_lock */
	PROCESS_LOCK_IRQ  = 0x4,	/* with PROCESS_RES_LOCK: use REF_TYPE_RES_LOCK_IRQ */
};
6921 
6922 /* Implementation details:
6923  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
6924  * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
6925  * Two bpf_map_lookups (even with the same key) will have different reg->id.
6926  * Two separate bpf_obj_new will also have different reg->id.
6927  * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
6928  * clears reg->id after value_or_null->value transition, since the verifier only
6929  * cares about the range of access to valid map value pointer and doesn't care
6930  * about actual address of the map element.
6931  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
6932  * reg->id > 0 after value_or_null->value transition. By doing so
6933  * two bpf_map_lookups will be considered two different pointers that
6934  * point to different bpf_spin_locks. Likewise for pointers to allocated objects
6935  * returned from bpf_obj_new.
6936  * The verifier allows taking only one bpf_spin_lock at a time to avoid
6937  * dead-locks.
6938  * Since only one bpf_spin_lock is allowed the checks are simpler than
6939  * reg_is_refcounted() logic. The verifier needs to remember only
6940  * one spin_lock instead of array of acquired_refs.
6941  * env->cur_state->active_locks remembers which map value element or allocated
6942  * object got locked and clears it after bpf_spin_unlock.
6943  */
6944 static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int flags)
6945 {
6946 	bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
6947 	const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
6948 	struct bpf_verifier_state *cur = env->cur_state;
6949 	bool is_const = tnum_is_const(reg->var_off);
6950 	bool is_irq = flags & PROCESS_LOCK_IRQ;
6951 	u64 val = reg->var_off.value;
6952 	struct bpf_map *map = NULL;
6953 	struct btf *btf = NULL;
6954 	struct btf_record *rec;
6955 	u32 spin_lock_off;
6956 	int err;
6957 
6958 	if (!is_const) {
6959 		verbose(env,
6960 			"%s doesn't have constant offset. %s_lock has to be at the constant offset\n",
6961 			reg_arg_name(env, argno), lock_str);
6962 		return -EINVAL;
6963 	}
6964 	if (reg->type == PTR_TO_MAP_VALUE) {
6965 		map = reg->map_ptr;
6966 		if (!map->btf) {
6967 			verbose(env,
6968 				"map '%s' has to have BTF in order to use %s_lock\n",
6969 				map->name, lock_str);
6970 			return -EINVAL;
6971 		}
6972 	} else {
6973 		btf = reg->btf;
6974 	}
6975 
6976 	rec = reg_btf_record(reg);
6977 	if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) {
6978 		verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local",
6979 			map ? map->name : "kptr", lock_str);
6980 		return -EINVAL;
6981 	}
6982 	spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
6983 	if (spin_lock_off != val) {
6984 		verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
6985 			val, lock_str, spin_lock_off);
6986 		return -EINVAL;
6987 	}
6988 	if (is_lock) {
6989 		void *ptr;
6990 		int type;
6991 
6992 		if (map)
6993 			ptr = map;
6994 		else
6995 			ptr = btf;
6996 
6997 		if (!is_res_lock && cur->active_locks) {
6998 			if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) {
6999 				verbose(env,
7000 					"Locking two bpf_spin_locks are not allowed\n");
7001 				return -EINVAL;
7002 			}
7003 		} else if (is_res_lock && cur->active_locks) {
7004 			if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) {
7005 				verbose(env, "Acquiring the same lock again, AA deadlock detected\n");
7006 				return -EINVAL;
7007 			}
7008 		}
7009 
7010 		if (is_res_lock && is_irq)
7011 			type = REF_TYPE_RES_LOCK_IRQ;
7012 		else if (is_res_lock)
7013 			type = REF_TYPE_RES_LOCK;
7014 		else
7015 			type = REF_TYPE_LOCK;
7016 		err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
7017 		if (err < 0) {
7018 			verbose(env, "Failed to acquire lock state\n");
7019 			return err;
7020 		}
7021 	} else {
7022 		void *ptr;
7023 		int type;
7024 
7025 		if (map)
7026 			ptr = map;
7027 		else
7028 			ptr = btf;
7029 
7030 		if (!cur->active_locks) {
7031 			verbose(env, "%s_unlock without taking a lock\n", lock_str);
7032 			return -EINVAL;
7033 		}
7034 
7035 		if (is_res_lock && is_irq)
7036 			type = REF_TYPE_RES_LOCK_IRQ;
7037 		else if (is_res_lock)
7038 			type = REF_TYPE_RES_LOCK;
7039 		else
7040 			type = REF_TYPE_LOCK;
7041 		if (!find_lock_state(cur, type, reg->id, ptr)) {
7042 			verbose(env, "%s_unlock of different lock\n", lock_str);
7043 			return -EINVAL;
7044 		}
7045 		if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) {
7046 			verbose(env, "%s_unlock cannot be out of order\n", lock_str);
7047 			return -EINVAL;
7048 		}
7049 		if (release_lock_state(cur, type, reg->id, ptr)) {
7050 			verbose(env, "%s_unlock of different lock\n", lock_str);
7051 			return -EINVAL;
7052 		}
7053 
7054 		invalidate_non_owning_refs(env);
7055 	}
7056 	return 0;
7057 }
7058 
7059 /* Check if @regno is a pointer to a specific field in a map value */
7060 static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7061 				   enum btf_field_type field_type,
7062 				   struct bpf_map_desc *map_desc)
7063 {
7064 	bool is_const = tnum_is_const(reg->var_off);
7065 	struct bpf_map *map = reg->map_ptr;
7066 	u64 val = reg->var_off.value;
7067 	const char *struct_name = btf_field_type_name(field_type);
7068 	int field_off = -1;
7069 
7070 	if (!is_const) {
7071 		verbose(env,
7072 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
7073 			reg_arg_name(env, argno), struct_name);
7074 		return -EINVAL;
7075 	}
7076 	if (!map->btf) {
7077 		verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
7078 			struct_name);
7079 		return -EINVAL;
7080 	}
7081 	if (!btf_record_has_field(map->record, field_type)) {
7082 		verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
7083 		return -EINVAL;
7084 	}
7085 	switch (field_type) {
7086 	case BPF_TIMER:
7087 		field_off = map->record->timer_off;
7088 		break;
7089 	case BPF_TASK_WORK:
7090 		field_off = map->record->task_work_off;
7091 		break;
7092 	case BPF_WORKQUEUE:
7093 		field_off = map->record->wq_off;
7094 		break;
7095 	default:
7096 		verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
7097 		return -EINVAL;
7098 	}
7099 	if (field_off != val) {
7100 		verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
7101 			val, struct_name, field_off);
7102 		return -EINVAL;
7103 	}
7104 	if (map_desc->ptr) {
7105 		verifier_bug(env, "Two map pointers in a %s helper", struct_name);
7106 		return -EFAULT;
7107 	}
7108 	map_desc->uid = reg->map_uid;
7109 	map_desc->ptr = map;
7110 	return 0;
7111 }
7112 
7113 static int process_timer_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7114 			      struct bpf_map_desc *map)
7115 {
7116 	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
7117 		verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
7118 		return -EOPNOTSUPP;
7119 	}
7120 	return check_map_field_pointer(env, reg, argno, BPF_TIMER, map);
7121 }
7122 
7123 static int process_timer_helper(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7124 				struct bpf_call_arg_meta *meta)
7125 {
7126 	return process_timer_func(env, reg, argno, &meta->map);
7127 }
7128 
7129 static int process_timer_kfunc(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7130 			       struct bpf_kfunc_call_arg_meta *meta)
7131 {
7132 	return process_timer_func(env, reg, argno, &meta->map);
7133 }
7134 
7135 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
7136 			     struct bpf_call_arg_meta *meta)
7137 {
7138 	struct bpf_reg_state *reg = reg_state(env, regno);
7139 	struct btf_field *kptr_field;
7140 	struct bpf_map *map_ptr;
7141 	struct btf_record *rec;
7142 	u32 kptr_off;
7143 
7144 	if (type_is_ptr_alloc_obj(reg->type)) {
7145 		rec = reg_btf_record(reg);
7146 	} else { /* PTR_TO_MAP_VALUE */
7147 		map_ptr = reg->map_ptr;
7148 		if (!map_ptr->btf) {
7149 			verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7150 				map_ptr->name);
7151 			return -EINVAL;
7152 		}
7153 		rec = map_ptr->record;
7154 		meta->map.ptr = map_ptr;
7155 	}
7156 
7157 	if (!tnum_is_const(reg->var_off)) {
7158 		verbose(env,
7159 			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7160 			regno);
7161 		return -EINVAL;
7162 	}
7163 
7164 	if (!btf_record_has_field(rec, BPF_KPTR)) {
7165 		verbose(env, "R%d has no valid kptr\n", regno);
7166 		return -EINVAL;
7167 	}
7168 
7169 	kptr_off = reg->var_off.value;
7170 	kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
7171 	if (!kptr_field) {
7172 		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
7173 		return -EACCES;
7174 	}
7175 	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
7176 		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
7177 		return -EACCES;
7178 	}
7179 	meta->kptr_field = kptr_field;
7180 	return 0;
7181 }
7182 
/*
 * Validate dynptr arguments for helper, kfunc and subprog.
 *
 * @reg/@argno: the register holding the dynptr argument and the argument
 * identifier used in verifier messages.
 * @insn_idx: instruction index forwarded to check_mem_access() and
 * mark_stack_slots_dynptr().
 * @arg_type: expected argument type; the MEM_UNINIT and OBJ_RELEASE modifier
 * bits select the behavior described below.
 * @dynptr: optional. When the argument is expected to be an initialized
 * dynptr and @dynptr is non-NULL, its type, id and parent_id are filled in
 * from the dynptr's register state. When the argument is tagged with
 * MEM_UNINIT (i.e., the dynptr argument that will be constructed), @dynptr is
 * handed to mark_stack_slots_dynptr().
 * @ref_obj: only used in the MEM_UNINIT case, where it is handed to
 * mark_stack_slots_dynptr(). NOTE(review): presumably this identifies the
 * referenced parent object (e.g., file or skb in qdisc program) of the dynptr
 * being constructed; an older version of this comment documented a
 * @parent_id parameter for that purpose -- confirm.
 *
 * Returns 0 on success, a negative errno otherwise.
 *
 * There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
 *
 * In both cases we deal with the first 8 bytes, but need to mark the next 8
 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
 *
 * Mutability of bpf_dynptr is at two levels: the dynptr and the memory the
 * dynptr points to. At the first level, the verifier will make sure a
 * CONST_PTR_TO_DYNPTR cannot be reinitialized or destroyed. The mutability of
 * a dynptr's view (i.e., start and offset) is not tracked as there is not such
 * use case. The second level is tracked using the upper bit of bpf_dynptr->size
 * and checked dynamically during runtime.
 */
static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			       argno_t argno, int insn_idx, enum bpf_arg_type arg_type,
			       struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
{
	int spi, err = 0;

	if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
		verbose(env,
			"%s expected pointer to stack or const struct bpf_dynptr\n",
			reg_arg_name(env, argno));
		return -EINVAL;
	}

	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
	 *		 constructing a mutable bpf_dynptr object.
	 *
	 *		 Currently, this is only possible with PTR_TO_STACK
	 *		 pointing to a region of at least 16 bytes which doesn't
	 *		 contain an existing bpf_dynptr.
	 *
	 *  OBJ_RELEASE - Points to an initialized bpf_dynptr that will be
	 *		  destroyed.
	 *
	 *  None       - Points to an initialized dynptr that cannot be
	 *		 reinitialized or destroyed. However, the view of the
	 *		 dynptr and the memory it points to may be mutated.
	 */
	if (arg_type & MEM_UNINIT) {
		int i;

		if (!is_dynptr_reg_valid_uninit(env, reg)) {
			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
			return -EINVAL;
		}

		/* we write BPF_DW bits (8 bytes) at a time */
		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
			err = check_mem_access(env, insn_idx, reg, argno,
					       i, BPF_DW, BPF_WRITE, -1, false, false);
			if (err)
				return err;
		}

		/* The memory is writable; now turn the slots into a dynptr. */
		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, ref_obj, dynptr);
	} else /* OBJ_RELEASE and None case from above */ {
		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
		if (reg->type == CONST_PTR_TO_DYNPTR && (arg_type & OBJ_RELEASE)) {
			verbose(env, "CONST_PTR_TO_DYNPTR cannot be released\n");
			return -EINVAL;
		}

		if (!is_dynptr_reg_valid_init(env, reg)) {
			verbose(env, "Expected an initialized dynptr as %s\n",
				reg_arg_name(env, argno));
			return -EINVAL;
		}

		/* Fold modifiers (in this case, OBJ_RELEASE) when checking expected type */
		if (!is_dynptr_type_expected(env, reg, arg_type & ~OBJ_RELEASE)) {
			verbose(env,
				"Expected a dynptr of type %s as %s\n",
				dynptr_type_str(arg_to_dynptr_type(arg_type)),
				reg_arg_name(env, argno));
			return -EINVAL;
		}

		if (reg->type != CONST_PTR_TO_DYNPTR) {
			struct bpf_func_state *state = bpf_func(env, reg);

			spi = dynptr_get_spi(env, reg);
			if (spi < 0)
				return spi;

			/*
			 * For CONST_PTR_TO_DYNPTR, reg is already scratched by check_reg_arg
			 * in check_helper_call and mark_btf_func_reg_size in check_kfunc_call.
			 */
			mark_stack_slots_scratched(env, spi, BPF_DYNPTR_NR_SLOTS);

			/* From here on 'reg' is the dynptr state stored in the stack slot. */
			reg = &state->stack[spi].spilled_ptr;
		}

		/* Report the dynptr's identity back to the caller, if requested. */
		if (dynptr) {
			dynptr->type = reg->dynptr.type;
			dynptr->id = reg->id;
			dynptr->parent_id = reg->parent_id;
		}
	}
	return err;
}
7296 
7297 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7298 {
7299 	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7300 }
7301 
7302 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7303 {
7304 	return meta->kfunc_flags & KF_ITER_NEW;
7305 }
7306 
7307 
7308 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7309 {
7310 	return meta->kfunc_flags & KF_ITER_DESTROY;
7311 }
7312 
7313 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
7314 			      const struct btf_param *arg)
7315 {
7316 	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
7317 	 * kfunc is iter state pointer
7318 	 */
7319 	if (is_iter_kfunc(meta))
7320 		return arg_idx == 0;
7321 
7322 	/* iter passed as an argument to a generic kfunc */
7323 	return btf_param_match_suffix(meta->btf, arg, "__iter");
7324 }
7325 
7326 static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx,
7327 			    struct bpf_kfunc_call_arg_meta *meta)
7328 {
7329 	struct bpf_func_state *state = bpf_func(env, reg);
7330 	const struct btf_type *t;
7331 	u32 arg_idx = arg_idx_from_argno(argno);
7332 	int spi, err, i, nr_slots, btf_id;
7333 
7334 	if (reg->type != PTR_TO_STACK) {
7335 		verbose(env, "%s expected pointer to an iterator on stack\n",
7336 			reg_arg_name(env, argno));
7337 		return -EINVAL;
7338 	}
7339 
7340 	/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
7341 	 * ensures struct convention, so we wouldn't need to do any BTF
7342 	 * validation here. But given iter state can be passed as a parameter
7343 	 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
7344 	 * conservative here.
7345 	 */
7346 	btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, arg_idx);
7347 	if (btf_id < 0) {
7348 		verbose(env, "expected valid iter pointer as %s\n",
7349 			reg_arg_name(env, argno));
7350 		return -EINVAL;
7351 	}
7352 	t = btf_type_by_id(meta->btf, btf_id);
7353 	nr_slots = t->size / BPF_REG_SIZE;
7354 
7355 	if (is_iter_new_kfunc(meta)) {
7356 		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
7357 		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
7358 			verbose(env, "expected uninitialized iter_%s as %s\n",
7359 				iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno));
7360 			return -EINVAL;
7361 		}
7362 
7363 		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
7364 			err = check_mem_access(env, insn_idx, reg, argno,
7365 					       i, BPF_DW, BPF_WRITE, -1, false, false);
7366 			if (err)
7367 				return err;
7368 		}
7369 
7370 		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
7371 		if (err)
7372 			return err;
7373 	} else {
7374 		/* iter_next() or iter_destroy(), as well as any kfunc
7375 		 * accepting iter argument, expect initialized iter state
7376 		 */
7377 		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
7378 		switch (err) {
7379 		case 0:
7380 			break;
7381 		case -EINVAL:
7382 			verbose(env, "expected an initialized iter_%s as %s\n",
7383 				iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno));
7384 			return err;
7385 		case -EPROTO:
7386 			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
7387 			return err;
7388 		default:
7389 			return err;
7390 		}
7391 
7392 		spi = iter_get_spi(env, reg, nr_slots);
7393 		if (spi < 0)
7394 			return spi;
7395 
7396 		mark_stack_slots_scratched(env, spi, nr_slots);
7397 
7398 		/* remember meta->iter info for process_iter_next_call() */
7399 		meta->iter.spi = spi;
7400 		meta->iter.frameno = reg->frameno;
7401 		update_ref_obj(&meta->ref_obj, &state->stack[spi].spilled_ptr);
7402 
7403 		if (is_iter_destroy_kfunc(meta)) {
7404 			err = unmark_stack_slots_iter(env, reg, nr_slots);
7405 			if (err)
7406 				return err;
7407 		}
7408 	}
7409 
7410 	return 0;
7411 }
7412 
7413 /* Look for a previous loop entry at insn_idx: nearest parent state
7414  * stopped at insn_idx with callsites matching those in cur->frame.
7415  */
7416 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7417 						  struct bpf_verifier_state *cur,
7418 						  int insn_idx)
7419 {
7420 	struct bpf_verifier_state_list *sl;
7421 	struct bpf_verifier_state *st;
7422 	struct list_head *pos, *head;
7423 
7424 	/* Explored states are pushed in stack order, most recent states come first */
7425 	head = bpf_explored_state(env, insn_idx);
7426 	list_for_each(pos, head) {
7427 		sl = container_of(pos, struct bpf_verifier_state_list, node);
7428 		/* If st->branches != 0 state is a part of current DFS verification path,
7429 		 * hence cur & st for a loop.
7430 		 */
7431 		st = &sl->state;
7432 		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
7433 		    st->dfs_depth < cur->dfs_depth)
7434 			return st;
7435 	}
7436 
7437 	return NULL;
7438 }
7439 
7440 /*
7441  * Check if scalar registers are exact for the purpose of not widening.
7442  * More lenient than regs_exact()
7443  */
7444 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold,
7445 				    const struct bpf_reg_state *rcur)
7446 {
7447 	return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id));
7448 }
7449 
7450 static void maybe_widen_reg(struct bpf_verifier_env *env,
7451 			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur)
7452 {
7453 	if (rold->type != SCALAR_VALUE)
7454 		return;
7455 	if (rold->type != rcur->type)
7456 		return;
7457 	if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur))
7458 		return;
7459 	__mark_reg_unknown(env, rcur);
7460 }
7461 
7462 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
7463 				   struct bpf_verifier_state *old,
7464 				   struct bpf_verifier_state *cur)
7465 {
7466 	struct bpf_func_state *fold, *fcur;
7467 	int i, fr, num_slots;
7468 
7469 	for (fr = old->curframe; fr >= 0; fr--) {
7470 		fold = old->frame[fr];
7471 		fcur = cur->frame[fr];
7472 
7473 		for (i = 0; i < MAX_BPF_REG; i++)
7474 			maybe_widen_reg(env,
7475 					&fold->regs[i],
7476 					&fcur->regs[i]);
7477 
7478 		num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
7479 				fcur->allocated_stack / BPF_REG_SIZE);
7480 		for (i = 0; i < num_slots; i++) {
7481 			if (!bpf_is_spilled_reg(&fold->stack[i]) ||
7482 			    !bpf_is_spilled_reg(&fcur->stack[i]))
7483 				continue;
7484 
7485 			maybe_widen_reg(env,
7486 					&fold->stack[i].spilled_ptr,
7487 					&fcur->stack[i].spilled_ptr);
7488 		}
7489 	}
7490 	return 0;
7491 }
7492 
7493 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
7494 						 struct bpf_kfunc_call_arg_meta *meta)
7495 {
7496 	int iter_frameno = meta->iter.frameno;
7497 	int iter_spi = meta->iter.spi;
7498 
7499 	return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7500 }
7501 
7502 /* process_iter_next_call() is called when verifier gets to iterator's next
7503  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7504  * to it as just "iter_next()" in comments below.
7505  *
7506  * BPF verifier relies on a crucial contract for any iter_next()
7507  * implementation: it should *eventually* return NULL, and once that happens
7508  * it should keep returning NULL. That is, once iterator exhausts elements to
7509  * iterate, it should never reset or spuriously return new elements.
7510  *
7511  * With the assumption of such contract, process_iter_next_call() simulates
7512  * a fork in the verifier state to validate loop logic correctness and safety
7513  * without having to simulate infinite amount of iterations.
7514  *
7515  * In current state, we first assume that iter_next() returned NULL and
7516  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7517  * conditions we should not form an infinite loop and should eventually reach
7518  * exit.
7519  *
7520  * Besides that, we also fork current state and enqueue it for later
7521  * verification. In a forked state we keep iterator state as ACTIVE
7522  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7523  * also bump iteration depth to prevent erroneous infinite loop detection
7524  * later on (see iter_active_depths_differ() comment for details). In this
7525  * state we assume that we'll eventually loop back to another iter_next()
7526  * calls (it could be in exactly same location or in some other instruction,
7527  * it doesn't matter, we don't make any unnecessary assumptions about this,
7528  * everything revolves around iterator state in a stack slot, not which
7529  * instruction is calling iter_next()). When that happens, we either will come
7530  * to iter_next() with equivalent state and can conclude that next iteration
7531  * will proceed in exactly the same way as we just verified, so it's safe to
7532  * assume that loop converges. If not, we'll go on another iteration
7533  * simulation with a different input state, until all possible starting states
7534  * are validated or we reach maximum number of instructions limit.
7535  *
7536  * This way, we will either exhaustively discover all possible input states
7537  * that iterator loop can start with and eventually will converge, or we'll
7538  * effectively regress into bounded loop simulation logic and either reach
7539  * maximum number of instructions if loop is not provably convergent, or there
7540  * is some statically known limit on number of iterations (e.g., if there is
7541  * an explicit `if n > 100 then break;` statement somewhere in the loop).
7542  *
7543  * Iteration convergence logic in is_state_visited() relies on exact
7544  * states comparison, which ignores read and precision marks.
7545  * This is necessary because read and precision marks are not finalized
7546  * while in the loop. Exact comparison might preclude convergence for
7547  * simple programs like below:
7548  *
7549  *     i = 0;
7550  *     while(iter_next(&it))
7551  *       i++;
7552  *
7553  * At each iteration step i++ would produce a new distinct state and
7554  * eventually instruction processing limit would be reached.
7555  *
7556  * To avoid such behavior speculatively forget (widen) range for
7557  * imprecise scalar registers, if those registers were not precise at the
7558  * end of the previous iteration and do not match exactly.
7559  *
7560  * This is a conservative heuristic that allows to verify wide range of programs,
7561  * however it precludes verification of programs that conjure an
7562  * imprecise value on the first loop iteration and use it as precise on a second.
7563  * For example, the following safe program would fail to verify:
7564  *
7565  *     struct bpf_num_iter it;
7566  *     int arr[10];
7567  *     int i = 0, a = 0;
7568  *     bpf_iter_num_new(&it, 0, 10);
7569  *     while (bpf_iter_num_next(&it)) {
7570  *       if (a == 0) {
7571  *         a = 1;
 *         i = 7; // Because i changed, the verifier would forget
 *                // its range on second loop entry.
7574  *       } else {
7575  *         arr[i] = 42; // This would fail to verify.
7576  *       }
7577  *     }
7578  *     bpf_iter_num_destroy(&it);
7579  */
static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
				  struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
	struct bpf_reg_state *cur_iter, *queued_iter;

	/* Returns 0 on success, -EFAULT when an inconsistency is reported
	 * through verifier_bug(), or the error encoded in the pointer
	 * returned by push_stack().
	 */
	BTF_TYPE_EMIT(struct bpf_iter);

	/* The iterator's state is kept in the stack slot named by meta->iter. */
	cur_iter = get_iter_from_state(cur_st, meta);

	/* Only ACTIVE and DRAINED iterators are expected at an iter_next() call. */
	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
		verifier_bug(env, "unexpected iterator state %d (%s)",
			     cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
		return -EFAULT;
	}

	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
		/* Because iter_next() call is a checkpoint, is_state_visited()
		 * should guarantee a parent state with the same call sites and
		 * insn_idx.
		 */
		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
		    !same_callsites(cur_st->parent, cur_st)) {
			verifier_bug(env, "bad parent state for iter next call");
			return -EFAULT;
		}
		/* Note cur_st->parent in the call below, it is necessary to skip
		 * checkpoint created for cur_st by is_state_visited()
		 * right at this instruction.
		 */
		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
		/* branch out active iter state */
		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
		if (IS_ERR(queued_st))
			return PTR_ERR(queued_st);

		/* The queued state keeps the iterator ACTIVE, one level deeper. */
		queued_iter = get_iter_from_state(queued_st, meta);
		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
		queued_iter->iter.depth++;
		/* Widening is applied to the queued state only, and only when
		 * an earlier entry for this instruction was found.
		 */
		if (prev_st)
			widen_imprecise_scalars(env, prev_st, queued_st);

		/* In the queued state iter_next() is assumed to return non-NULL. */
		queued_fr = queued_st->frame[queued_st->curframe];
		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
	}

	/* Current state: switch the iterator to DRAINED, keeping its depth
	 * unchanged, and assume iter_next() returned NULL (R0 is marked as
	 * constant zero).
	 */
	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
	__mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);

	return 0;
}
7634 
7635 static bool arg_type_is_mem_size(enum bpf_arg_type type)
7636 {
7637 	return type == ARG_CONST_SIZE ||
7638 	       type == ARG_CONST_SIZE_OR_ZERO;
7639 }
7640 
7641 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
7642 {
7643 	return base_type(type) == ARG_PTR_TO_MEM &&
7644 	       type & MEM_UNINIT;
7645 }
7646 
7647 static bool arg_type_is_release(enum bpf_arg_type type)
7648 {
7649 	return type & OBJ_RELEASE;
7650 }
7651 
7652 static bool arg_type_is_dynptr(enum bpf_arg_type type)
7653 {
7654 	return base_type(type) == ARG_PTR_TO_DYNPTR;
7655 }
7656 
7657 static int resolve_map_arg_type(struct bpf_verifier_env *env,
7658 				 const struct bpf_call_arg_meta *meta,
7659 				 enum bpf_arg_type *arg_type)
7660 {
7661 	if (!meta->map.ptr) {
7662 		/* kernel subsystem misconfigured verifier */
7663 		verifier_bug(env, "invalid map_ptr to access map->type");
7664 		return -EFAULT;
7665 	}
7666 
7667 	switch (meta->map.ptr->map_type) {
7668 	case BPF_MAP_TYPE_SOCKMAP:
7669 	case BPF_MAP_TYPE_SOCKHASH:
7670 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
7671 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
7672 		} else {
7673 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
7674 			return -EINVAL;
7675 		}
7676 		break;
7677 	case BPF_MAP_TYPE_BLOOM_FILTER:
7678 		if (meta->func_id == BPF_FUNC_map_peek_elem)
7679 			*arg_type = ARG_PTR_TO_MAP_VALUE;
7680 		break;
7681 	default:
7682 		break;
7683 	}
7684 	return 0;
7685 }
7686 
/* Set of register types accepted for one helper argument type; consumed by
 * check_reg_type() through compatible_reg_types[].
 */
struct bpf_reg_types {
	/* Accepted register types. check_reg_type() scans this array from the
	 * start and stops at the first NOT_INIT entry or after all 10 slots.
	 * NOTE(review): unused trailing slots are zero-initialized, so this
	 * relies on NOT_INIT being 0 - confirm against enum bpf_reg_type.
	 */
	const enum bpf_reg_type types[10];
	/* Optional BTF ID that check_reg_type() uses for PTR_TO_BTF_ID
	 * registers when its caller supplies no arg_btf_id.
	 */
	u32 *btf_id;
};
7691 
/* Register types accepted for ARG_PTR_TO_SOCK_COMMON (see compatible_reg_types[]). */
static const struct bpf_reg_types sock_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
	},
};

#ifdef CONFIG_NET
/* Accepted for ARG_PTR_TO_BTF_ID_SOCK_COMMON: the socket pointer types above
 * plus plain and trusted PTR_TO_BTF_ID. The .btf_id member provides the BTF ID
 * check_reg_type() falls back to when no arg_btf_id is passed in.
 */
static const struct bpf_reg_types btf_id_sock_common_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
	},
	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
#endif
7714 
/* Register types accepted for the memory-like argument types
 * (ARG_PTR_TO_MAP_KEY, ARG_PTR_TO_MAP_VALUE and ARG_PTR_TO_MEM in
 * compatible_reg_types[]).
 *
 * All 10 slots of bpf_reg_types::types are in use here, so this list has no
 * trailing empty entry: check_reg_type() stops scanning it at ARRAY_SIZE().
 * Adding another entry requires enlarging the array in struct bpf_reg_types.
 *
 * check_reg_type() compares against the address of this object to apply its
 * read-only restriction for PTR_TO_BTF_ID registers.
 */
static const struct bpf_reg_types mem_types = {
	.types = {
		PTR_TO_STACK,
		PTR_TO_PACKET,
		PTR_TO_PACKET_META,
		PTR_TO_MAP_KEY,
		PTR_TO_MAP_VALUE,
		PTR_TO_MEM,
		PTR_TO_MEM | MEM_RINGBUF,
		PTR_TO_BUF,
		PTR_TO_BTF_ID | PTR_TRUSTED,
		PTR_TO_CTX,
	},
};

/* Register types accepted for ARG_PTR_TO_SPIN_LOCK. */
static const struct bpf_reg_types spin_lock_types = {
	.types = {
		PTR_TO_MAP_VALUE,
		PTR_TO_BTF_ID | MEM_ALLOC,
	}
};
7736 
/* Remaining per-argument-type lists of accepted register types. Each one is
 * referenced from compatible_reg_types[] below. Some lists have identical
 * contents (e.g. const_str_ptr_types and timer_types) but are kept as separate
 * objects.
 */
static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
/* Accepted for ARG_PTR_TO_BTF_ID: plain, trusted and RCU-tagged PTR_TO_BTF_ID. */
static const struct bpf_reg_types btf_ptr_types = {
	.types = {
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
		PTR_TO_BTF_ID | MEM_RCU,
	},
};
/* Accepted for ARG_PTR_TO_PERCPU_BTF_ID. */
static const struct bpf_reg_types percpu_btf_ptr_types = {
	.types = {
		PTR_TO_BTF_ID | MEM_PERCPU,
		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
	}
};
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
/* Accepted for ARG_KPTR_XCHG_DEST. */
static const struct bpf_reg_types kptr_xchg_dest_types = {
	.types = {
		PTR_TO_MAP_VALUE,
		PTR_TO_BTF_ID | MEM_ALLOC,
		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF,
		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU,
	}
};
/* Accepted for ARG_PTR_TO_DYNPTR. */
static const struct bpf_reg_types dynptr_types = {
	.types = {
		PTR_TO_STACK,
		CONST_PTR_TO_DYNPTR,
	}
};
7774 
/* Maps a helper argument type to the list of register types accepted for it.
 * check_reg_type() indexes this table with base_type(arg_type); an argument
 * type without an entry (NULL) is reported there as a verifier bug.
 */
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
	[ARG_CONST_SIZE]		= &scalar_types,
	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
	[ARG_PTR_TO_CTX]		= &context_types,
	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
#ifdef CONFIG_NET
	/* The referenced list only exists when CONFIG_NET is enabled. */
	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
#endif
	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
	[ARG_PTR_TO_MEM]		= &mem_types,
	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
	[ARG_PTR_TO_TIMER]		= &timer_types,
	[ARG_KPTR_XCHG_DEST]		= &kptr_xchg_dest_types,
	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
};
7800 
/*
 * Check that the type of @reg is acceptable for a helper argument of
 * @arg_type.
 *
 * The list of acceptable register types comes from compatible_reg_types[],
 * indexed by base_type(@arg_type). The comparison is done on a local copy of
 * the register type from which some flags are stripped first, depending on
 * @arg_type (see the comment in the body). Registers whose base type is
 * PTR_TO_BTF_ID get additional checks after a match.
 *
 * @argno:	identifies the argument; used for log messages and converted
 *		back to a register number with reg_from_argno() where needed.
 * @arg_btf_id:	expected BTF ID for PTR_TO_BTF_ID registers; when NULL, the
 *		btf_id of the selected bpf_reg_types list is used instead.
 * @meta:	supplies func_id and, for bpf_kptr_xchg, kptr_field.
 *
 * Returns 0 if the register is acceptable, -EACCES if it is not (a message is
 * written to the verifier log), -EFAULT for inconsistencies reported through
 * verifier_bug(), or the error returned by __check_ptr_off_reg().
 */
static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
			  enum bpf_arg_type arg_type,
			  const u32 *arg_btf_id,
			  struct bpf_call_arg_meta *meta)
{
	enum bpf_reg_type expected, type = reg->type;
	const struct bpf_reg_types *compatible;
	int i, j, err;

	compatible = compatible_reg_types[base_type(arg_type)];
	if (!compatible) {
		verifier_bug(env, "unsupported arg type %d", arg_type);
		return -EFAULT;
	}

	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
	 *
	 * Same for MAYBE_NULL:
	 *
	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
	 *
	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
	 *
	 * Therefore we fold these flags depending on the arg_type before comparison.
	 */
	if (arg_type & MEM_RDONLY)
		type &= ~MEM_RDONLY;
	if (arg_type & PTR_MAYBE_NULL)
		type &= ~PTR_MAYBE_NULL;
	if (base_type(arg_type) == ARG_PTR_TO_MEM)
		type &= ~DYNPTR_TYPE_FLAG_MASK;

	/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && reg_from_argno(argno) == BPF_REG_2) {
		type &= ~MEM_ALLOC;
		type &= ~MEM_PERCPU;
	}

	/* Scan the accepted types; a NOT_INIT entry ends a list that does not
	 * use every slot of the array.
	 */
	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
		expected = compatible->types[i];
		if (expected == NOT_INIT)
			break;

		if (type == expected)
			goto found;
	}

	/* No match: i is the number of entries scanned. Print all but the
	 * last one followed by ", ", then the last one followed by a newline.
	 * The register's original (unfolded) type is reported.
	 */
	verbose(env, "%s type=%s expected=", reg_arg_name(env, argno), reg_type_str(env, reg->type));
	for (j = 0; j + 1 < i; j++)
		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
	return -EACCES;

found:
	/* Only PTR_TO_BTF_ID based registers need the checks below. */
	if (base_type(reg->type) != PTR_TO_BTF_ID)
		return 0;

	/* A PTR_TO_BTF_ID register matched against mem_types is accepted only
	 * for arguments flagged MEM_RDONLY.
	 */
	if (compatible == &mem_types) {
		if (!(arg_type & MEM_RDONLY)) {
			verbose(env,
				"%s() may write into memory pointed by %s type=%s\n",
				func_id_name(meta->func_id),
				reg_arg_name(env, argno), reg_type_str(env, reg->type));
			return -EACCES;
		}
		return 0;
	}

	/* From here on the register's full type, including flags, is used. */
	switch ((int)reg->type) {
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID | PTR_TRUSTED:
	case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
	case PTR_TO_BTF_ID | MEM_RCU:
	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
	{
		/* For bpf_sk_release, it needs to match against first member
		 * 'struct sock_common', hence make an exception for it. This
		 * allows bpf_sk_release to work for multiple socket types.
		 */
		bool strict_type_match = arg_type_is_release(arg_type) &&
					 meta->func_id != BPF_FUNC_sk_release;

		/* A possibly-NULL register is rejected unless the argument
		 * allows NULL and is not a release argument.
		 */
		if (type_may_be_null(reg->type) &&
		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
			verbose(env, "Possibly NULL pointer passed to helper %s\n",
				reg_arg_name(env, argno));
			return -EACCES;
		}

		/* Fall back to the BTF ID attached to the accepted-types list. */
		if (!arg_btf_id) {
			if (!compatible->btf_id) {
				verifier_bug(env, "missing arg compatible BTF ID");
				return -EFAULT;
			}
			arg_btf_id = compatible->btf_id;
		}

		if (meta->func_id == BPF_FUNC_kptr_xchg) {
			if (map_kptr_match_type(env, meta->kptr_field, reg, reg_from_argno(argno)))
				return -EACCES;
		} else {
			if (arg_btf_id == BPF_PTR_POISON) {
				verbose(env, "verifier internal error:");
				verbose(env, "%s has non-overwritten BPF_PTR_POISON type\n",
					reg_arg_name(env, argno));
				return -EACCES;
			}

			err = __check_ptr_off_reg(env, reg, argno, true);
			if (err)
				return err;

			/* The register's constant offset (var_off.value) is
			 * passed to the BTF type comparison.
			 */
			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id,
						  reg->var_off.value, btf_vmlinux, *arg_btf_id,
						  strict_type_match)) {
				verbose(env, "%s is of type %s but %s is expected\n",
					reg_arg_name(env, argno),
					btf_type_name(reg->btf, reg->btf_id),
					btf_type_name(btf_vmlinux, *arg_btf_id));
				return -EACCES;
			}
		}
		break;
	}
	case PTR_TO_BTF_ID | MEM_ALLOC:
	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
		/* MEM_ALLOC registers are handled only for the spin lock
		 * helpers and bpf_kptr_xchg.
		 */
		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
		    meta->func_id != BPF_FUNC_kptr_xchg) {
			verifier_bug(env, "unimplemented handling of MEM_ALLOC");
			return -EFAULT;
		}
		/* Check if local kptr in src arg matches kptr in dst arg */
		if (meta->func_id == BPF_FUNC_kptr_xchg) {
			int regno = reg_from_argno(argno);

			if (regno == BPF_REG_2 &&
			    map_kptr_match_type(env, meta->kptr_field, reg, regno))
				return -EACCES;
		}
		break;
	case PTR_TO_BTF_ID | MEM_PERCPU:
	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
		/* Handled by helper specific checks */
		break;
	default:
		verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match");
		return -EFAULT;
	}
	return 0;
}
7957 
7958 static struct btf_field *
7959 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
7960 {
7961 	struct btf_field *field;
7962 	struct btf_record *rec;
7963 
7964 	rec = reg_btf_record(reg);
7965 	if (!rec)
7966 		return NULL;
7967 
7968 	field = btf_record_find(rec, off, fields);
7969 	if (!field)
7970 		return NULL;
7971 
7972 	return field;
7973 }
7974 
/*
 * Check whether the offset carried by @reg is acceptable for an argument of
 * @arg_type.
 *
 * Release arguments (OBJ_RELEASE) must have a constant zero offset, except for
 * a dynptr argument passed as PTR_TO_STACK. After that, the decision depends
 * on the register type: a fixed set of types is accepted with any offset,
 * PTR_TO_BTF_ID variants and everything else are delegated to
 * __check_ptr_off_reg() with fixed offsets allowed or disallowed respectively.
 *
 * Returns 0 if the offset is acceptable, -EINVAL for a release argument with
 * a non-zero or non-constant offset, or the result of __check_ptr_off_reg().
 */
static int check_func_arg_reg_off(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg, argno_t argno,
				  enum bpf_arg_type arg_type)
{
	u32 type = reg->type;

	/* When referenced register is passed to release function, its fixed
	 * offset must be 0.
	 *
	 * We will check arg_type_is_release reg has id when storing
	 * meta->release_regno.
	 */
	if (arg_type_is_release(arg_type)) {
		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
		 * may not directly point to the object being released, but to
		 * dynptr pointing to such object, which might be at some offset
		 * on the stack. In that case, we simply fall back to the
		 * default handling.
		 */
		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
			return 0;

		/* Doing check_ptr_off_reg check for the offset will catch this
		 * because fixed_off_ok is false, but checking here allows us
		 * to give the user a better error message.
		 */
		if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) {
			verbose(env, "%s must have zero offset when passed to release func or trusted arg to kfunc\n",
				reg_arg_name(env, argno));
			return -EINVAL;
		}
	}

	/* Note: the case labels below match the full register type including
	 * flags, so flag combinations that are not listed take the default
	 * branch.
	 */
	switch (type) {
	/* Pointer types where both fixed and variable offset is explicitly allowed: */
	case PTR_TO_STACK:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_MAP_KEY:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MEM:
	case PTR_TO_MEM | MEM_RDONLY:
	case PTR_TO_MEM | MEM_RINGBUF:
	case PTR_TO_BUF:
	case PTR_TO_BUF | MEM_RDONLY:
	case PTR_TO_ARENA:
	case SCALAR_VALUE:
		return 0;
	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
	 * fixed offset.
	 */
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID | MEM_ALLOC:
	case PTR_TO_BTF_ID | PTR_TRUSTED:
	case PTR_TO_BTF_ID | MEM_RCU:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
		/* When referenced PTR_TO_BTF_ID is passed to release function,
		 * its fixed offset must be 0. In the other cases, fixed offset
		 * can be non-zero. This was already checked above. So pass
		 * fixed_off_ok as true to allow fixed offset for all other
		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
		 * still need to do checks instead of returning.
		 */
		return __check_ptr_off_reg(env, reg, argno, true);
	case PTR_TO_CTX:
		/*
		 * Allow fixed and variable offsets for syscall context, but
		 * only when the argument is passed as memory, not ctx,
		 * otherwise we may get modified ctx in tail called programs and
		 * global subprogs (that may act as extension prog hooks).
		 */
		if (arg_type != ARG_PTR_TO_CTX && is_var_ctx_off_allowed(env->prog))
			return 0;
		fallthrough;
	default:
		return __check_ptr_off_reg(env, reg, argno, false);
	}
}
8054 
8055 static int check_arg_const_str(struct bpf_verifier_env *env,
8056 			       struct bpf_reg_state *reg, argno_t argno)
8057 {
8058 	struct bpf_map *map = reg->map_ptr;
8059 	int err;
8060 	int map_off;
8061 	u64 map_addr;
8062 	char *str_ptr;
8063 
8064 	if (reg->type != PTR_TO_MAP_VALUE)
8065 		return -EINVAL;
8066 
8067 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
8068 		verbose(env, "%s points to insn_array map which cannot be used as const string\n",
8069 			reg_arg_name(env, argno));
8070 		return -EACCES;
8071 	}
8072 
8073 	if (!bpf_map_is_rdonly(map)) {
8074 		verbose(env, "%s does not point to a readonly map'\n", reg_arg_name(env, argno));
8075 		return -EACCES;
8076 	}
8077 
8078 	if (!tnum_is_const(reg->var_off)) {
8079 		verbose(env, "%s is not a constant address'\n", reg_arg_name(env, argno));
8080 		return -EACCES;
8081 	}
8082 
8083 	if (!map->ops->map_direct_value_addr) {
8084 		verbose(env, "no direct value access support for this map type\n");
8085 		return -EACCES;
8086 	}
8087 
8088 	err = check_map_access(env, reg, argno, 0,
8089 			       map->value_size - reg->var_off.value, false,
8090 			       ACCESS_HELPER);
8091 	if (err)
8092 		return err;
8093 
8094 	map_off = reg->var_off.value;
8095 	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8096 	if (err) {
8097 		verbose(env, "direct value access on string failed\n");
8098 		return err;
8099 	}
8100 
8101 	str_ptr = (char *)(long)(map_addr);
8102 	if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8103 		verbose(env, "string is not zero-terminated\n");
8104 		return -EINVAL;
8105 	}
8106 	return 0;
8107 }
8108 
/* Returns constant key value in `value` if possible, else negative error.
 *
 * -EOPNOTSUPP is returned whenever the key cannot be determined statically
 * (not bpf_capable, key is not a PTR_TO_STACK at a constant offset, or the
 * stack bytes are neither all STACK_ZERO nor a constant scalar spill of
 * exactly key_size bytes). Any other negative value comes from
 * mark_chain_precision_batch(). `value` is written only on success.
 */
static int get_constant_map_key(struct bpf_verifier_env *env,
				struct bpf_reg_state *key,
				u32 key_size,
				s64 *value)
{
	struct bpf_func_state *state = bpf_func(env, key);
	struct bpf_reg_state *reg;
	int slot, spi, off;
	int spill_size = 0;
	int zero_size = 0;
	int stack_off;
	int i, err;
	u8 *stype;

	if (!env->bpf_capable)
		return -EOPNOTSUPP;
	if (key->type != PTR_TO_STACK)
		return -EOPNOTSUPP;
	if (!tnum_is_const(key->var_off))
		return -EOPNOTSUPP;

	/* Translate the constant stack offset into a stack slot index (spi)
	 * and a byte index within that slot's slot_type[] array (off).
	 * NOTE(review): this assumes stack_off is negative, so that slot and
	 * spi are non-negative - confirm that callers guarantee it.
	 */
	stack_off = key->var_off.value;
	slot = -stack_off - 1;
	spi = slot / BPF_REG_SIZE;
	off = slot % BPF_REG_SIZE;
	stype = state->stack[spi].slot_type;

	/* First handle precisely tracked STACK_ZERO */
	/* Counts consecutive STACK_ZERO bytes from index off down to 0. */
	for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
		zero_size++;
	if (zero_size >= key_size) {
		*value = 0;
		return 0;
	}

	/* Check that stack contains a scalar spill of expected size */
	if (!bpf_is_spilled_scalar_reg(&state->stack[spi]))
		return -EOPNOTSUPP;
	/* Same walk as above, counting STACK_SPILL bytes; the spill must
	 * cover exactly key_size bytes.
	 */
	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
		spill_size++;
	if (spill_size != key_size)
		return -EOPNOTSUPP;

	reg = &state->stack[spi].spilled_ptr;
	if (!tnum_is_const(reg->var_off))
		/* Stack value not statically known */
		return -EOPNOTSUPP;

	/* We are relying on a constant value. So mark as precise
	 * to prevent pruning on it.
	 */
	bpf_bt_set_frame_slot(&env->bt, key->frameno, spi);
	err = mark_chain_precision_batch(env, env->cur_state);
	if (err < 0)
		return err;

	*value = reg->var_off.value;
	return 0;
}
8169 
/* Forward declaration: check_func_arg() below calls this before deciding
 * whether to look for a constant map key; the definition is not in this part
 * of the file.
 */
static bool can_elide_value_nullness(enum bpf_map_type type);
8171 
8172 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8173 			  struct bpf_call_arg_meta *meta,
8174 			  const struct bpf_func_proto *fn,
8175 			  int insn_idx)
8176 {
8177 	u32 regno = BPF_REG_1 + arg;
8178 	struct bpf_reg_state *reg = reg_state(env, regno);
8179 	enum bpf_arg_type arg_type = fn->arg_type[arg];
8180 	argno_t argno = argno_from_arg(arg + 1);
8181 	enum bpf_reg_type type = reg->type;
8182 	u32 *arg_btf_id = NULL;
8183 	u32 key_size;
8184 	int err = 0;
8185 
8186 	if (arg_type == ARG_DONTCARE)
8187 		return 0;
8188 
8189 	err = check_reg_arg(env, regno, SRC_OP);
8190 	if (err)
8191 		return err;
8192 
8193 	if (arg_type == ARG_ANYTHING) {
8194 		if (is_pointer_value(env, regno)) {
8195 			verbose(env, "R%d leaks addr into helper function\n",
8196 				regno);
8197 			return -EACCES;
8198 		}
8199 		return 0;
8200 	}
8201 
8202 	if (type_is_pkt_pointer(type) &&
8203 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8204 		verbose(env, "helper access to the packet is not allowed\n");
8205 		return -EACCES;
8206 	}
8207 
8208 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8209 		err = resolve_map_arg_type(env, meta, &arg_type);
8210 		if (err)
8211 			return err;
8212 	}
8213 
8214 	if (bpf_register_is_null(reg) && type_may_be_null(arg_type))
8215 		/* A NULL register has a SCALAR_VALUE type, so skip
8216 		 * type checking.
8217 		 */
8218 		goto skip_type_check;
8219 
8220 	/* arg_btf_id and arg_size are in a union. */
8221 	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8222 	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8223 		arg_btf_id = fn->arg_btf_id[arg];
8224 
8225 	err = check_reg_type(env, reg, argno_from_reg(regno), arg_type, arg_btf_id, meta);
8226 	if (err)
8227 		return err;
8228 
8229 	err = check_func_arg_reg_off(env, reg, argno_from_reg(regno), arg_type);
8230 	if (err)
8231 		return err;
8232 
8233 skip_type_check:
8234 	if (arg_type_is_release(arg_type) && !arg_type_is_dynptr(arg_type) &&
8235 	    !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) {
8236 		verbose(env, "release helper %s expects referenced PTR_TO_BTF_ID passed to %s\n",
8237 			func_id_name(meta->func_id), reg_arg_name(env, argno));
8238 		return -EINVAL;
8239 	}
8240 
8241 	if (reg_is_referenced(env, reg))
8242 		update_ref_obj(&meta->ref_obj, reg);
8243 
8244 	switch (base_type(arg_type)) {
8245 	case ARG_CONST_MAP_PTR:
8246 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8247 		if (meta->map.ptr) {
8248 			/* Use map_uid (which is unique id of inner map) to reject:
8249 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8250 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8251 			 * if (inner_map1 && inner_map2) {
8252 			 *     timer = bpf_map_lookup_elem(inner_map1);
8253 			 *     if (timer)
8254 			 *         // mismatch would have been allowed
8255 			 *         bpf_timer_init(timer, inner_map2);
8256 			 * }
8257 			 *
8258 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
8259 			 */
8260 			if (meta->map.ptr != reg->map_ptr ||
8261 			    meta->map.uid != reg->map_uid) {
8262 				verbose(env,
8263 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8264 					meta->map.uid, reg->map_uid);
8265 				return -EINVAL;
8266 			}
8267 		}
8268 		meta->map.ptr = reg->map_ptr;
8269 		meta->map.uid = reg->map_uid;
8270 		break;
8271 	case ARG_PTR_TO_MAP_KEY:
8272 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
8273 		 * check that [key, key + map->key_size) are within
8274 		 * stack limits and initialized
8275 		 */
8276 		if (!meta->map.ptr) {
8277 			/* in function declaration map_ptr must come before
8278 			 * map_key, so that it's verified and known before
8279 			 * we have to check map_key here. Otherwise it means
8280 			 * that kernel subsystem misconfigured verifier
8281 			 */
8282 			verifier_bug(env, "invalid map_ptr to access map->key");
8283 			return -EFAULT;
8284 		}
8285 		key_size = meta->map.ptr->key_size;
8286 		err = check_helper_mem_access(env, reg, argno_from_reg(regno), key_size, BPF_READ, false, NULL);
8287 		if (err)
8288 			return err;
8289 		if (can_elide_value_nullness(meta->map.ptr->map_type)) {
8290 			err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
8291 			if (err < 0) {
8292 				meta->const_map_key = -1;
8293 				if (err == -EOPNOTSUPP)
8294 					err = 0;
8295 				else
8296 					return err;
8297 			}
8298 		}
8299 		break;
8300 	case ARG_PTR_TO_MAP_VALUE:
8301 		if (type_may_be_null(arg_type) && bpf_register_is_null(reg))
8302 			return 0;
8303 
8304 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
8305 		 * check [value, value + map->value_size) validity
8306 		 */
8307 		if (!meta->map.ptr) {
8308 			/* kernel subsystem misconfigured verifier */
8309 			verifier_bug(env, "invalid map_ptr to access map->value");
8310 			return -EFAULT;
8311 		}
8312 		meta->raw_mode = arg_type & MEM_UNINIT;
8313 		err = check_helper_mem_access(env, reg, argno_from_reg(regno), meta->map.ptr->value_size,
8314 					      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8315 					      false, meta);
8316 		break;
8317 	case ARG_PTR_TO_PERCPU_BTF_ID:
8318 		if (!reg->btf_id) {
8319 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8320 			return -EACCES;
8321 		}
8322 		meta->ret_btf = reg->btf;
8323 		meta->ret_btf_id = reg->btf_id;
8324 		break;
8325 	case ARG_PTR_TO_SPIN_LOCK:
8326 		if (in_rbtree_lock_required_cb(env)) {
8327 			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8328 			return -EACCES;
8329 		}
8330 		if (meta->func_id == BPF_FUNC_spin_lock) {
8331 			err = process_spin_lock(env, reg, argno_from_reg(regno), PROCESS_SPIN_LOCK);
8332 			if (err)
8333 				return err;
8334 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
8335 			err = process_spin_lock(env, reg, argno_from_reg(regno), 0);
8336 			if (err)
8337 				return err;
8338 		} else {
8339 			verifier_bug(env, "spin lock arg on unexpected helper");
8340 			return -EFAULT;
8341 		}
8342 		break;
8343 	case ARG_PTR_TO_TIMER:
8344 		err = process_timer_helper(env, reg, argno_from_reg(regno), meta);
8345 		if (err)
8346 			return err;
8347 		break;
8348 	case ARG_PTR_TO_FUNC:
8349 		meta->subprogno = reg->subprogno;
8350 		break;
8351 	case ARG_PTR_TO_MEM:
8352 		/* The access to this pointer is only checked when we hit the
8353 		 * next is_mem_size argument below.
8354 		 */
8355 		meta->raw_mode = arg_type & MEM_UNINIT;
8356 		if (arg_type & MEM_FIXED_SIZE) {
8357 			err = check_helper_mem_access(env, reg, argno_from_reg(regno), fn->arg_size[arg],
8358 						      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8359 						      false, meta);
8360 			if (err)
8361 				return err;
8362 			if (arg_type & MEM_ALIGNED)
8363 				err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
8364 		}
8365 		break;
8366 	case ARG_CONST_SIZE:
8367 		err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1),
8368 					 argno_from_reg(regno),
8369 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8370 					 BPF_WRITE : BPF_READ,
8371 					 false, meta);
8372 		break;
8373 	case ARG_CONST_SIZE_OR_ZERO:
8374 		err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1),
8375 					 argno_from_reg(regno),
8376 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8377 					 BPF_WRITE : BPF_READ,
8378 					 true, meta);
8379 		break;
8380 	case ARG_PTR_TO_DYNPTR:
8381 		err = process_dynptr_func(env, reg, argno_from_reg(regno), insn_idx, arg_type, &meta->ref_obj,
8382 					  &meta->dynptr);
8383 		if (err)
8384 			return err;
8385 		break;
8386 	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8387 		if (!tnum_is_const(reg->var_off)) {
8388 			verbose(env, "R%d is not a known constant'\n",
8389 				regno);
8390 			return -EACCES;
8391 		}
8392 		meta->mem_size = reg->var_off.value;
8393 		err = mark_chain_precision(env, regno);
8394 		if (err)
8395 			return err;
8396 		break;
8397 	case ARG_PTR_TO_CONST_STR:
8398 	{
8399 		err = check_arg_const_str(env, reg, argno_from_reg(regno));
8400 		if (err)
8401 			return err;
8402 		break;
8403 	}
8404 	case ARG_KPTR_XCHG_DEST:
8405 		err = process_kptr_func(env, regno, meta);
8406 		if (err)
8407 			return err;
8408 		break;
8409 	}
8410 
8411 	return err;
8412 }
8413 
8414 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8415 {
8416 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
8417 	enum bpf_prog_type type = resolve_prog_type(env->prog);
8418 
8419 	if (func_id != BPF_FUNC_map_update_elem &&
8420 	    func_id != BPF_FUNC_map_delete_elem)
8421 		return false;
8422 
8423 	/* It's not possible to get access to a locked struct sock in these
8424 	 * contexts, so updating is safe.
8425 	 */
8426 	switch (type) {
8427 	case BPF_PROG_TYPE_TRACING:
8428 		if (eatype == BPF_TRACE_ITER)
8429 			return true;
8430 		break;
8431 	case BPF_PROG_TYPE_SOCK_OPS:
8432 		/* map_update allowed only via dedicated helpers with event type checks */
8433 		if (func_id == BPF_FUNC_map_delete_elem)
8434 			return true;
8435 		break;
8436 	case BPF_PROG_TYPE_SOCKET_FILTER:
8437 	case BPF_PROG_TYPE_SCHED_CLS:
8438 	case BPF_PROG_TYPE_SCHED_ACT:
8439 	case BPF_PROG_TYPE_XDP:
8440 	case BPF_PROG_TYPE_SK_REUSEPORT:
8441 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
8442 	case BPF_PROG_TYPE_SK_LOOKUP:
8443 		return true;
8444 	default:
8445 		break;
8446 	}
8447 
8448 	verbose(env, "cannot update sockmap in this context\n");
8449 	return false;
8450 }
8451 
8452 bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8453 {
8454 	return env->prog->jit_requested &&
8455 	       bpf_jit_supports_subprog_tailcalls();
8456 }
8457 
/* Check that a map of the given type may be used with helper @func_id.
 *
 * The check runs in both directions: the first switch restricts which
 * helpers a given map type accepts, the second restricts which map types a
 * given helper accepts. Map types and helpers that are not listed in a
 * switch are not restricted by it.
 *
 * Returns 0 when @map is NULL or the pairing is allowed. Returns -EINVAL,
 * after writing a message to the verifier log, when the pairing is rejected
 * or when the tail_call helper is used in a program that has more than one
 * subprog while bpf_allow_tail_call_in_subprogs() is false.
 */
8458 static int check_map_func_compatibility(struct bpf_verifier_env *env,
8459 					struct bpf_map *map, int func_id)
8460 {
8461 	if (!map)
8462 		return 0;
8463 
8464 	/* We need a two way check, first is from map perspective ... */
8465 	switch (map->map_type) {
8466 	case BPF_MAP_TYPE_PROG_ARRAY:
8467 		if (func_id != BPF_FUNC_tail_call)
8468 			goto error;
8469 		break;
8470 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8471 		if (func_id != BPF_FUNC_perf_event_read &&
8472 		    func_id != BPF_FUNC_perf_event_output &&
8473 		    func_id != BPF_FUNC_skb_output &&
8474 		    func_id != BPF_FUNC_perf_event_read_value &&
8475 		    func_id != BPF_FUNC_xdp_output)
8476 			goto error;
8477 		break;
8478 	case BPF_MAP_TYPE_RINGBUF:
8479 		if (func_id != BPF_FUNC_ringbuf_output &&
8480 		    func_id != BPF_FUNC_ringbuf_reserve &&
8481 		    func_id != BPF_FUNC_ringbuf_query &&
8482 		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8483 		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8484 		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
8485 			goto error;
8486 		break;
8487 	case BPF_MAP_TYPE_USER_RINGBUF:
8488 		if (func_id != BPF_FUNC_user_ringbuf_drain)
8489 			goto error;
8490 		break;
8491 	case BPF_MAP_TYPE_STACK_TRACE:
8492 		if (func_id != BPF_FUNC_get_stackid)
8493 			goto error;
8494 		break;
8495 	case BPF_MAP_TYPE_CGROUP_ARRAY:
8496 		if (func_id != BPF_FUNC_skb_under_cgroup &&
8497 		    func_id != BPF_FUNC_current_task_under_cgroup)
8498 			goto error;
8499 		break;
8500 	case BPF_MAP_TYPE_CGROUP_STORAGE:
8501 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
8502 		if (func_id != BPF_FUNC_get_local_storage)
8503 			goto error;
8504 		break;
8505 	case BPF_MAP_TYPE_DEVMAP:
8506 	case BPF_MAP_TYPE_DEVMAP_HASH:
8507 		if (func_id != BPF_FUNC_redirect_map &&
8508 		    func_id != BPF_FUNC_map_lookup_elem)
8509 			goto error;
8510 		break;
8511 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
8512 	 * appear.
8513 	 */
8514 	case BPF_MAP_TYPE_CPUMAP:
8515 		if (func_id != BPF_FUNC_redirect_map)
8516 			goto error;
8517 		break;
8518 	case BPF_MAP_TYPE_XSKMAP:
8519 		if (func_id != BPF_FUNC_redirect_map &&
8520 		    func_id != BPF_FUNC_map_lookup_elem)
8521 			goto error;
8522 		break;
8523 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
8524 	case BPF_MAP_TYPE_HASH_OF_MAPS:
8525 		if (func_id != BPF_FUNC_map_lookup_elem)
8526 			goto error;
8527 		break;
8528 	case BPF_MAP_TYPE_SOCKMAP:
		/* may_update_sockmap() is evaluated last, i.e. only when none
		 * of the helpers listed before it matched; on refusal it may
		 * log its own message ahead of the generic one at 'error'.
		 */
8529 		if (func_id != BPF_FUNC_sk_redirect_map &&
8530 		    func_id != BPF_FUNC_sock_map_update &&
8531 		    func_id != BPF_FUNC_msg_redirect_map &&
8532 		    func_id != BPF_FUNC_sk_select_reuseport &&
8533 		    func_id != BPF_FUNC_map_lookup_elem &&
8534 		    !may_update_sockmap(env, func_id))
8535 			goto error;
8536 		break;
8537 	case BPF_MAP_TYPE_SOCKHASH:
		/* Same evaluation order as for BPF_MAP_TYPE_SOCKMAP above. */
8538 		if (func_id != BPF_FUNC_sk_redirect_hash &&
8539 		    func_id != BPF_FUNC_sock_hash_update &&
8540 		    func_id != BPF_FUNC_msg_redirect_hash &&
8541 		    func_id != BPF_FUNC_sk_select_reuseport &&
8542 		    func_id != BPF_FUNC_map_lookup_elem &&
8543 		    !may_update_sockmap(env, func_id))
8544 			goto error;
8545 		break;
8546 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
8547 		if (func_id != BPF_FUNC_sk_select_reuseport)
8548 			goto error;
8549 		break;
8550 	case BPF_MAP_TYPE_QUEUE:
8551 	case BPF_MAP_TYPE_STACK:
8552 		if (func_id != BPF_FUNC_map_peek_elem &&
8553 		    func_id != BPF_FUNC_map_pop_elem &&
8554 		    func_id != BPF_FUNC_map_push_elem)
8555 			goto error;
8556 		break;
8557 	case BPF_MAP_TYPE_SK_STORAGE:
8558 		if (func_id != BPF_FUNC_sk_storage_get &&
8559 		    func_id != BPF_FUNC_sk_storage_delete &&
8560 		    func_id != BPF_FUNC_kptr_xchg)
8561 			goto error;
8562 		break;
8563 	case BPF_MAP_TYPE_INODE_STORAGE:
8564 		if (func_id != BPF_FUNC_inode_storage_get &&
8565 		    func_id != BPF_FUNC_inode_storage_delete &&
8566 		    func_id != BPF_FUNC_kptr_xchg)
8567 			goto error;
8568 		break;
8569 	case BPF_MAP_TYPE_TASK_STORAGE:
8570 		if (func_id != BPF_FUNC_task_storage_get &&
8571 		    func_id != BPF_FUNC_task_storage_delete &&
8572 		    func_id != BPF_FUNC_kptr_xchg)
8573 			goto error;
8574 		break;
8575 	case BPF_MAP_TYPE_CGRP_STORAGE:
8576 		if (func_id != BPF_FUNC_cgrp_storage_get &&
8577 		    func_id != BPF_FUNC_cgrp_storage_delete &&
8578 		    func_id != BPF_FUNC_kptr_xchg)
8579 			goto error;
8580 		break;
8581 	case BPF_MAP_TYPE_BLOOM_FILTER:
8582 		if (func_id != BPF_FUNC_map_peek_elem &&
8583 		    func_id != BPF_FUNC_map_push_elem)
8584 			goto error;
8585 		break;
8586 	case BPF_MAP_TYPE_INSN_ARRAY:
		/* No helper is accepted for this map type. */
8587 		goto error;
8588 	default:
8589 		break;
8590 	}
8591 
8592 	/* ... and second from the function itself. */
8593 	switch (func_id) {
8594 	case BPF_FUNC_tail_call:
8595 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
8596 			goto error;
		/* This rejection has its own message and bypasses 'error'. */
8597 		if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) {
8598 			verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
8599 			return -EINVAL;
8600 		}
8601 		break;
8602 	case BPF_FUNC_perf_event_read:
8603 	case BPF_FUNC_perf_event_output:
8604 	case BPF_FUNC_perf_event_read_value:
8605 	case BPF_FUNC_skb_output:
8606 	case BPF_FUNC_xdp_output:
8607 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
8608 			goto error;
8609 		break;
8610 	case BPF_FUNC_ringbuf_output:
8611 	case BPF_FUNC_ringbuf_reserve:
8612 	case BPF_FUNC_ringbuf_query:
8613 	case BPF_FUNC_ringbuf_reserve_dynptr:
8614 	case BPF_FUNC_ringbuf_submit_dynptr:
8615 	case BPF_FUNC_ringbuf_discard_dynptr:
8616 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
8617 			goto error;
8618 		break;
8619 	case BPF_FUNC_user_ringbuf_drain:
8620 		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
8621 			goto error;
8622 		break;
8623 	case BPF_FUNC_get_stackid:
8624 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
8625 			goto error;
8626 		break;
8627 	case BPF_FUNC_current_task_under_cgroup:
8628 	case BPF_FUNC_skb_under_cgroup:
8629 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
8630 			goto error;
8631 		break;
8632 	case BPF_FUNC_redirect_map:
8633 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
8634 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
8635 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
8636 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
8637 			goto error;
8638 		break;
8639 	case BPF_FUNC_sk_redirect_map:
8640 	case BPF_FUNC_msg_redirect_map:
8641 	case BPF_FUNC_sock_map_update:
8642 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
8643 			goto error;
8644 		break;
8645 	case BPF_FUNC_sk_redirect_hash:
8646 	case BPF_FUNC_msg_redirect_hash:
8647 	case BPF_FUNC_sock_hash_update:
8648 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
8649 			goto error;
8650 		break;
8651 	case BPF_FUNC_get_local_storage:
8652 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
8653 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
8654 			goto error;
8655 		break;
8656 	case BPF_FUNC_sk_select_reuseport:
8657 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
8658 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
8659 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
8660 			goto error;
8661 		break;
8662 	case BPF_FUNC_map_pop_elem:
8663 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8664 		    map->map_type != BPF_MAP_TYPE_STACK)
8665 			goto error;
8666 		break;
8667 	case BPF_FUNC_map_peek_elem:
8668 	case BPF_FUNC_map_push_elem:
8669 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8670 		    map->map_type != BPF_MAP_TYPE_STACK &&
8671 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
8672 			goto error;
8673 		break;
8674 	case BPF_FUNC_map_lookup_percpu_elem:
8675 		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
8676 		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
8677 		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
8678 			goto error;
8679 		break;
8680 	case BPF_FUNC_sk_storage_get:
8681 	case BPF_FUNC_sk_storage_delete:
8682 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
8683 			goto error;
8684 		break;
8685 	case BPF_FUNC_inode_storage_get:
8686 	case BPF_FUNC_inode_storage_delete:
8687 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
8688 			goto error;
8689 		break;
8690 	case BPF_FUNC_task_storage_get:
8691 	case BPF_FUNC_task_storage_delete:
8692 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
8693 			goto error;
8694 		break;
8695 	case BPF_FUNC_cgrp_storage_get:
8696 	case BPF_FUNC_cgrp_storage_delete:
8697 		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
8698 			goto error;
8699 		break;
8700 	default:
8701 		break;
8702 	}
8703 
8704 	return 0;
	/* Common rejection path: report the offending map type / helper pair. */
8705 error:
8706 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
8707 		map->map_type, func_id_name(func_id), func_id);
8708 	return -EINVAL;
8709 }
8710 
8711 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
8712 {
8713 	int count = 0;
8714 
8715 	if (arg_type_is_raw_mem(fn->arg1_type))
8716 		count++;
8717 	if (arg_type_is_raw_mem(fn->arg2_type))
8718 		count++;
8719 	if (arg_type_is_raw_mem(fn->arg3_type))
8720 		count++;
8721 	if (arg_type_is_raw_mem(fn->arg4_type))
8722 		count++;
8723 	if (arg_type_is_raw_mem(fn->arg5_type))
8724 		count++;
8725 
8726 	/* We only support one arg being in raw mode at the moment,
8727 	 * which is sufficient for the helper functions we have
8728 	 * right now.
8729 	 */
8730 	return count <= 1;
8731 }
8732 
8733 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
8734 {
8735 	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
8736 	bool has_size = fn->arg_size[arg] != 0;
8737 	bool is_next_size = false;
8738 
8739 	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
8740 		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
8741 
8742 	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
8743 		return is_next_size;
8744 
8745 	return has_size == is_next_size || is_next_size == is_fixed;
8746 }
8747 
8748 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
8749 {
8750 	/* bpf_xxx(..., buf, len) call will access 'len'
8751 	 * bytes from memory 'buf'. Both arg types need
8752 	 * to be paired, so make sure there's no buggy
8753 	 * helper function specification.
8754 	 */
8755 	if (arg_type_is_mem_size(fn->arg1_type) ||
8756 	    check_args_pair_invalid(fn, 0) ||
8757 	    check_args_pair_invalid(fn, 1) ||
8758 	    check_args_pair_invalid(fn, 2) ||
8759 	    check_args_pair_invalid(fn, 3) ||
8760 	    check_args_pair_invalid(fn, 4))
8761 		return false;
8762 
8763 	return true;
8764 }
8765 
8766 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
8767 {
8768 	int i;
8769 
8770 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8771 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
8772 			return !!fn->arg_btf_id[i];
8773 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
8774 			return fn->arg_btf_id[i] == BPF_PTR_POISON;
8775 		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
8776 		    /* arg_btf_id and arg_size are in a union. */
8777 		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
8778 		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
8779 			return false;
8780 	}
8781 
8782 	return true;
8783 }
8784 
8785 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn)
8786 {
8787 	int i;
8788 
8789 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8790 		enum bpf_arg_type arg_type = fn->arg_type[i];
8791 
8792 		if (base_type(arg_type) != ARG_PTR_TO_MEM)
8793 			continue;
8794 		if (!(arg_type & (MEM_WRITE | MEM_RDONLY)))
8795 			return false;
8796 	}
8797 
8798 	return true;
8799 }
8800 
8801 static bool check_proto_release_reg(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta)
8802 {
8803 	int i;
8804 
8805 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8806 		enum bpf_arg_type arg_type = fn->arg_type[i];
8807 
8808 		if (arg_type_is_release(arg_type)) {
8809 			if (meta->release_regno)
8810 				return false;
8811 			meta->release_regno = i + 1;
8812 		}
8813 	}
8814 
8815 	return true;
8816 }
8817 
/* Validate a helper prototype. Returns 0 when every check passes and
 * -EINVAL as soon as one fails. The checks run in a fixed order;
 * check_proto_release_reg() writes to @meta and therefore only runs when
 * the checks before it succeeded.
 */
static int check_func_proto(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta)
{
	if (!check_raw_mode_ok(fn))
		return -EINVAL;
	if (!check_arg_pair_ok(fn))
		return -EINVAL;
	if (!check_mem_arg_rw_flag_ok(fn))
		return -EINVAL;
	if (!check_proto_release_reg(fn, meta))
		return -EINVAL;
	if (!check_btf_id_ok(fn))
		return -EINVAL;

	return 0;
}
8826 
8827 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
8828  * are now invalid, so turn them into unknown SCALAR_VALUE.
8829  *
8830  * This also applies to dynptr slices belonging to skb and xdp dynptrs,
8831  * since these slices point to packet data.
8832  */
8833 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
8834 {
8835 	struct bpf_func_state *state;
8836 	struct bpf_reg_state *reg;
8837 
8838 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8839 		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
8840 			mark_reg_invalid(env, reg);
8841 	}));
8842 }
8843 
/* Negative marker values that mark_pkt_end() stores in reg->range of a
 * PTR_TO_PACKET register.
 */
8844 enum {
	/* stored when range_open is false: the pkt >= pkt_end case */
8845 	AT_PKT_END = -1,
	/* stored when range_open is true: the pkt > pkt_end case */
8846 	BEYOND_PKT_END = -2,
8847 };
8848 
8849 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
8850 {
8851 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8852 	struct bpf_reg_state *reg = &state->regs[regn];
8853 
8854 	if (reg->type != PTR_TO_PACKET)
8855 		/* PTR_TO_PACKET_META is not supported yet */
8856 		return;
8857 
8858 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
8859 	 * How far beyond pkt_end it goes is unknown.
8860 	 * if (!range_open) it's the case of pkt >= pkt_end
8861 	 * if (range_open) it's the case of pkt > pkt_end
8862 	 * hence this pointer is at least 1 byte bigger than pkt_end
8863 	 */
8864 	if (range_open)
8865 		reg->range = BEYOND_PKT_END;
8866 	else
8867 		reg->range = AT_PKT_END;
8868 }
8869 
8870 static int release_reference_nomark(struct bpf_verifier_state *state, int id)
8871 {
8872 	int i;
8873 
8874 	for (i = 0; i < state->acquired_refs; i++) {
8875 		if (state->refs[i].type != REF_TYPE_PTR)
8876 			continue;
8877 		if (state->refs[i].id == id) {
8878 			release_reference_state(state, i);
8879 			return 0;
8880 		}
8881 	}
8882 	return -EINVAL;
8883 }
8884 
8885 static int idstack_push(struct bpf_idmap *idmap, u32 id)
8886 {
8887 	int i;
8888 
8889 	if (!id)
8890 		return 0;
8891 
8892 	for (i = 0; i < idmap->cnt; i++)
8893 		if (idmap->map[i].old == id)
8894 			return 0;
8895 
8896 	if (WARN_ON_ONCE(idmap->cnt >= BPF_ID_MAP_SIZE))
8897 		return -EFAULT;
8898 
8899 	idmap->map[idmap->cnt++].old = id;
8900 	return 0;
8901 }
8902 
8903 static int idstack_pop(struct bpf_idmap *idmap)
8904 {
8905 	if (!idmap->cnt)
8906 		return 0;
8907 
8908 	return idmap->map[--idmap->cnt].old;
8909 }
8910 
8911 /* Release id and objects derived from it iteratively in a DFS manner */
/* env->idmap_scratch serves as the work stack of ids still to be processed.
 *
 * If a reference state exists for @id it is dropped first. Then, for every
 * id popped from the work stack:
 *  - an acquired REF_TYPE_PTR reference whose parent_id equals the popped id
 *    is reported as a leak and verification fails with -EINVAL;
 *  - every visited register whose id or parent_id equals the popped id is
 *    invalidated, and the ids of registers derived from it (parent_id match)
 *    are pushed for a later iteration.
 *
 * Returns 0 on success, -EINVAL on a leaked child reference, or the error
 * returned by idstack_push().
 */
8912 static int release_reference(struct bpf_verifier_env *env, int id)
8913 {
	/* presumably limits the stack slots visited below to spilled
	 * registers and dynptr slots - confirm against the iterator macro
	 */
8914 	u32 mask = (1 << STACK_SPILL) | (1 << STACK_DYNPTR);
8915 	struct bpf_verifier_state *vstate = env->cur_state;
8916 	struct bpf_idmap *idstack = &env->idmap_scratch;
8917 	struct bpf_stack_state *stack;
8918 	struct bpf_func_state *state;
8919 	struct bpf_reg_state *reg;
8920 	int i, err;
8921 
	/* start from an empty work stack that holds only @id */
8922 	idstack->cnt = 0;
8923 	err = idstack_push(idstack, id);
8924 	if (err)
8925 		return err;
8926 
	/* @id does not have to be an acquired reference; drop it if it is */
8927 	if (find_reference_state(vstate, id))
8928 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
8929 
	/* idstack_pop() returns 0 once the work stack is empty */
8930 	while ((id = idstack_pop(idstack))) {
8931 		/*
8932 		 * Child references are inaccessible after parent is released,
8933 		 * any child references that exist at this point are a leak.
8934 		 */
8935 		for (i = 0; i < vstate->acquired_refs; i++) {
8936 			if (vstate->refs[i].type != REF_TYPE_PTR)
8937 				continue;
8938 			if (vstate->refs[i].parent_id != id)
8939 				continue;
8940 			verbose(env, "Leaking reference id=%d alloc_insn=%d. Release it first.\n",
8941 				vstate->refs[i].id, vstate->refs[i].insn_idx);
8942 			return -EINVAL;
8943 		}
8944 
8945 		bpf_for_each_reg_in_vstate_mask(vstate, state, reg, stack, mask, ({
8946 			if (reg->id != id && reg->parent_id != id)
8947 				continue;
8948 
8949 			/* Free objects derived from the current object */
8950 			if (reg->parent_id == id) {
8951 				err = idstack_push(idstack, reg->id);
8952 				if (err)
8953 					return err;
8954 			}
8955 
			/* 'stack' is NULL here for some visited entries
			 * (presumably plain registers - confirm against the
			 * iterator macro); otherwise the slot type selects how
			 * the entry is invalidated
			 */
8956 			if (!stack || stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL)
8957 				mark_reg_invalid(env, reg);
8958 			else if (stack->slot_type[BPF_REG_SIZE - 1] == STACK_DYNPTR)
8959 				invalidate_dynptr(env, stack);
8960 		}));
8961 	}
8962 
8963 	return 0;
8964 }
8965 
8966 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
8967 {
8968 	struct bpf_func_state *unused;
8969 	struct bpf_reg_state *reg;
8970 
8971 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
8972 		if (type_is_non_owning_ref(reg->type))
8973 			mark_reg_invalid(env, reg);
8974 	}));
8975 }
8976 
8977 static void invalidate_rcu_protected_refs(struct bpf_verifier_env *env)
8978 {
8979 	struct bpf_stack_state *stack;
8980 	struct bpf_func_state *state;
8981 	struct bpf_reg_state *reg;
8982 	u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
8983 
8984 	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, clear_mask, ({
8985 		if (reg->type & MEM_RCU) {
8986 			reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
8987 			reg->type |= PTR_UNTRUSTED;
8988 		}
8989 	}));
8990 }
8991 
8992 static int ref_convert_alloc_rcu_protected(struct bpf_verifier_env *env, u32 id)
8993 {
8994 	struct bpf_func_state *state;
8995 	struct bpf_reg_state *reg;
8996 	int err;
8997 
8998 	err = release_reference_nomark(env->cur_state, id);
8999 
9000 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9001 		if (reg->id != id)
9002 			continue;
9003 		if ((reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
9004 			reg->id = 0;
9005 			reg->type &= ~MEM_ALLOC;
9006 			reg->type |= MEM_RCU;
9007 		}
9008 	}));
9009 
9010 	return err;
9011 }
9012 
9013 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9014 				    struct bpf_reg_state *regs)
9015 {
9016 	int i;
9017 
9018 	/* after the call registers r0 - r5 were scratched */
9019 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9020 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
9021 		__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9022 	}
9023 }
9024 
9025 static void invalidate_outgoing_stack_args(const struct bpf_verifier_env *env,
9026 					   struct bpf_func_state *state)
9027 {
9028 	int i, nslots = state->out_stack_arg_cnt;
9029 
9030 	for (i = 0; i < nslots; i++)
9031 		bpf_mark_reg_not_init(env, &state->stack_arg_regs[i]);
9032 }
9033 
/* Callback that fills in the state of a freshly created @callee frame from
 * the @caller frame for the call at @insn_idx. It is invoked by
 * setup_func_entry() and push_callback_call(); a non-zero return value is
 * treated as an error by both.
 */
9034 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
9035 				   struct bpf_func_state *caller,
9036 				   struct bpf_func_state *callee,
9037 				   int insn_idx);
9038 
/* Forward declaration; the signature matches set_callee_state_fn. */
9039 static int set_callee_state(struct bpf_verifier_env *env,
9040 			    struct bpf_func_state *caller,
9041 			    struct bpf_func_state *callee, int insn_idx);
9042 
9043 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
9044 			    set_callee_state_fn set_callee_state_cb,
9045 			    struct bpf_verifier_state *state)
9046 {
9047 	struct bpf_func_state *caller, *callee;
9048 	int err;
9049 
9050 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9051 		verbose(env, "the call stack of %d frames is too deep\n",
9052 			state->curframe + 2);
9053 		return -E2BIG;
9054 	}
9055 
9056 	if (state->frame[state->curframe + 1]) {
9057 		verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
9058 		return -EFAULT;
9059 	}
9060 
9061 	caller = state->frame[state->curframe];
9062 	callee = kzalloc_obj(*callee, GFP_KERNEL_ACCOUNT);
9063 	if (!callee)
9064 		return -ENOMEM;
9065 	state->frame[state->curframe + 1] = callee;
9066 
9067 	/* callee cannot access r0, r6 - r9 for reading and has to write
9068 	 * into its own stack before reading from it.
9069 	 * callee can read/write into caller's stack
9070 	 */
9071 	init_func_state(env, callee,
9072 			/* remember the callsite, it will be used by bpf_exit */
9073 			callsite,
9074 			state->curframe + 1 /* frameno within this callchain */,
9075 			subprog /* subprog number within this prog */);
9076 	err = set_callee_state_cb(env, caller, callee, callsite);
9077 	if (err)
9078 		goto err_out;
9079 
9080 	/* only increment it after check_reg_arg() finished */
9081 	state->curframe++;
9082 
9083 	return 0;
9084 
9085 err_out:
9086 	free_func_state(callee);
9087 	state->frame[state->curframe + 1] = NULL;
9088 	return err;
9089 }
9090 
/* Check the arguments the caller passes to @subprog against the argument
 * description obtained through btf_prepare_func_args().
 *
 * Outgoing stack arguments are checked first; then each described argument
 * is matched against the corresponding caller register according to its
 * arg_type (scalar, untrusted, ctx, memory, arena, dynptr or BTF id).
 *
 * Returns 0 when all arguments match. Returns the error of the failing
 * check otherwise, -EINVAL for a plain type mismatch, or -EFAULT for an
 * argument type this function does not recognize.
 *
 * NOTE(review): the @btf parameter is not referenced in this function.
 */
9091 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
9092 				    const struct btf *btf,
9093 				    struct bpf_reg_state *regs)
9094 {
9095 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
9096 	struct bpf_func_state *caller = cur_func(env);
9097 	struct bpf_verifier_log *log = &env->log;
9098 	struct ref_obj_desc ref_obj = {};
9099 	u32 i;
9100 	int ret, err;
9101 
9102 	ret = btf_prepare_func_args(env, subprog);
9103 	if (ret) {
		/* The stack-argument check still runs when the subprog has
		 * incoming stack arguments, and its error takes precedence
		 * over the btf_prepare_func_args() error.
		 */
9104 		if (bpf_in_stack_arg_cnt(sub) > 0) {
9105 			err = check_outgoing_stack_args(env, caller, sub->arg_cnt);
9106 			if (err)
9107 				return err;
9108 		}
9109 		return ret;
9110 	}
9111 
9112 	ret = check_outgoing_stack_args(env, caller, sub->arg_cnt);
9113 	if (ret)
9114 		return ret;
9115 
9116 	/* check that BTF function arguments match actual types that the
9117 	 * verifier sees.
9118 	 */
9119 	for (i = 0; i < sub->arg_cnt; i++) {
		/* argno is derived from the 1-based argument position */
9120 		argno_t argno = argno_from_arg(i + 1);
9121 		struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i);
9122 		struct bpf_subprog_arg_info *arg = &sub->args[i];
9123 
9124 		if (arg->arg_type == ARG_ANYTHING) {
9125 			if (reg->type != SCALAR_VALUE) {
9126 				bpf_log(log, "%s is not a scalar\n", reg_arg_name(env, argno));
9127 				return -EINVAL;
9128 			}
9129 		} else if (arg->arg_type & PTR_UNTRUSTED) {
9130 			/*
9131 			 * Anything is allowed for untrusted arguments, as these are
9132 			 * read-only and probe read instructions would protect against
9133 			 * invalid memory access.
9134 			 */
9135 		} else if (arg->arg_type == ARG_PTR_TO_CTX) {
9136 			ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_CTX);
9137 			if (ret < 0)
9138 				return ret;
9139 			/* If function expects ctx type in BTF check that caller
9140 			 * is passing PTR_TO_CTX.
9141 			 */
9142 			if (reg->type != PTR_TO_CTX) {
9143 				bpf_log(log, "%s expects pointer to ctx\n",
9144 					reg_arg_name(env, argno));
9145 				return -EINVAL;
9146 			}
9147 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
9148 			ret = check_func_arg_reg_off(env, reg, argno, ARG_DONTCARE);
9149 			if (ret < 0)
9150 				return ret;
			/* any check_mem_reg() failure is reported as -EINVAL */
9151 			if (check_mem_reg(env, reg, argno, arg->mem_size))
9152 				return -EINVAL;
9153 			if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
9154 				bpf_log(log, "%s is expected to be non-NULL\n",
9155 					reg_arg_name(env, argno));
9156 				return -EINVAL;
9157 			}
9158 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
9159 			/*
9160 			 * Can pass any value and the kernel won't crash, but
9161 			 * only PTR_TO_ARENA or SCALAR make sense. Everything
9162 			 * else is a bug in the bpf program. Point it out to
9163 			 * the user at the verification time instead of
9164 			 * run-time debug nightmare.
9165 			 */
9166 			if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
9167 				bpf_log(log, "%s is not a pointer to arena or scalar.\n",
9168 					reg_arg_name(env, argno));
9169 				return -EINVAL;
9170 			}
9171 		} else if (arg->arg_type == ARG_PTR_TO_DYNPTR) {
9172 			ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_DYNPTR);
9173 			if (ret)
9174 				return ret;
9175 
9176 			ret = process_dynptr_func(env, reg, argno, -1, arg->arg_type, &ref_obj, NULL);
9177 			if (ret)
9178 				return ret;
9179 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
9180 			struct bpf_call_arg_meta meta;
			/* NOTE(review): shadows the function-scope 'err' */
9181 			int err;
9182 
			/* a NULL register is fine for a nullable argument */
9183 			if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type))
9184 				continue;
9185 
9186 			memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
9187 			err = check_reg_type(env, reg, argno, arg->arg_type, &arg->btf_id, &meta);
			/* the offset check only runs when the type check passed */
9188 			err = err ?: check_func_arg_reg_off(env, reg, argno, arg->arg_type);
9189 			if (err)
9190 				return err;
9191 		} else {
9192 			verifier_bug(env, "unrecognized %s type %d",
9193 				     reg_arg_name(env, argno), arg->arg_type);
9194 			return -EFAULT;
9195 		}
9196 	}
9197 
9198 	return 0;
9199 }
9200 
9201 /* Compare BTF of a function call with given bpf_reg_state.
9202  * Returns:
9203  * EFAULT - there is a verifier bug. Abort verification.
9204  * EINVAL - there is a type mismatch or BTF is not available.
9205  * 0 - BTF matches with what bpf_reg_state expects.
9206  * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
9207  */
9208 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
9209 				  struct bpf_reg_state *regs)
9210 {
9211 	struct bpf_prog *prog = env->prog;
9212 	struct btf *btf = prog->aux->btf;
9213 	u32 btf_id;
9214 	int err;
9215 
9216 	if (!prog->aux->func_info)
9217 		return -EINVAL;
9218 
9219 	btf_id = prog->aux->func_info[subprog].type_id;
9220 	if (!btf_id)
9221 		return -EFAULT;
9222 
9223 	if (prog->aux->func_info_aux[subprog].unreliable)
9224 		return -EINVAL;
9225 
9226 	err = btf_check_func_arg_match(env, subprog, btf, regs);
9227 	/* Compiler optimizations can remove arguments from static functions
9228 	 * or mismatched type can be passed into a global function.
9229 	 * In such cases mark the function as unreliable from BTF point of view.
9230 	 */
9231 	if (err)
9232 		prog->aux->func_info_aux[subprog].unreliable = true;
9233 	return err;
9234 }
9235 
/* Queue verification of callback @subprog for the callback-calling helper
 * or kfunc at @insn / @insn_idx.
 *
 * @set_callee_state_cb sets up the callback's frame from the caller's frame.
 *
 * For async callbacks a separate state is pushed with push_async_cb() and
 * its frame 0 is populated. For all other callbacks a state that starts at
 * the subprog's first instruction is pushed with push_stack() and a callee
 * frame is set up in that pushed state, while the current state proceeds
 * with the next instruction in the current frame.
 *
 * Returns 0 on success, -EFAULT on a verifier bug (including an insn that
 * is not marked as callback-calling), or the error of the failing step.
 */
9236 static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9237 			      int insn_idx, int subprog,
9238 			      set_callee_state_fn set_callee_state_cb)
9239 {
9240 	struct bpf_verifier_state *state = env->cur_state, *callback_state;
9241 	struct bpf_func_state *caller, *callee;
9242 	int err;
9243 
9244 	caller = state->frame[state->curframe];
	/* only -EFAULT is acted upon here; any other result is ignored */
9245 	err = btf_check_subprog_call(env, subprog, caller->regs);
9246 	if (err == -EFAULT)
9247 		return err;
9248 
9249 	/* set_callee_state is used for direct subprog calls, but we are
9250 	 * interested in validating only BPF helpers that can call subprogs as
9251 	 * callbacks
9252 	 */
9253 	env->subprog_info[subprog].is_cb = true;
9254 	if (bpf_pseudo_kfunc_call(insn) &&
9255 	    !is_callback_calling_kfunc(insn->imm)) {
9256 		verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
9257 			     func_id_name(insn->imm), insn->imm);
9258 		return -EFAULT;
9259 	} else if (!bpf_pseudo_kfunc_call(insn) &&
9260 		   !is_callback_calling_function(insn->imm)) { /* helper */
9261 		verifier_bug(env, "helper %s#%d not marked as callback-calling",
9262 			     func_id_name(insn->imm), insn->imm);
9263 		return -EFAULT;
9264 	}
9265 
9266 	if (bpf_is_async_callback_calling_insn(insn)) {
9267 		struct bpf_verifier_state *async_cb;
9268 
9269 		/* there is no real recursion here. timer and workqueue callbacks are async */
9270 		env->subprog_info[subprog].is_async_cb = true;
9271 		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
9272 					 insn_idx, subprog,
9273 					 is_async_cb_sleepable(env, insn));
9274 		if (IS_ERR(async_cb))
9275 			return PTR_ERR(async_cb);
9276 		callee = async_cb->frame[0];
		/* one more async entry than the frame issuing the call */
9277 		callee->async_entry_cnt = caller->async_entry_cnt + 1;
9278 
9279 		/* Convert bpf_timer_set_callback() args into timer callback args */
9280 		err = set_callee_state_cb(env, caller, callee, insn_idx);
9281 		if (err)
9282 			return err;
9283 
9284 		return 0;
9285 	}
9286 
9287 	/* for callback functions enqueue entry to callback and
9288 	 * proceed with next instruction within current frame.
9289 	 */
9290 	callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
9291 	if (IS_ERR(callback_state))
9292 		return PTR_ERR(callback_state);
9293 
9294 	err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
9295 			       callback_state);
9296 	if (err)
9297 		return err;
9298 
	/* Depth accounting: in the pushed state bump the unroll depth and
	 * the callback depth of the frame below the new callee frame; in
	 * the current state reset the caller's callback depth.
	 */
9299 	callback_state->callback_unroll_depth++;
9300 	callback_state->frame[callback_state->curframe - 1]->callback_depth++;
9301 	caller->callback_depth = 0;
9302 	return 0;
9303 }
9304 
9305 static int process_bpf_exit_full(struct bpf_verifier_env *env,
9306 				 bool *do_print_state, bool exception_exit);
9307 
/*
 * Verify a BPF-to-BPF call instruction.
 *
 * @env:      verifier environment
 * @insn:     the call instruction; insn->imm holds the callee offset relative
 *            to the instruction following the call
 * @insn_idx: on entry the index of the call instruction; for a static callee
 *            it is rewritten to point just before the callee's first
 *            instruction
 *
 * A global callee is not walked here: the caller's arguments are checked,
 * the subprog is flagged as called, caller-saved registers are clobbered and
 * verification goes on with the instruction after the call. A static callee
 * gets a new frame and verification continues inside it.
 *
 * Returns 0 on success or a negative errno.
 */
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			   int *insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_subprog_info *caller_info;
	u16 callee_incoming, stack_arg_cnt;
	struct bpf_func_state *caller;
	int err, subprog, target_insn;

	/* the call target is relative to the instruction after the call */
	target_insn = *insn_idx + insn->imm + 1;
	subprog = bpf_find_subprog(env, target_insn);
	if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
			    target_insn))
		return -EFAULT;

	caller = state->frame[state->curframe];
	err = btf_check_subprog_call(env, subprog, caller->regs);
	/* -EFAULT is fatal for any callee; every other error is only acted
	 * upon in the global subprog branch below
	 */
	if (err == -EFAULT)
		return err;
	if (bpf_subprog_is_global(env, subprog)) {
		const char *sub_name = subprog_name(env, subprog);

		if (env->cur_state->active_locks) {
			verbose(env, "global function calls are not allowed while holding a lock,\n"
				     "use static function instead\n");
			return -EINVAL;
		}

		if (env->subprog_info[subprog].might_sleep && !in_sleepable_context(env)) {
			verbose(env, "sleepable global function %s() called in %s\n",
				sub_name, non_sleepable_context_description(env));
			return -EINVAL;
		}

		/* non-fatal argument mismatch reported by btf_check_subprog_call() */
		if (err) {
			verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
				subprog, sub_name);
			return err;
		}

		if (env->log.level & BPF_LOG_LEVEL)
			verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
				subprog, sub_name);
		if (env->subprog_info[subprog].changes_pkt_data)
			clear_all_pkt_pointers(env);
		/* mark global subprog for verifying after main prog */
		subprog_aux(env, subprog)->called = true;
		clear_caller_saved_regs(env, caller->regs);
		invalidate_outgoing_stack_args(env, cur_func(env));

		/* All non-void global functions return a 64-bit SCALAR_VALUE. */
		if (!subprog_returns_void(env, subprog)) {
			mark_reg_unknown(env, caller->regs, BPF_REG_0);
			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
		}

		if (env->subprog_info[subprog].might_throw) {
			struct bpf_verifier_state *branch;

			/* queue the normal return path (the instruction after
			 * the call) for later, then process the exception
			 * exit in the current state
			 */
			branch = push_stack(env, *insn_idx + 1, *insn_idx, false);
			if (IS_ERR(branch)) {
				verbose(env, "failed to push state for global subprog exception path\n");
				return PTR_ERR(branch);
			}
			return process_bpf_exit_full(env, NULL, true);
		}

		/* continue with next insn after call */
		return 0;
	}

	/*
	 * Track caller's total stack arg count (incoming + max outgoing).
	 * This is needed so the JIT knows how much stack arg space to allocate.
	 */
	caller_info = &env->subprog_info[caller->subprogno];
	callee_incoming = bpf_in_stack_arg_cnt(&env->subprog_info[subprog]);
	stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + callee_incoming;
	/* only ever grow the recorded count */
	if (stack_arg_cnt > caller_info->stack_arg_cnt)
		caller_info->stack_arg_cnt = stack_arg_cnt;

	/* for regular function entry setup new frame and continue
	 * from that frame.
	 */
	err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
	if (err)
		return err;

	clear_caller_saved_regs(env, caller->regs);

	/* and go analyze first insn of the callee; the "- 1" presumably
	 * compensates for the caller advancing the index afterwards
	 * (TODO confirm against the main instruction loop)
	 */
	*insn_idx = env->subprog_info[subprog].start - 1;

	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "caller:\n");
		print_verifier_state(env, state, caller->frameno, true);
		verbose(env, "callee:\n");
		print_verifier_state(env, state, state->curframe, true);
	}

	return 0;
}
9410 
9411 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9412 				   struct bpf_func_state *caller,
9413 				   struct bpf_func_state *callee)
9414 {
9415 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9416 	 *      void *callback_ctx, u64 flags);
9417 	 * callback_fn(struct bpf_map *map, void *key, void *value,
9418 	 *      void *callback_ctx);
9419 	 */
9420 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9421 
9422 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9423 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9424 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9425 
9426 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9427 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9428 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9429 
9430 	/* pointer to stack or null */
9431 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9432 
9433 	/* unused */
9434 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9435 	return 0;
9436 }
9437 
9438 static int set_callee_state(struct bpf_verifier_env *env,
9439 			    struct bpf_func_state *caller,
9440 			    struct bpf_func_state *callee, int insn_idx)
9441 {
9442 	int i;
9443 
9444 	/* copy r1 - r5 args that callee can access.  The copy includes parent
9445 	 * pointers, which connects us up to the liveness chain
9446 	 */
9447 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9448 		callee->regs[i] = caller->regs[i];
9449 	return 0;
9450 }
9451 
9452 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9453 				       struct bpf_func_state *caller,
9454 				       struct bpf_func_state *callee,
9455 				       int insn_idx)
9456 {
9457 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9458 	struct bpf_map *map;
9459 	int err;
9460 
9461 	/* valid map_ptr and poison value does not matter */
9462 	map = insn_aux->map_ptr_state.map_ptr;
9463 	if (!map->ops->map_set_for_each_callback_args ||
9464 	    !map->ops->map_for_each_callback) {
9465 		verbose(env, "callback function not allowed for map\n");
9466 		return -ENOTSUPP;
9467 	}
9468 
9469 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9470 	if (err)
9471 		return err;
9472 
9473 	callee->in_callback_fn = true;
9474 	callee->callback_ret_range = retval_range(0, 1);
9475 	return 0;
9476 }
9477 
9478 static int set_loop_callback_state(struct bpf_verifier_env *env,
9479 				   struct bpf_func_state *caller,
9480 				   struct bpf_func_state *callee,
9481 				   int insn_idx)
9482 {
9483 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9484 	 *	    u64 flags);
9485 	 * callback_fn(u64 index, void *callback_ctx);
9486 	 */
9487 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9488 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9489 
9490 	/* unused */
9491 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9492 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9493 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9494 
9495 	callee->in_callback_fn = true;
9496 	callee->callback_ret_range = retval_range(0, 1);
9497 	return 0;
9498 }
9499 
9500 static int set_timer_callback_state(struct bpf_verifier_env *env,
9501 				    struct bpf_func_state *caller,
9502 				    struct bpf_func_state *callee,
9503 				    int insn_idx)
9504 {
9505 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9506 
9507 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9508 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9509 	 */
9510 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9511 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9512 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9513 
9514 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9515 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9516 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9517 
9518 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9519 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9520 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9521 
9522 	/* unused */
9523 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9524 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9525 	callee->in_async_callback_fn = true;
9526 	callee->callback_ret_range = retval_range(0, 0);
9527 	return 0;
9528 }
9529 
9530 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9531 				       struct bpf_func_state *caller,
9532 				       struct bpf_func_state *callee,
9533 				       int insn_idx)
9534 {
9535 	/* bpf_find_vma(struct task_struct *task, u64 addr,
9536 	 *               void *callback_fn, void *callback_ctx, u64 flags)
9537 	 * (callback_fn)(struct task_struct *task,
9538 	 *               struct vm_area_struct *vma, void *callback_ctx);
9539 	 */
9540 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9541 
9542 	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9543 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9544 	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
9545 	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
9546 
9547 	/* pointer to stack or null */
9548 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9549 
9550 	/* unused */
9551 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9552 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9553 	callee->in_callback_fn = true;
9554 	callee->callback_ret_range = retval_range(0, 1);
9555 	return 0;
9556 }
9557 
9558 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9559 					   struct bpf_func_state *caller,
9560 					   struct bpf_func_state *callee,
9561 					   int insn_idx)
9562 {
9563 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
9564 	 *			  callback_ctx, u64 flags);
9565 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9566 	 */
9567 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9568 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9569 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9570 
9571 	/* unused */
9572 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9573 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9574 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9575 
9576 	callee->in_callback_fn = true;
9577 	callee->callback_ret_range = retval_range(0, 1);
9578 	return 0;
9579 }
9580 
9581 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9582 					 struct bpf_func_state *caller,
9583 					 struct bpf_func_state *callee,
9584 					 int insn_idx)
9585 {
9586 	/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9587 	 *                     bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9588 	 *
9589 	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9590 	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9591 	 * by this point, so look at 'root'
9592 	 */
9593 	struct btf_field *field;
9594 
9595 	field = reg_find_field_offset(&caller->regs[BPF_REG_1],
9596 				      caller->regs[BPF_REG_1].var_off.value,
9597 				      BPF_RB_ROOT);
9598 	if (!field || !field->graph_root.value_btf_id)
9599 		return -EFAULT;
9600 
9601 	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
9602 	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
9603 	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
9604 	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
9605 
9606 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9607 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9608 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9609 	callee->in_callback_fn = true;
9610 	callee->callback_ret_range = retval_range(0, 1);
9611 	return 0;
9612 }
9613 
9614 static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
9615 						 struct bpf_func_state *caller,
9616 						 struct bpf_func_state *callee,
9617 						 int insn_idx)
9618 {
9619 	struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
9620 
9621 	/*
9622 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9623 	 */
9624 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9625 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9626 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9627 
9628 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9629 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9630 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9631 
9632 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9633 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9634 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9635 
9636 	/* unused */
9637 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9638 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9639 	callee->in_async_callback_fn = true;
9640 	callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
9641 	return 0;
9642 }
9643 
9644 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9645 
9646 /* Are we currently verifying the callback for a rbtree helper that must
9647  * be called with lock held? If so, no need to complain about unreleased
9648  * lock
9649  */
9650 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9651 {
9652 	struct bpf_verifier_state *state = env->cur_state;
9653 	struct bpf_insn *insn = env->prog->insnsi;
9654 	struct bpf_func_state *callee;
9655 	int kfunc_btf_id;
9656 
9657 	if (!state->curframe)
9658 		return false;
9659 
9660 	callee = state->frame[state->curframe];
9661 
9662 	if (!callee->in_callback_fn)
9663 		return false;
9664 
9665 	kfunc_btf_id = insn[callee->callsite].imm;
9666 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9667 }
9668 
9669 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
9670 {
9671 	if (range.return_32bit)
9672 		return range.minval <= reg_s32_min(reg) && reg_s32_max(reg) <= range.maxval;
9673 	else
9674 		return range.minval <= reg_smin(reg) && reg_smax(reg) <= range.maxval;
9675 }
9676 
/*
 * Handle 'exit' executed in a frame other than frame 0: validate the return
 * value, pick the instruction at which the caller resumes, and tear down the
 * callee frame.
 *
 * @env:      verifier environment
 * @insn_idx: out: the instruction index the caller continues from
 *
 * A synchronous callback frame resumes at the call site itself so that the
 * call is processed again; an ordinary callee resumes at the instruction
 * after the call and hands its R0 to the caller.
 *
 * Returns 0 on success or a negative errno.
 */
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state, *prev_st;
	struct bpf_func_state *caller, *callee;
	struct bpf_reg_state *r0;
	bool in_callback_fn;
	int err;

	callee = state->frame[state->curframe];
	r0 = &callee->regs[BPF_REG_0];
	if (r0->type == PTR_TO_STACK) {
		/* technically it's ok to return caller's stack pointer
		 * (or caller's caller's pointer) back to the caller,
		 * since these pointers are valid. Only current stack
		 * pointer will be invalid as soon as function exits,
		 * but let's be conservative
		 */
		verbose(env, "cannot return stack pointer to the caller\n");
		return -EINVAL;
	}

	caller = state->frame[state->curframe - 1];
	if (callee->in_callback_fn) {
		if (r0->type != SCALAR_VALUE) {
			verbose(env, "R0 not a scalar value\n");
			return -EACCES;
		}

		/* we are going to rely on register's precise value */
		err = mark_chain_precision(env, BPF_REG_0);
		if (err)
			return err;

		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
		if (!retval_range_within(callee->callback_ret_range, r0)) {
			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
					       "At callback return", "R0");
			return -EINVAL;
		}
		/* the call site of a callback frame must be known to call callbacks */
		if (!bpf_calls_callback(env, callee->callsite)) {
			verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
				     *insn_idx, callee->callsite);
			return -EFAULT;
		}
	} else {
		/* return to the caller whatever r0 had in the callee */
		caller->regs[BPF_REG_0] = *r0;
	}

	/* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
	 * there function call logic would reschedule callback visit. If iteration
	 * converges is_state_visited() would prune that visit eventually.
	 */
	/* cache the flag now: the callee frame is freed further down */
	in_callback_fn = callee->in_callback_fn;
	if (in_callback_fn)
		*insn_idx = callee->callsite;
	else
		*insn_idx = callee->callsite + 1;

	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "returning from callee:\n");
		print_verifier_state(env, state, callee->frameno, true);
		verbose(env, "to caller at %d:\n", *insn_idx);
		print_verifier_state(env, state, caller->frameno, true);
	}
	/* clear everything in the callee. In case of exceptional exits using
	 * bpf_throw, this will be done by copy_verifier_state for extra frames.
	 */
	free_func_state(callee);
	/* drop the frame slot and make the caller the current frame */
	state->frame[state->curframe--] = NULL;
	invalidate_outgoing_stack_args(env, caller);

	/* for callbacks widen imprecise scalars to make programs like below verify:
	 *
	 *   struct ctx { int i; }
	 *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
	 *   ...
	 *   struct ctx = { .i = 0; }
	 *   bpf_loop(100, cb, &ctx, 0);
	 *
	 * This is similar to what is done in process_iter_next_call() for open
	 * coded iterators.
	 */
	prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
	if (prev_st) {
		err = widen_imprecise_scalars(env, prev_st, state);
		if (err)
			return err;
	}
	return 0;
}
9767 
9768 static int do_refine_retval_range(struct bpf_verifier_env *env,
9769 				  struct bpf_reg_state *regs, int ret_type,
9770 				  int func_id,
9771 				  struct bpf_call_arg_meta *meta)
9772 {
9773 	struct bpf_retval_range range;
9774 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9775 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9776 
9777 	if (ret_type != RET_INTEGER)
9778 		return 0;
9779 
9780 	switch (func_id) {
9781 	case BPF_FUNC_get_stack:
9782 	case BPF_FUNC_get_task_stack:
9783 	case BPF_FUNC_probe_read_str:
9784 	case BPF_FUNC_probe_read_kernel_str:
9785 	case BPF_FUNC_probe_read_user_str:
9786 		reg_set_srange64(ret_reg, -MAX_ERRNO, meta->msize_max_value);
9787 		reg_set_srange32(ret_reg, -MAX_ERRNO, meta->msize_max_value);
9788 		reg_bounds_sync(ret_reg);
9789 		break;
9790 	case BPF_FUNC_get_smp_processor_id:
9791 		reg_set_urange64(ret_reg, 0, nr_cpu_ids - 1);
9792 		reg_set_urange32(ret_reg, 0, nr_cpu_ids - 1);
9793 		reg_bounds_sync(ret_reg);
9794 		break;
9795 	case BPF_FUNC_get_retval:
9796 		/*
9797 		 * bpf_get_retval may see arbitrary value passed by bpf_prog_run_array_cg for
9798 		 * CGROUP_GETSOCKOPT type.
9799 		 */
9800 		if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT &&
9801 		    env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT)
9802 			break;
9803 
9804 		if (prog_type == BPF_PROG_TYPE_LSM &&
9805 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
9806 			if (!env->prog->aux->attach_func_proto->type)
9807 				break;
9808 			bpf_lsm_get_retval_range(env->prog, &range);
9809 		} else {
9810 			range.minval = -MAX_ERRNO;
9811 			range.maxval = 0;
9812 		}
9813 
9814 		reg_set_srange64(ret_reg, range.minval, range.maxval);
9815 		reg_set_srange32(ret_reg, range.minval, range.maxval);
9816 		reg_bounds_sync(ret_reg);
9817 		break;
9818 	}
9819 
9820 	return reg_bounds_sanity_check(env, ret_reg, "retval");
9821 }
9822 
9823 static int
9824 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9825 		int func_id, int insn_idx)
9826 {
9827 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9828 	struct bpf_map *map = meta->map.ptr;
9829 
9830 	if (func_id != BPF_FUNC_tail_call &&
9831 	    func_id != BPF_FUNC_map_lookup_elem &&
9832 	    func_id != BPF_FUNC_map_update_elem &&
9833 	    func_id != BPF_FUNC_map_delete_elem &&
9834 	    func_id != BPF_FUNC_map_push_elem &&
9835 	    func_id != BPF_FUNC_map_pop_elem &&
9836 	    func_id != BPF_FUNC_map_peek_elem &&
9837 	    func_id != BPF_FUNC_for_each_map_elem &&
9838 	    func_id != BPF_FUNC_redirect_map &&
9839 	    func_id != BPF_FUNC_map_lookup_percpu_elem)
9840 		return 0;
9841 
9842 	if (map == NULL) {
9843 		verifier_bug(env, "expected map for helper call");
9844 		return -EFAULT;
9845 	}
9846 
9847 	/* In case of read-only, some additional restrictions
9848 	 * need to be applied in order to prevent altering the
9849 	 * state of the map from program side.
9850 	 */
9851 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
9852 	    (func_id == BPF_FUNC_map_delete_elem ||
9853 	     func_id == BPF_FUNC_map_update_elem ||
9854 	     func_id == BPF_FUNC_map_push_elem ||
9855 	     func_id == BPF_FUNC_map_pop_elem)) {
9856 		verbose(env, "write into map forbidden\n");
9857 		return -EACCES;
9858 	}
9859 
9860 	if (!aux->map_ptr_state.map_ptr)
9861 		bpf_map_ptr_store(aux, meta->map.ptr,
9862 				  !meta->map.ptr->bypass_spec_v1, false);
9863 	else if (aux->map_ptr_state.map_ptr != meta->map.ptr)
9864 		bpf_map_ptr_store(aux, meta->map.ptr,
9865 				  !meta->map.ptr->bypass_spec_v1, true);
9866 	return 0;
9867 }
9868 
9869 static int
9870 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9871 		int func_id, int insn_idx)
9872 {
9873 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9874 	struct bpf_reg_state *reg;
9875 	struct bpf_map *map = meta->map.ptr;
9876 	u64 val, max;
9877 	int err;
9878 
9879 	if (func_id != BPF_FUNC_tail_call)
9880 		return 0;
9881 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
9882 		verbose(env, "expected prog array map for tail call");
9883 		return -EINVAL;
9884 	}
9885 
9886 	reg = reg_state(env, BPF_REG_3);
9887 	val = reg->var_off.value;
9888 	max = map->max_entries;
9889 
9890 	if (!(is_reg_const(reg, false) && val < max)) {
9891 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9892 		return 0;
9893 	}
9894 
9895 	err = mark_chain_precision(env, BPF_REG_3);
9896 	if (err)
9897 		return err;
9898 	if (bpf_map_key_unseen(aux))
9899 		bpf_map_key_store(aux, val);
9900 	else if (!bpf_map_key_poisoned(aux) &&
9901 		  bpf_map_key_immediate(aux) != val)
9902 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9903 	return 0;
9904 }
9905 
9906 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
9907 {
9908 	struct bpf_verifier_state *state = env->cur_state;
9909 	enum bpf_prog_type type = resolve_prog_type(env->prog);
9910 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
9911 	bool refs_lingering = false;
9912 	int i;
9913 
9914 	if (!exception_exit && cur_func(env)->frameno)
9915 		return 0;
9916 
9917 	for (i = 0; i < state->acquired_refs; i++) {
9918 		if (state->refs[i].type != REF_TYPE_PTR)
9919 			continue;
9920 		/* Allow struct_ops programs to return a referenced kptr back to
9921 		 * kernel. Type checks are performed later in check_return_code.
9922 		 */
9923 		if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
9924 		    reg->id == state->refs[i].id)
9925 			continue;
9926 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
9927 			state->refs[i].id, state->refs[i].insn_idx);
9928 		refs_lingering = true;
9929 	}
9930 	return refs_lingering ? -EINVAL : 0;
9931 }
9932 
9933 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
9934 {
9935 	int err;
9936 
9937 	if (check_lock && env->cur_state->active_locks) {
9938 		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
9939 		return -EINVAL;
9940 	}
9941 
9942 	err = check_reference_leak(env, exception_exit);
9943 	if (err) {
9944 		verbose(env, "%s would lead to reference leak\n", prefix);
9945 		return err;
9946 	}
9947 
9948 	if (check_lock && env->cur_state->active_irq_id) {
9949 		verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
9950 		return -EINVAL;
9951 	}
9952 
9953 	if (check_lock && env->cur_state->active_rcu_locks) {
9954 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
9955 		return -EINVAL;
9956 	}
9957 
9958 	if (check_lock && env->cur_state->active_preempt_locks) {
9959 		verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
9960 		return -EINVAL;
9961 	}
9962 
9963 	return 0;
9964 }
9965 
9966 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
9967 				   struct bpf_reg_state *regs)
9968 {
9969 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
9970 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
9971 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
9972 	struct bpf_bprintf_data data = {};
9973 	int err, fmt_map_off, num_args;
9974 	u64 fmt_addr;
9975 	char *fmt;
9976 
9977 	/* data must be an array of u64 */
9978 	if (data_len_reg->var_off.value % 8)
9979 		return -EINVAL;
9980 	num_args = data_len_reg->var_off.value / 8;
9981 
9982 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
9983 	 * and map_direct_value_addr is set.
9984 	 */
9985 	fmt_map_off = fmt_reg->var_off.value;
9986 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
9987 						  fmt_map_off);
9988 	if (err) {
9989 		verbose(env, "failed to retrieve map value address\n");
9990 		return -EFAULT;
9991 	}
9992 	fmt = (char *)(long)fmt_addr + fmt_map_off;
9993 
9994 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
9995 	 * can focus on validating the format specifiers.
9996 	 */
9997 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
9998 	if (err < 0)
9999 		verbose(env, "Invalid format string\n");
10000 
10001 	return err;
10002 }
10003 
10004 static int check_get_func_ip(struct bpf_verifier_env *env)
10005 {
10006 	enum bpf_prog_type type = resolve_prog_type(env->prog);
10007 	int func_id = BPF_FUNC_get_func_ip;
10008 
10009 	if (type == BPF_PROG_TYPE_TRACING) {
10010 		if (!bpf_prog_has_trampoline(env->prog)) {
10011 			verbose(env, "func %s#%d supported only for fentry/fexit/fsession/fmod_ret programs\n",
10012 				func_id_name(func_id), func_id);
10013 			return -ENOTSUPP;
10014 		}
10015 		return 0;
10016 	} else if (type == BPF_PROG_TYPE_KPROBE) {
10017 		return 0;
10018 	}
10019 
10020 	verbose(env, "func %s#%d not supported for program type %d\n",
10021 		func_id_name(func_id), func_id, type);
10022 	return -ENOTSUPP;
10023 }
10024 
10025 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
10026 {
10027 	return &env->insn_aux_data[env->insn_idx];
10028 }
10029 
10030 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
10031 {
10032 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
10033 	bool reg_is_null = bpf_register_is_null(reg);
10034 
10035 	if (reg_is_null)
10036 		mark_chain_precision(env, BPF_REG_4);
10037 
10038 	return reg_is_null;
10039 }
10040 
10041 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
10042 {
10043 	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10044 
10045 	if (!state->initialized) {
10046 		state->initialized = 1;
10047 		state->fit_for_inline = loop_flag_is_zero(env);
10048 		state->callback_subprogno = subprogno;
10049 		return;
10050 	}
10051 
10052 	if (!state->fit_for_inline)
10053 		return;
10054 
10055 	state->fit_for_inline = (loop_flag_is_zero(env) &&
10056 				 state->callback_subprogno == subprogno);
10057 }
10058 
10059 /* Returns whether or not the given map type can potentially elide
10060  * lookup return value nullness check. This is possible if the key
10061  * is statically known.
10062  */
10063 static bool can_elide_value_nullness(enum bpf_map_type type)
10064 {
10065 	switch (type) {
10066 	case BPF_MAP_TYPE_ARRAY:
10067 	case BPF_MAP_TYPE_PERCPU_ARRAY:
10068 		return true;
10069 	default:
10070 		return false;
10071 	}
10072 }
10073 
10074 int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
10075 			 const struct bpf_func_proto **ptr)
10076 {
10077 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
10078 		return -ERANGE;
10079 
10080 	if (!env->ops->get_func_proto)
10081 		return -EINVAL;
10082 
10083 	*ptr = env->ops->get_func_proto(func_id, env->prog);
10084 	return *ptr && (*ptr)->func ? 0 : -EINVAL;
10085 }
10086 
10087 /* Check if we're in a sleepable context. */
10088 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
10089 {
10090 	return !env->cur_state->active_rcu_locks &&
10091 	       !env->cur_state->active_preempt_locks &&
10092 	       !env->cur_state->active_locks &&
10093 	       !env->cur_state->active_irq_id &&
10094 	       in_sleepable(env);
10095 }
10096 
10097 static const char *non_sleepable_context_description(struct bpf_verifier_env *env)
10098 {
10099 	if (env->cur_state->active_rcu_locks)
10100 		return "rcu_read_lock region";
10101 	if (env->cur_state->active_preempt_locks)
10102 		return "non-preemptible region";
10103 	if (env->cur_state->active_irq_id)
10104 		return "IRQ-disabled region";
10105 	if (env->cur_state->active_locks)
10106 		return "lock region";
10107 	return "non-sleepable prog";
10108 }
10109 
10110 static int release_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
10111 		       bool convert_rcu, bool release_dynptr)
10112 {
10113 	int err = -EINVAL;
10114 
10115 	if (bpf_register_is_null(reg))
10116 		return 0;
10117 
10118 	if (release_dynptr)
10119 		err = unmark_stack_slots_dynptr(env, reg);
10120 	else if (convert_rcu)
10121 		err = ref_convert_alloc_rcu_protected(env, reg->id);
10122 	else if (reg_is_referenced(env, reg))
10123 		err = release_reference(env, reg->id);
10124 
10125 	return err;
10126 }
10127 
10128 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10129 			     int *insn_idx_p)
10130 {
10131 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
10132 	bool returns_cpu_specific_alloc_ptr = false;
10133 	const struct bpf_func_proto *fn = NULL;
10134 	enum bpf_return_type ret_type;
10135 	enum bpf_type_flag ret_flag;
10136 	struct bpf_reg_state *regs;
10137 	struct bpf_call_arg_meta meta;
10138 	int insn_idx = *insn_idx_p;
10139 	bool changes_data;
10140 	int i, err, func_id;
10141 
10142 	/* find function prototype */
10143 	func_id = insn->imm;
10144 	err = bpf_get_helper_proto(env, insn->imm, &fn);
10145 	if (err == -ERANGE) {
10146 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
10147 		return -EINVAL;
10148 	}
10149 
10150 	if (err) {
10151 		verbose(env, "program of this type cannot use helper %s#%d\n",
10152 			func_id_name(func_id), func_id);
10153 		return err;
10154 	}
10155 
10156 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
10157 	if (!env->prog->gpl_compatible && fn->gpl_only) {
10158 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
10159 		return -EINVAL;
10160 	}
10161 
10162 	if (fn->allowed && !fn->allowed(env->prog)) {
10163 		verbose(env, "helper call is not allowed in probe\n");
10164 		return -EINVAL;
10165 	}
10166 
10167 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
10168 	changes_data = bpf_helper_changes_pkt_data(func_id);
10169 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
10170 		verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id);
10171 		return -EFAULT;
10172 	}
10173 
10174 	memset(&meta, 0, sizeof(meta));
10175 	meta.pkt_access = fn->pkt_access;
10176 
10177 	err = check_func_proto(fn, &meta);
10178 	if (err) {
10179 		verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
10180 		return err;
10181 	}
10182 
10183 	if (fn->might_sleep && !in_sleepable_context(env)) {
10184 		verbose(env, "sleepable helper %s#%d in %s\n", func_id_name(func_id), func_id,
10185 			non_sleepable_context_description(env));
10186 		return -EINVAL;
10187 	}
10188 
10189 	/* Track non-sleepable context for helpers. */
10190 	if (!in_sleepable_context(env))
10191 		env->insn_aux_data[insn_idx].non_sleepable = true;
10192 
10193 	meta.func_id = func_id;
10194 	/* check args */
10195 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
10196 		err = check_func_arg(env, i, &meta, fn, insn_idx);
10197 		if (err)
10198 			return err;
10199 	}
10200 
10201 	err = record_func_map(env, &meta, func_id, insn_idx);
10202 	if (err)
10203 		return err;
10204 
10205 	err = record_func_key(env, &meta, func_id, insn_idx);
10206 	if (err)
10207 		return err;
10208 
10209 	regs = cur_regs(env);
10210 
10211 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
10212 	 * is inferred from register state.
10213 	 */
10214 	for (i = 0; i < meta.access_size; i++) {
10215 		err = check_mem_access(env, insn_idx, regs + meta.regno, argno_from_reg(meta.regno), i, BPF_B,
10216 				       BPF_WRITE, -1, false, false);
10217 		if (err)
10218 			return err;
10219 	}
10220 
10221 	if (meta.release_regno) {
10222 		struct bpf_reg_state *reg = &regs[meta.release_regno];
10223 		bool convert_rcu = (func_id == BPF_FUNC_kptr_xchg) && in_rcu_cs(env) &&
10224 				   (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU);
10225 
10226 		err = release_reg(env, reg, convert_rcu, !!meta.dynptr.id);
10227 		if (err)
10228 			return err;
10229 	}
10230 
10231 	switch (func_id) {
10232 	case BPF_FUNC_tail_call:
10233 		err = check_resource_leak(env, false, true, "tail_call");
10234 		if (err)
10235 			return err;
10236 		break;
10237 	case BPF_FUNC_get_local_storage:
10238 		/* check that flags argument in get_local_storage(map, flags) is 0,
10239 		 * this is required because get_local_storage() can't return an error.
10240 		 */
10241 		if (!bpf_register_is_null(&regs[BPF_REG_2])) {
10242 			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
10243 			return -EINVAL;
10244 		}
10245 		break;
10246 	case BPF_FUNC_for_each_map_elem:
10247 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10248 					 set_map_elem_callback_state);
10249 		break;
10250 	case BPF_FUNC_timer_set_callback:
10251 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10252 					 set_timer_callback_state);
10253 		break;
10254 	case BPF_FUNC_find_vma:
10255 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10256 					 set_find_vma_callback_state);
10257 		break;
10258 	case BPF_FUNC_snprintf:
10259 		err = check_bpf_snprintf_call(env, regs);
10260 		break;
10261 	case BPF_FUNC_loop:
10262 		update_loop_inline_state(env, meta.subprogno);
10263 		/* Verifier relies on R1 value to determine if bpf_loop() iteration
10264 		 * is finished, thus mark it precise.
10265 		 */
10266 		err = mark_chain_precision(env, BPF_REG_1);
10267 		if (err)
10268 			return err;
10269 		if (cur_func(env)->callback_depth < reg_umax(&regs[BPF_REG_1])) {
10270 			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10271 						 set_loop_callback_state);
10272 		} else {
10273 			cur_func(env)->callback_depth = 0;
10274 			if (env->log.level & BPF_LOG_LEVEL2)
10275 				verbose(env, "frame%d bpf_loop iteration limit reached\n",
10276 					env->cur_state->curframe);
10277 		}
10278 		break;
10279 	case BPF_FUNC_dynptr_from_mem:
10280 		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
10281 			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
10282 				reg_type_str(env, regs[BPF_REG_1].type));
10283 			return -EACCES;
10284 		}
10285 		break;
10286 	case BPF_FUNC_set_retval:
10287 	{
10288 		struct bpf_retval_range range = {
10289 			.minval = -MAX_ERRNO,
10290 			.maxval = 0,
10291 			.return_32bit = true
10292 		};
10293 		struct bpf_reg_state *r1 = &regs[BPF_REG_1];
10294 
10295 		if (r1->type != SCALAR_VALUE) {
10296 			verbose(env, "R1 is not a scalar\n");
10297 			return -EINVAL;
10298 		}
10299 
10300 		/* CGROUP_GETSOCKOPT is allowed to return arbitrary value */
10301 		if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT &&
10302 		    env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT)
10303 			break;
10304 
10305 		if (prog_type == BPF_PROG_TYPE_LSM &&
10306 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
10307 			if (!env->prog->aux->attach_func_proto->type) {
10308 				/* Make sure programs that attach to void
10309 				 * hooks don't try to modify return value.
10310 				 */
10311 				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
10312 				return -EINVAL;
10313 			}
10314 			bpf_lsm_get_retval_range(env->prog, &range);
10315 		}
10316 
10317 		err = mark_chain_precision(env, BPF_REG_1);
10318 		if (err)
10319 			return err;
10320 
10321 		if (!retval_range_within(range, r1)) {
10322 			verbose_invalid_scalar(env, r1, range, "At bpf_set_retval", "R1");
10323 			return -EINVAL;
10324 		}
10325 
10326 		break;
10327 	}
10328 	case BPF_FUNC_dynptr_write:
10329 	{
10330 		enum bpf_dynptr_type dynptr_type = meta.dynptr.type;
10331 
10332 		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
10333 			return -EFAULT;
10334 
10335 		if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
10336 		    dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
10337 			/* this will trigger clear_all_pkt_pointers(), which will
10338 			 * invalidate all dynptr slices associated with the skb
10339 			 */
10340 			changes_data = true;
10341 
10342 		break;
10343 	}
10344 	case BPF_FUNC_per_cpu_ptr:
10345 	case BPF_FUNC_this_cpu_ptr:
10346 	{
10347 		struct bpf_reg_state *reg = &regs[BPF_REG_1];
10348 		const struct btf_type *type;
10349 
10350 		if (reg->type & MEM_RCU) {
10351 			type = btf_type_by_id(reg->btf, reg->btf_id);
10352 			if (!type || !btf_type_is_struct(type)) {
10353 				verbose(env, "Helper has invalid btf/btf_id in R1\n");
10354 				return -EFAULT;
10355 			}
10356 			returns_cpu_specific_alloc_ptr = true;
10357 			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
10358 		}
10359 		break;
10360 	}
10361 	case BPF_FUNC_user_ringbuf_drain:
10362 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10363 					 set_user_ringbuf_callback_state);
10364 		break;
10365 	}
10366 
10367 	if (err)
10368 		return err;
10369 
10370 	/* reset caller saved regs */
10371 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
10372 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
10373 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
10374 	}
10375 	invalidate_outgoing_stack_args(env, cur_func(env));
10376 
10377 	/* helper call returns 64-bit value. */
10378 	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10379 
10380 	/* update return register (already marked as written above) */
10381 	ret_type = fn->ret_type;
10382 	ret_flag = type_flag(ret_type);
10383 
10384 	switch (base_type(ret_type)) {
10385 	case RET_INTEGER:
10386 		/* sets type to SCALAR_VALUE */
10387 		mark_reg_unknown(env, regs, BPF_REG_0);
10388 		break;
10389 	case RET_VOID:
10390 		regs[BPF_REG_0].type = NOT_INIT;
10391 		break;
10392 	case RET_PTR_TO_MAP_VALUE:
10393 		/* There is no offset yet applied, variable or fixed */
10394 		mark_reg_known_zero(env, regs, BPF_REG_0);
10395 		/* remember map_ptr, so that check_map_access()
10396 		 * can check 'value_size' boundary of memory access
10397 		 * to map element returned from bpf_map_lookup_elem()
10398 		 */
10399 		if (meta.map.ptr == NULL) {
10400 			verifier_bug(env, "unexpected null map_ptr");
10401 			return -EFAULT;
10402 		}
10403 
10404 		if (func_id == BPF_FUNC_map_lookup_elem &&
10405 		    can_elide_value_nullness(meta.map.ptr->map_type) &&
10406 		    meta.const_map_key >= 0 &&
10407 		    meta.const_map_key < meta.map.ptr->max_entries)
10408 			ret_flag &= ~PTR_MAYBE_NULL;
10409 
10410 		regs[BPF_REG_0].map_ptr = meta.map.ptr;
10411 		regs[BPF_REG_0].map_uid = meta.map.uid;
10412 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
10413 		if (!type_may_be_null(ret_flag) &&
10414 		    btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
10415 			regs[BPF_REG_0].id = ++env->id_gen;
10416 		}
10417 		break;
10418 	case RET_PTR_TO_SOCKET:
10419 		mark_reg_known_zero(env, regs, BPF_REG_0);
10420 		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
10421 		break;
10422 	case RET_PTR_TO_SOCK_COMMON:
10423 		mark_reg_known_zero(env, regs, BPF_REG_0);
10424 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
10425 		break;
10426 	case RET_PTR_TO_TCP_SOCK:
10427 		mark_reg_known_zero(env, regs, BPF_REG_0);
10428 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
10429 		break;
10430 	case RET_PTR_TO_MEM:
10431 		mark_reg_known_zero(env, regs, BPF_REG_0);
10432 		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10433 		regs[BPF_REG_0].mem_size = meta.mem_size;
10434 		break;
10435 	case RET_PTR_TO_MEM_OR_BTF_ID:
10436 	{
10437 		const struct btf_type *t;
10438 
10439 		mark_reg_known_zero(env, regs, BPF_REG_0);
10440 		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
10441 		if (!btf_type_is_struct(t)) {
10442 			u32 tsize;
10443 			const struct btf_type *ret;
10444 			const char *tname;
10445 
10446 			/* resolve the type size of ksym. */
10447 			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
10448 			if (IS_ERR(ret)) {
10449 				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
10450 				verbose(env, "unable to resolve the size of type '%s': %ld\n",
10451 					tname, PTR_ERR(ret));
10452 				return -EINVAL;
10453 			}
10454 			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10455 			regs[BPF_REG_0].mem_size = tsize;
10456 		} else {
10457 			if (returns_cpu_specific_alloc_ptr) {
10458 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
10459 			} else {
10460 				/* MEM_RDONLY may be carried from ret_flag, but it
10461 				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
10462 				 * it will confuse the check of PTR_TO_BTF_ID in
10463 				 * check_mem_access().
10464 				 */
10465 				ret_flag &= ~MEM_RDONLY;
10466 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
10467 			}
10468 
10469 			regs[BPF_REG_0].btf = meta.ret_btf;
10470 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
10471 		}
10472 		break;
10473 	}
10474 	case RET_PTR_TO_BTF_ID:
10475 	{
10476 		struct btf *ret_btf;
10477 		int ret_btf_id;
10478 
10479 		mark_reg_known_zero(env, regs, BPF_REG_0);
10480 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
10481 		if (func_id == BPF_FUNC_kptr_xchg) {
10482 			ret_btf = meta.kptr_field->kptr.btf;
10483 			ret_btf_id = meta.kptr_field->kptr.btf_id;
10484 			if (!btf_is_kernel(ret_btf)) {
10485 				regs[BPF_REG_0].type |= MEM_ALLOC;
10486 				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
10487 					regs[BPF_REG_0].type |= MEM_PERCPU;
10488 			}
10489 		} else {
10490 			if (fn->ret_btf_id == BPF_PTR_POISON) {
10491 				verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type",
10492 					     func_id_name(func_id));
10493 				return -EFAULT;
10494 			}
10495 			ret_btf = btf_vmlinux;
10496 			ret_btf_id = *fn->ret_btf_id;
10497 		}
10498 		if (ret_btf_id == 0) {
10499 			verbose(env, "invalid return type %u of func %s#%d\n",
10500 				base_type(ret_type), func_id_name(func_id),
10501 				func_id);
10502 			return -EINVAL;
10503 		}
10504 		regs[BPF_REG_0].btf = ret_btf;
10505 		regs[BPF_REG_0].btf_id = ret_btf_id;
10506 		break;
10507 	}
10508 	default:
10509 		verbose(env, "unknown return type %u of func %s#%d\n",
10510 			base_type(ret_type), func_id_name(func_id), func_id);
10511 		return -EINVAL;
10512 	}
10513 
10514 	if (type_may_be_null(regs[BPF_REG_0].type))
10515 		regs[BPF_REG_0].id = ++env->id_gen;
10516 
10517 	if (is_ptr_cast_function(func_id) &&
10518 	    find_reference_state(env->cur_state, meta.ref_obj.id)) {
10519 		struct bpf_verifier_state *branch;
10520 		struct bpf_reg_state *r0;
10521 
10522 		err = validate_ref_obj(env, &meta.ref_obj);
10523 		if (err)
10524 			return err;
10525 
10526 		/*
10527 		 * In order for a release of any of the original or cast pointers
10528 		 * to invalidate all other pointers, reuse the same reference id for
10529 		 * the cast result.
10530 		 * This reference id can't be used for nullness propagation,
10531 		 * as cast might return NULL for a non-NULL input.
10532 		 * Hence, explore the NULL case as a separate branch.
10533 		 */
10534 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
10535 		if (IS_ERR(branch))
10536 			return PTR_ERR(branch);
10537 
10538 		r0 = &branch->frame[branch->curframe]->regs[BPF_REG_0];
10539 		__mark_reg_known_zero(r0);
10540 		r0->type = SCALAR_VALUE;
10541 
10542 		regs[BPF_REG_0].type &= ~PTR_MAYBE_NULL;
10543 		regs[BPF_REG_0].id = meta.ref_obj.id;
10544 	} else if (is_acquire_function(func_id, meta.map.ptr)) {
10545 		int id = acquire_reference(env, insn_idx, 0);
10546 
10547 		if (id < 0)
10548 			return id;
10549 
10550 		regs[BPF_REG_0].id = id;
10551 	}
10552 
10553 	if (func_id == BPF_FUNC_dynptr_data)
10554 		regs[BPF_REG_0].parent_id = meta.dynptr.id;
10555 
10556 	err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
10557 	if (err)
10558 		return err;
10559 
10560 	err = check_map_func_compatibility(env, meta.map.ptr, func_id);
10561 	if (err)
10562 		return err;
10563 
10564 	if ((func_id == BPF_FUNC_get_stack ||
10565 	     func_id == BPF_FUNC_get_task_stack) &&
10566 	    !env->prog->has_callchain_buf) {
10567 		const char *err_str;
10568 
10569 #ifdef CONFIG_PERF_EVENTS
10570 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
10571 		err_str = "cannot get callchain buffer for func %s#%d\n";
10572 #else
10573 		err = -ENOTSUPP;
10574 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
10575 #endif
10576 		if (err) {
10577 			verbose(env, err_str, func_id_name(func_id), func_id);
10578 			return err;
10579 		}
10580 
10581 		env->prog->has_callchain_buf = true;
10582 	}
10583 
10584 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
10585 		env->prog->call_get_stack = true;
10586 
10587 	if (func_id == BPF_FUNC_get_func_ip) {
10588 		if (check_get_func_ip(env))
10589 			return -ENOTSUPP;
10590 		env->prog->call_get_func_ip = true;
10591 	}
10592 
10593 	if (func_id == BPF_FUNC_tail_call) {
10594 		if (env->cur_state->curframe) {
10595 			struct bpf_verifier_state *branch;
10596 
10597 			mark_reg_scratched(env, BPF_REG_0);
10598 			branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
10599 			if (IS_ERR(branch))
10600 				return PTR_ERR(branch);
10601 			clear_all_pkt_pointers(env);
10602 			mark_reg_unknown(env, regs, BPF_REG_0);
10603 			err = prepare_func_exit(env, &env->insn_idx);
10604 			if (err)
10605 				return err;
10606 			env->insn_idx--;
10607 		} else {
10608 			changes_data = false;
10609 		}
10610 	}
10611 
10612 	if (changes_data)
10613 		clear_all_pkt_pointers(env);
10614 	return 0;
10615 }
10616 
10617 /* mark_btf_func_reg_size() is used when the reg size is determined by
10618  * the BTF func_proto's return value size and argument.
10619  */
10620 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs,
10621 				     u32 regno, size_t reg_size)
10622 {
10623 	struct bpf_reg_state *reg = &regs[regno];
10624 
10625 	if (regno == BPF_REG_0) {
10626 		/* Function return value */
10627 		reg->subreg_def = reg_size == sizeof(u64) ?
10628 			DEF_NOT_SUBREG : env->insn_idx + 1;
10629 	} else if (reg_size == sizeof(u64)) {
10630 		/* Function argument */
10631 		mark_insn_zext(env, reg);
10632 	}
10633 }
10634 
10635 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
10636 				   size_t reg_size)
10637 {
10638 	return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size);
10639 }
10640 
10641 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10642 {
10643 	return meta->kfunc_flags & KF_ACQUIRE;
10644 }
10645 
10646 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10647 {
10648 	return meta->kfunc_flags & KF_RELEASE;
10649 }
10650 
10651 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10652 {
10653 	return meta->kfunc_flags & KF_DESTRUCTIVE;
10654 }
10655 
10656 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10657 {
10658 	return meta->kfunc_flags & KF_RCU;
10659 }
10660 
10661 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10662 {
10663 	return meta->kfunc_flags & KF_RCU_PROTECTED;
10664 }
10665 
10666 static bool is_kfunc_arg_mem_size(const struct btf *btf,
10667 				  const struct btf_param *arg,
10668 				  const struct bpf_reg_state *reg)
10669 {
10670 	const struct btf_type *t;
10671 
10672 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10673 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10674 		return false;
10675 
10676 	return btf_param_match_suffix(btf, arg, "__sz");
10677 }
10678 
10679 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10680 					const struct btf_param *arg,
10681 					const struct bpf_reg_state *reg)
10682 {
10683 	const struct btf_type *t;
10684 
10685 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10686 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10687 		return false;
10688 
10689 	return btf_param_match_suffix(btf, arg, "__szk");
10690 }
10691 
/* Asks btf_param_match_suffix() whether @arg matches the "__k" suffix. */
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__k");

	return tagged;
}
10696 
/* Asks btf_param_match_suffix() whether @arg matches the "__ign" suffix. */
static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__ign");

	return tagged;
}
10701 
/* Asks btf_param_match_suffix() whether @arg matches the "__map" suffix. */
static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__map");

	return tagged;
}
10706 
/* Asks btf_param_match_suffix() whether @arg matches the "__alloc" suffix. */
static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__alloc");

	return tagged;
}
10711 
/* Asks btf_param_match_suffix() whether @arg matches the "__uninit" suffix. */
static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__uninit");

	return tagged;
}
10716 
/* Asks btf_param_match_suffix() whether @arg matches the "__refcounted_kptr" suffix. */
static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__refcounted_kptr");

	return tagged;
}
10721 
/* Asks btf_param_match_suffix() whether @arg matches the "__nullable" suffix. */
static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__nullable");

	return tagged;
}
10726 
/* Asks btf_param_match_suffix() whether @arg matches the "__nonown_allowed" suffix. */
static bool is_kfunc_arg_nonown_allowed(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__nonown_allowed");

	return tagged;
}
10731 
/* Asks btf_param_match_suffix() whether @arg matches the "__str" suffix. */
static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__str");

	return tagged;
}
10736 
/* Asks btf_param_match_suffix() whether @arg matches the "__irq_flag" suffix. */
static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
{
	const bool tagged = btf_param_match_suffix(btf, arg, "__irq_flag");

	return tagged;
}
10741 
10742 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10743 					  const struct btf_param *arg,
10744 					  const char *name)
10745 {
10746 	int len, target_len = strlen(name);
10747 	const char *param_name;
10748 
10749 	param_name = btf_name_by_offset(btf, arg->name_off);
10750 	if (str_is_empty(param_name))
10751 		return false;
10752 	len = strlen(param_name);
10753 	if (len != target_len)
10754 		return false;
10755 	if (strcmp(param_name, name))
10756 		return false;
10757 
10758 	return true;
10759 }
10760 
/*
 * Positional indices into kf_arg_btf_ids[]. __is_kfunc_ptr_arg_type()
 * subscripts that array with one of these values, so the enumerator order
 * has to mirror the order of the BTF_ID() entries in the list.
 */
10761 enum {
10762 	KF_ARG_DYNPTR_ID,
10763 	KF_ARG_LIST_HEAD_ID,
10764 	KF_ARG_LIST_NODE_ID,
10765 	KF_ARG_RB_ROOT_ID,
10766 	KF_ARG_RB_NODE_ID,
10767 	KF_ARG_WORKQUEUE_ID,
10768 	KF_ARG_RES_SPIN_LOCK_ID,
10769 	KF_ARG_TASK_WORK_ID,
10770 	KF_ARG_PROG_AUX_ID,
10771 	KF_ARG_TIMER_ID
10772 };
10773 
/*
 * BTF IDs of the struct types that the is_kfunc_arg_*() pointer helpers
 * compare against. Entries are selected with the KF_ARG_*_ID enumerators,
 * so every BTF_ID() line must stay at the position of its enumerator
 * (e.g. KF_ARG_DYNPTR_ID <-> bpf_dynptr, KF_ARG_TIMER_ID <-> bpf_timer).
 */
10774 BTF_ID_LIST(kf_arg_btf_ids)
10775 BTF_ID(struct, bpf_dynptr)
10776 BTF_ID(struct, bpf_list_head)
10777 BTF_ID(struct, bpf_list_node)
10778 BTF_ID(struct, bpf_rb_root)
10779 BTF_ID(struct, bpf_rb_node)
10780 BTF_ID(struct, bpf_wq)
10781 BTF_ID(struct, bpf_res_spin_lock)
10782 BTF_ID(struct, bpf_task_work)
10783 BTF_ID(struct, bpf_prog_aux)
10784 BTF_ID(struct, bpf_timer)
10785 
10786 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10787 				    const struct btf_param *arg, int type)
10788 {
10789 	const struct btf_type *t;
10790 	u32 res_id;
10791 
10792 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10793 	if (!t)
10794 		return false;
10795 	if (!btf_type_is_ptr(t))
10796 		return false;
10797 	t = btf_type_skip_modifiers(btf, t->type, &res_id);
10798 	if (!t)
10799 		return false;
10800 	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
10801 }
10802 
10803 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10804 {
10805 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
10806 }
10807 
10808 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
10809 {
10810 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
10811 }
10812 
10813 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
10814 {
10815 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
10816 }
10817 
10818 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
10819 {
10820 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
10821 }
10822 
10823 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
10824 {
10825 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
10826 }
10827 
10828 static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
10829 {
10830 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
10831 }
10832 
10833 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
10834 {
10835 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
10836 }
10837 
10838 static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
10839 {
10840 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
10841 }
10842 
10843 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
10844 {
10845 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
10846 }
10847 
10848 static bool is_rbtree_node_type(const struct btf_type *t)
10849 {
10850 	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
10851 }
10852 
10853 static bool is_list_node_type(const struct btf_type *t)
10854 {
10855 	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
10856 }
10857 
10858 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
10859 				  const struct btf_param *arg)
10860 {
10861 	const struct btf_type *t;
10862 
10863 	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
10864 	if (!t)
10865 		return false;
10866 
10867 	return true;
10868 }
10869 
10870 static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
10871 {
10872 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
10873 }
10874 
10875 /*
10876  * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF:
10877  *   - the _impl prototype with full arg list (meta->func_proto)
10878  *   - the BPF API prototype w/o implicit args (func->type in BTF)
10879  * To determine whether an argument is implicit, we compare its position
10880  * against the number of arguments in the prototype w/o implicit args.
10881  */
10882 static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx)
10883 {
10884 	const struct btf_type *func, *func_proto;
10885 	u32 argn;
10886 
10887 	if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS))
10888 		return false;
10889 
10890 	func = btf_type_by_id(meta->btf, meta->func_id);
10891 	func_proto = btf_type_by_id(meta->btf, func->type);
10892 	argn = btf_type_vlen(func_proto);
10893 
10894 	return argn <= arg_idx;
10895 }
10896 
10897 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
10898 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
10899 					const struct btf *btf,
10900 					const struct btf_type *t, int rec)
10901 {
10902 	const struct btf_type *member_type;
10903 	const struct btf_member *member;
10904 	u32 i;
10905 
10906 	if (!btf_type_is_struct(t))
10907 		return false;
10908 
10909 	for_each_member(i, t, member) {
10910 		const struct btf_array *array;
10911 
10912 		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
10913 		if (btf_type_is_struct(member_type)) {
10914 			if (rec >= 3) {
10915 				verbose(env, "max struct nesting depth exceeded\n");
10916 				return false;
10917 			}
10918 			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
10919 				return false;
10920 			continue;
10921 		}
10922 		if (btf_type_is_array(member_type)) {
10923 			array = btf_array(member_type);
10924 			if (!array->nelems)
10925 				return false;
10926 			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
10927 			if (!btf_type_is_scalar(member_type))
10928 				return false;
10929 			continue;
10930 		}
10931 		if (!btf_type_is_scalar(member_type))
10932 			return false;
10933 	}
10934 	return true;
10935 }
10936 
/*
 * Kinds of pointer argument a kfunc parameter can be classified as.
 * NOTE(review): the code that produces and consumes these values is outside
 * this chunk; presumably they are assigned while checking kfunc call
 * arguments -- confirm against the users of this enum.
 */
10937 enum kfunc_ptr_arg_type {
10938 	KF_ARG_PTR_TO_CTX,
10939 	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
10940 	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
10941 	KF_ARG_PTR_TO_DYNPTR,
10942 	KF_ARG_PTR_TO_ITER,
10943 	KF_ARG_PTR_TO_LIST_HEAD,
10944 	KF_ARG_PTR_TO_LIST_NODE,
10945 	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
10946 	KF_ARG_PTR_TO_MEM,
10947 	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
10948 	KF_ARG_PTR_TO_CALLBACK,
10949 	KF_ARG_PTR_TO_RB_ROOT,
10950 	KF_ARG_PTR_TO_RB_NODE,
10951 	KF_ARG_PTR_TO_NULL,
10952 	KF_ARG_PTR_TO_CONST_STR,
10953 	KF_ARG_PTR_TO_MAP,
10954 	KF_ARG_PTR_TO_TIMER,
10955 	KF_ARG_PTR_TO_WORKQUEUE,
10956 	KF_ARG_PTR_TO_IRQ_FLAG,
10957 	KF_ARG_PTR_TO_RES_SPIN_LOCK,
10958 	KF_ARG_PTR_TO_TASK_WORK,
10959 };
10960 
/*
 * Positional indices into special_kfunc_list[]. Helpers such as
 * is_bpf_obj_new_kfunc() compare a kfunc's BTF id with
 * special_kfunc_list[KF_<name>], so each enumerator must sit at the same
 * position as the corresponding entry in that list. When adding a kfunc,
 * add the enumerator and the list entry at matching positions.
 */
10961 enum special_kfunc_type {
10962 	KF_bpf_obj_new_impl,
10963 	KF_bpf_obj_new,
10964 	KF_bpf_obj_drop_impl,
10965 	KF_bpf_obj_drop,
10966 	KF_bpf_refcount_acquire_impl,
10967 	KF_bpf_refcount_acquire,
10968 	KF_bpf_list_push_front_impl,
10969 	KF_bpf_list_push_front,
10970 	KF_bpf_list_push_back_impl,
10971 	KF_bpf_list_push_back,
10972 	KF_bpf_list_add,
10973 	KF_bpf_list_pop_front,
10974 	KF_bpf_list_pop_back,
10975 	KF_bpf_list_del,
10976 	KF_bpf_list_front,
10977 	KF_bpf_list_back,
10978 	KF_bpf_list_is_first,
10979 	KF_bpf_list_is_last,
10980 	KF_bpf_list_empty,
10981 	KF_bpf_cast_to_kern_ctx,
10982 	KF_bpf_rdonly_cast,
10983 	KF_bpf_rcu_read_lock,
10984 	KF_bpf_rcu_read_unlock,
10985 	KF_bpf_rbtree_remove,
10986 	KF_bpf_rbtree_add_impl,
10987 	KF_bpf_rbtree_add,
10988 	KF_bpf_rbtree_first,
10989 	KF_bpf_rbtree_root,
10990 	KF_bpf_rbtree_left,
10991 	KF_bpf_rbtree_right,
10992 	KF_bpf_dynptr_from_skb,
10993 	KF_bpf_dynptr_from_xdp,
10994 	KF_bpf_dynptr_from_skb_meta,
10995 	KF_bpf_xdp_pull_data,
10996 	KF_bpf_dynptr_slice,
10997 	KF_bpf_dynptr_slice_rdwr,
10998 	KF_bpf_dynptr_clone,
10999 	KF_bpf_percpu_obj_new_impl,
11000 	KF_bpf_percpu_obj_new,
11001 	KF_bpf_percpu_obj_drop_impl,
11002 	KF_bpf_percpu_obj_drop,
11003 	KF_bpf_throw,
11004 	KF_bpf_wq_set_callback,
11005 	KF_bpf_preempt_disable,
11006 	KF_bpf_preempt_enable,
11007 	KF_bpf_iter_css_task_new,
11008 	KF_bpf_session_cookie,
11009 	KF_bpf_get_kmem_cache,
11010 	KF_bpf_local_irq_save,
11011 	KF_bpf_local_irq_restore,
11012 	KF_bpf_iter_num_new,
11013 	KF_bpf_iter_num_next,
11014 	KF_bpf_iter_num_destroy,
11015 	KF_bpf_set_dentry_xattr,
11016 	KF_bpf_remove_dentry_xattr,
11017 	KF_bpf_res_spin_lock,
11018 	KF_bpf_res_spin_unlock,
11019 	KF_bpf_res_spin_lock_irqsave,
11020 	KF_bpf_res_spin_unlock_irqrestore,
11021 	KF_bpf_dynptr_from_file,
11022 	KF_bpf_dynptr_file_discard,
11023 	KF___bpf_trap,
11024 	KF_bpf_task_work_schedule_signal,
11025 	KF_bpf_task_work_schedule_resume,
11026 	KF_bpf_arena_alloc_pages,
11027 	KF_bpf_arena_free_pages,
11028 	KF_bpf_arena_reserve_pages,
11029 	KF_bpf_session_is_return,
11030 	KF_bpf_stream_vprintk,
11031 	KF_bpf_stream_print_stack,
11032 };
11033 
/*
 * BTF ids of the kfuncs that get special treatment, indexed by
 * enum special_kfunc_type. The entries appear in exactly the enumerator
 * order and must stay that way.
 *
 * Entries that depend on a config option (CONFIG_NET, CONFIG_CGROUPS,
 * CONFIG_BPF_EVENTS, CONFIG_BPF_LSM) have an #else branch with one
 * BTF_ID_UNUSED per BTF_ID() line it replaces, so the number of list
 * slots is the same in both branches. NOTE(review): BTF_ID_UNUSED is
 * presumably a placeholder slot that keeps later indices aligned --
 * confirm against <linux/btf_ids.h>.
 */
11034 BTF_ID_LIST(special_kfunc_list)
11035 BTF_ID(func, bpf_obj_new_impl)
11036 BTF_ID(func, bpf_obj_new)
11037 BTF_ID(func, bpf_obj_drop_impl)
11038 BTF_ID(func, bpf_obj_drop)
11039 BTF_ID(func, bpf_refcount_acquire_impl)
11040 BTF_ID(func, bpf_refcount_acquire)
11041 BTF_ID(func, bpf_list_push_front_impl)
11042 BTF_ID(func, bpf_list_push_front)
11043 BTF_ID(func, bpf_list_push_back_impl)
11044 BTF_ID(func, bpf_list_push_back)
11045 BTF_ID(func, bpf_list_add)
11046 BTF_ID(func, bpf_list_pop_front)
11047 BTF_ID(func, bpf_list_pop_back)
11048 BTF_ID(func, bpf_list_del)
11049 BTF_ID(func, bpf_list_front)
11050 BTF_ID(func, bpf_list_back)
11051 BTF_ID(func, bpf_list_is_first)
11052 BTF_ID(func, bpf_list_is_last)
11053 BTF_ID(func, bpf_list_empty)
11054 BTF_ID(func, bpf_cast_to_kern_ctx)
11055 BTF_ID(func, bpf_rdonly_cast)
11056 BTF_ID(func, bpf_rcu_read_lock)
11057 BTF_ID(func, bpf_rcu_read_unlock)
11058 BTF_ID(func, bpf_rbtree_remove)
11059 BTF_ID(func, bpf_rbtree_add_impl)
11060 BTF_ID(func, bpf_rbtree_add)
11061 BTF_ID(func, bpf_rbtree_first)
11062 BTF_ID(func, bpf_rbtree_root)
11063 BTF_ID(func, bpf_rbtree_left)
11064 BTF_ID(func, bpf_rbtree_right)
11065 #ifdef CONFIG_NET
11066 BTF_ID(func, bpf_dynptr_from_skb)
11067 BTF_ID(func, bpf_dynptr_from_xdp)
11068 BTF_ID(func, bpf_dynptr_from_skb_meta)
11069 BTF_ID(func, bpf_xdp_pull_data)
11070 #else
11071 BTF_ID_UNUSED
11072 BTF_ID_UNUSED
11073 BTF_ID_UNUSED
11074 BTF_ID_UNUSED
11075 #endif
11076 BTF_ID(func, bpf_dynptr_slice)
11077 BTF_ID(func, bpf_dynptr_slice_rdwr)
11078 BTF_ID(func, bpf_dynptr_clone)
11079 BTF_ID(func, bpf_percpu_obj_new_impl)
11080 BTF_ID(func, bpf_percpu_obj_new)
11081 BTF_ID(func, bpf_percpu_obj_drop_impl)
11082 BTF_ID(func, bpf_percpu_obj_drop)
11083 BTF_ID(func, bpf_throw)
11084 BTF_ID(func, bpf_wq_set_callback)
11085 BTF_ID(func, bpf_preempt_disable)
11086 BTF_ID(func, bpf_preempt_enable)
11087 #ifdef CONFIG_CGROUPS
11088 BTF_ID(func, bpf_iter_css_task_new)
11089 #else
11090 BTF_ID_UNUSED
11091 #endif
11092 #ifdef CONFIG_BPF_EVENTS
11093 BTF_ID(func, bpf_session_cookie)
11094 #else
11095 BTF_ID_UNUSED
11096 #endif
11097 BTF_ID(func, bpf_get_kmem_cache)
11098 BTF_ID(func, bpf_local_irq_save)
11099 BTF_ID(func, bpf_local_irq_restore)
11100 BTF_ID(func, bpf_iter_num_new)
11101 BTF_ID(func, bpf_iter_num_next)
11102 BTF_ID(func, bpf_iter_num_destroy)
11103 #ifdef CONFIG_BPF_LSM
11104 BTF_ID(func, bpf_set_dentry_xattr)
11105 BTF_ID(func, bpf_remove_dentry_xattr)
11106 #else
11107 BTF_ID_UNUSED
11108 BTF_ID_UNUSED
11109 #endif
11110 BTF_ID(func, bpf_res_spin_lock)
11111 BTF_ID(func, bpf_res_spin_unlock)
11112 BTF_ID(func, bpf_res_spin_lock_irqsave)
11113 BTF_ID(func, bpf_res_spin_unlock_irqrestore)
11114 BTF_ID(func, bpf_dynptr_from_file)
11115 BTF_ID(func, bpf_dynptr_file_discard)
11116 BTF_ID(func, __bpf_trap)
11117 BTF_ID(func, bpf_task_work_schedule_signal)
11118 BTF_ID(func, bpf_task_work_schedule_resume)
11119 BTF_ID(func, bpf_arena_alloc_pages)
11120 BTF_ID(func, bpf_arena_free_pages)
11121 BTF_ID(func, bpf_arena_reserve_pages)
11122 #ifdef CONFIG_BPF_EVENTS
11123 BTF_ID(func, bpf_session_is_return)
11124 #else
11125 BTF_ID_UNUSED
11126 #endif
11127 BTF_ID(func, bpf_stream_vprintk)
11128 BTF_ID(func, bpf_stream_print_stack)
11129 
11130 static bool is_bpf_obj_new_kfunc(u32 func_id)
11131 {
11132 	return func_id == special_kfunc_list[KF_bpf_obj_new] ||
11133 	       func_id == special_kfunc_list[KF_bpf_obj_new_impl];
11134 }
11135 
11136 static bool is_bpf_percpu_obj_new_kfunc(u32 func_id)
11137 {
11138 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] ||
11139 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl];
11140 }
11141 
11142 static bool is_bpf_obj_drop_kfunc(u32 func_id)
11143 {
11144 	return func_id == special_kfunc_list[KF_bpf_obj_drop] ||
11145 	       func_id == special_kfunc_list[KF_bpf_obj_drop_impl];
11146 }
11147 
11148 static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id)
11149 {
11150 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] ||
11151 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl];
11152 }
11153 
11154 static bool is_bpf_refcount_acquire_kfunc(u32 func_id)
11155 {
11156 	return func_id == special_kfunc_list[KF_bpf_refcount_acquire] ||
11157 	       func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11158 }
11159 
11160 static bool is_bpf_list_push_kfunc(u32 func_id)
11161 {
11162 	return func_id == special_kfunc_list[KF_bpf_list_push_front] ||
11163 	       func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11164 	       func_id == special_kfunc_list[KF_bpf_list_push_back] ||
11165 	       func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11166 	       func_id == special_kfunc_list[KF_bpf_list_add];
11167 }
11168 
11169 static bool is_bpf_rbtree_add_kfunc(u32 func_id)
11170 {
11171 	return func_id == special_kfunc_list[KF_bpf_rbtree_add] ||
11172 	       func_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11173 }
11174 
11175 static bool is_task_work_add_kfunc(u32 func_id)
11176 {
11177 	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
11178 	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
11179 }
11180 
11181 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11182 {
11183 	if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref)
11184 		return false;
11185 
11186 	return meta->kfunc_flags & KF_RET_NULL;
11187 }
11188 
11189 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
11190 {
11191 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
11192 }
11193 
11194 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
11195 {
11196 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
11197 }
11198 
11199 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
11200 {
11201 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
11202 }
11203 
11204 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
11205 {
11206 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
11207 }
11208 
11209 bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
11210 {
11211 	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
11212 }
11213 
11214 static enum kfunc_ptr_arg_type
11215 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_func_state *caller,
11216 		       struct bpf_reg_state *regs, struct bpf_kfunc_call_arg_meta *meta,
11217 		       const struct btf_type *t, const struct btf_type *ref_t,
11218 		       const char *ref_tname, const struct btf_param *args,
11219 		       int arg, int nargs, argno_t argno, struct bpf_reg_state *reg)
11220 {
11221 	bool arg_mem_size = false;
11222 
11223 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
11224 	    meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
11225 	    meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
11226 		return KF_ARG_PTR_TO_CTX;
11227 
11228 	if (arg + 1 < nargs &&
11229 	    (is_kfunc_arg_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1)) ||
11230 	     is_kfunc_arg_const_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1))))
11231 		arg_mem_size = true;
11232 
11233 	/* In this function, we verify the kfunc's BTF as per the argument type,
11234 	 * leaving the rest of the verification with respect to the register
11235 	 * type to our caller. When a set of conditions hold in the BTF type of
11236 	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
11237 	 */
11238 	if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), arg))
11239 		return KF_ARG_PTR_TO_CTX;
11240 
11241 	if (is_kfunc_arg_nullable(meta->btf, &args[arg]) && bpf_register_is_null(reg) &&
11242 	    !arg_mem_size)
11243 		return KF_ARG_PTR_TO_NULL;
11244 
11245 	if (is_kfunc_arg_alloc_obj(meta->btf, &args[arg]))
11246 		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
11247 
11248 	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[arg]))
11249 		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
11250 
11251 	if (is_kfunc_arg_dynptr(meta->btf, &args[arg]))
11252 		return KF_ARG_PTR_TO_DYNPTR;
11253 
11254 	if (is_kfunc_arg_iter(meta, arg, &args[arg]))
11255 		return KF_ARG_PTR_TO_ITER;
11256 
11257 	if (is_kfunc_arg_list_head(meta->btf, &args[arg]))
11258 		return KF_ARG_PTR_TO_LIST_HEAD;
11259 
11260 	if (is_kfunc_arg_list_node(meta->btf, &args[arg]))
11261 		return KF_ARG_PTR_TO_LIST_NODE;
11262 
11263 	if (is_kfunc_arg_rbtree_root(meta->btf, &args[arg]))
11264 		return KF_ARG_PTR_TO_RB_ROOT;
11265 
11266 	if (is_kfunc_arg_rbtree_node(meta->btf, &args[arg]))
11267 		return KF_ARG_PTR_TO_RB_NODE;
11268 
11269 	if (is_kfunc_arg_const_str(meta->btf, &args[arg]))
11270 		return KF_ARG_PTR_TO_CONST_STR;
11271 
11272 	if (is_kfunc_arg_map(meta->btf, &args[arg]))
11273 		return KF_ARG_PTR_TO_MAP;
11274 
11275 	if (is_kfunc_arg_wq(meta->btf, &args[arg]))
11276 		return KF_ARG_PTR_TO_WORKQUEUE;
11277 
11278 	if (is_kfunc_arg_timer(meta->btf, &args[arg]))
11279 		return KF_ARG_PTR_TO_TIMER;
11280 
11281 	if (is_kfunc_arg_task_work(meta->btf, &args[arg]))
11282 		return KF_ARG_PTR_TO_TASK_WORK;
11283 
11284 	if (is_kfunc_arg_irq_flag(meta->btf, &args[arg]))
11285 		return KF_ARG_PTR_TO_IRQ_FLAG;
11286 
11287 	if (is_kfunc_arg_res_spin_lock(meta->btf, &args[arg]))
11288 		return KF_ARG_PTR_TO_RES_SPIN_LOCK;
11289 
11290 	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
11291 		if (!btf_type_is_struct(ref_t)) {
11292 			verbose(env, "kernel function %s %s pointer type %s %s is not supported\n",
11293 				meta->func_name, reg_arg_name(env, argno),
11294 				btf_type_str(ref_t), ref_tname);
11295 			return -EINVAL;
11296 		}
11297 		return KF_ARG_PTR_TO_BTF_ID;
11298 	}
11299 
11300 	if (is_kfunc_arg_callback(env, meta->btf, &args[arg]))
11301 		return KF_ARG_PTR_TO_CALLBACK;
11302 
11303 	/* This is the catch all argument type of register types supported by
11304 	 * check_helper_mem_access. However, we only allow when argument type is
11305 	 * pointer to scalar, or struct composed (recursively) of scalars. When
11306 	 * arg_mem_size is true, the pointer can be void *.
11307 	 */
11308 	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
11309 	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
11310 		verbose(env, "%s pointer type %s %s must point to %sscalar, or struct with scalar\n",
11311 			reg_arg_name(env, argno),
11312 			btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
11313 		return -EINVAL;
11314 	}
11315 	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
11316 }
11317 
11318 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
11319 					struct bpf_reg_state *reg,
11320 					const struct btf_type *ref_t,
11321 					const char *ref_tname, u32 ref_id,
11322 					struct bpf_kfunc_call_arg_meta *meta,
11323 					int arg, argno_t argno)
11324 {
11325 	const struct btf_type *reg_ref_t;
11326 	bool strict_type_match = false;
11327 	const struct btf *reg_btf;
11328 	const char *reg_ref_tname;
11329 	bool taking_projection;
11330 	bool struct_same;
11331 	u32 reg_ref_id;
11332 
11333 	if (base_type(reg->type) == PTR_TO_BTF_ID) {
11334 		reg_btf = reg->btf;
11335 		reg_ref_id = reg->btf_id;
11336 	} else {
11337 		reg_btf = btf_vmlinux;
11338 		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
11339 	}
11340 
11341 	/* Enforce strict type matching for calls to kfuncs that are acquiring
11342 	 * or releasing a reference, or are no-cast aliases. We do _not_
11343 	 * enforce strict matching for kfuncs by default,
11344 	 * as we want to enable BPF programs to pass types that are bitwise
11345 	 * equivalent without forcing them to explicitly cast with something
11346 	 * like bpf_cast_to_kern_ctx().
11347 	 *
11348 	 * For example, say we had a type like the following:
11349 	 *
11350 	 * struct bpf_cpumask {
11351 	 *	cpumask_t cpumask;
11352 	 *	refcount_t usage;
11353 	 * };
11354 	 *
11355 	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
11356 	 * to a struct cpumask, so it would be safe to pass a struct
11357 	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
11358 	 *
11359 	 * The philosophy here is similar to how we allow scalars of different
11360 	 * types to be passed to kfuncs as long as the size is the same. The
11361 	 * only difference here is that we're simply allowing
11362 	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
11363 	 * resolve types.
11364 	 */
11365 	if ((is_kfunc_release(meta) && reg_is_referenced(env, reg)) ||
11366 	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
11367 		strict_type_match = true;
11368 
11369 	WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off));
11370 
11371 	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
11372 	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
11373 	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value,
11374 					   meta->btf, ref_id, strict_type_match);
11375 	/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
11376 	 * actually use it -- it must cast to the underlying type. So we allow
11377 	 * caller to pass in the underlying type.
11378 	 */
11379 	taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
11380 	if (!taking_projection && !struct_same) {
11381 		verbose(env, "kernel function %s %s expected pointer to %s %s but %s has a pointer to %s %s\n",
11382 			meta->func_name, reg_arg_name(env, argno),
11383 			btf_type_str(ref_t), ref_tname, reg_arg_name(env, argno),
11384 			btf_type_str(reg_ref_t), reg_ref_tname);
11385 		return -EINVAL;
11386 	}
11387 	return 0;
11388 }
11389 
11390 static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
11391 			     struct bpf_kfunc_call_arg_meta *meta)
11392 {
11393 	int err, spi, kfunc_class = IRQ_NATIVE_KFUNC;
11394 	bool irq_save;
11395 
11396 	if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] ||
11397 	    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) {
11398 		irq_save = true;
11399 		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
11400 			kfunc_class = IRQ_LOCK_KFUNC;
11401 	} else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] ||
11402 		   meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) {
11403 		irq_save = false;
11404 		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
11405 			kfunc_class = IRQ_LOCK_KFUNC;
11406 	} else {
11407 		verifier_bug(env, "unknown irq flags kfunc");
11408 		return -EFAULT;
11409 	}
11410 
11411 	if (irq_save) {
11412 		if (!is_irq_flag_reg_valid_uninit(env, reg)) {
11413 			verbose(env, "expected uninitialized irq flag as %s\n",
11414 				reg_arg_name(env, argno));
11415 			return -EINVAL;
11416 		}
11417 
11418 		err = check_mem_access(env, env->insn_idx, reg, argno, 0, BPF_DW,
11419 				       BPF_WRITE, -1, false, false);
11420 		if (err)
11421 			return err;
11422 
11423 		err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class);
11424 		if (err)
11425 			return err;
11426 	} else {
11427 		err = is_irq_flag_reg_valid_init(env, reg);
11428 		if (err) {
11429 			verbose(env, "expected an initialized irq flag as %s\n",
11430 				reg_arg_name(env, argno));
11431 			return err;
11432 		}
11433 
11434 		spi = irq_flag_get_spi(env, reg);
11435 		if (spi < 0)
11436 			return spi;
11437 
11438 		mark_stack_slots_scratched(env, spi, 1);
11439 
11440 		err = unmark_stack_slot_irq_flag(env, reg, kfunc_class);
11441 		if (err)
11442 			return err;
11443 	}
11444 	return 0;
11445 }
11446 
11447 
11448 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11449 {
11450 	struct btf_record *rec = reg_btf_record(reg);
11451 
11452 	if (!env->cur_state->active_locks) {
11453 		verifier_bug(env, "%s w/o active lock", __func__);
11454 		return -EFAULT;
11455 	}
11456 
11457 	if (type_flag(reg->type) & NON_OWN_REF) {
11458 		verifier_bug(env, "NON_OWN_REF already set");
11459 		return -EFAULT;
11460 	}
11461 
11462 	reg->type |= NON_OWN_REF;
11463 	if (rec->refcount_off >= 0)
11464 		reg->type |= MEM_RCU;
11465 
11466 	return 0;
11467 }
11468 
11469 static void ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 id)
11470 {
11471 	struct bpf_func_state *unused;
11472 	struct bpf_reg_state *reg;
11473 
11474 	WARN_ON_ONCE(release_reference_nomark(env->cur_state, id));
11475 
11476 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
11477 		if (reg->id == id) {
11478 			reg->id = 0;
11479 			ref_set_non_owning(env, reg);
11480 		}
11481 	}));
11482 
11483 	return;
11484 }
11485 
11486 /* Implementation details:
11487  *
11488  * Each register points to some region of memory, which we define as an
11489  * allocation. Each allocation may embed a bpf_spin_lock which protects any
11490  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
11491  * allocation. The lock and the data it protects are colocated in the same
11492  * memory region.
11493  *
11494  * Hence, every time a register holds a pointer value pointing to such
11495  * allocation, the verifier preserves a unique reg->id for it.
11496  *
11497  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
11498  * bpf_spin_lock is called.
11499  *
11500  * To enable this, lock state in the verifier captures two values:
11501  *	active_lock.ptr = Register's type specific pointer
11502  *	active_lock.id  = A unique ID for each register pointer value
11503  *
11504  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
11505  * supported register types.
11506  *
11507  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
11508  * allocated objects is the reg->btf pointer.
11509  *
11510  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
11511  * can establish the provenance of the map value statically for each distinct
11512  * lookup into such maps. They always contain a single map value hence unique
11513  * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
11514  *
11515  * So, in case of global variables, they use array maps with max_entries = 1,
11516  * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
11517  * into the same map value as max_entries is 1, as described above).
11518  *
11519  * In case of inner map lookups, the inner map pointer has same map_ptr as the
11520  * outer map pointer (in verifier context), but each lookup into an inner map
11521  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
11522  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
11523  * will get different reg->id assigned to each lookup, hence different
11524  * active_lock.id.
11525  *
11526  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
11527  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
11528  * returned from bpf_obj_new. Each allocation receives a new reg->id.
11529  */
11530 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11531 {
11532 	struct bpf_reference_state *s;
11533 	void *ptr;
11534 	u32 id;
11535 
11536 	switch ((int)reg->type) {
11537 	case PTR_TO_MAP_VALUE:
11538 		ptr = reg->map_ptr;
11539 		break;
11540 	case PTR_TO_BTF_ID | MEM_ALLOC:
11541 		ptr = reg->btf;
11542 		break;
11543 	default:
11544 		verifier_bug(env, "unknown reg type for lock check");
11545 		return -EFAULT;
11546 	}
11547 	id = reg->id;
11548 
11549 	if (!env->cur_state->active_locks)
11550 		return -EINVAL;
11551 	s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr);
11552 	if (!s) {
11553 		verbose(env, "held lock and object are not in the same allocation\n");
11554 		return -EINVAL;
11555 	}
11556 	return 0;
11557 }
11558 
11559 static bool is_bpf_list_api_kfunc(u32 btf_id)
11560 {
11561 	return is_bpf_list_push_kfunc(btf_id) ||
11562 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11563 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
11564 	       btf_id == special_kfunc_list[KF_bpf_list_del] ||
11565 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
11566 	       btf_id == special_kfunc_list[KF_bpf_list_back] ||
11567 	       btf_id == special_kfunc_list[KF_bpf_list_is_first] ||
11568 	       btf_id == special_kfunc_list[KF_bpf_list_is_last] ||
11569 	       btf_id == special_kfunc_list[KF_bpf_list_empty];
11570 }
11571 
11572 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11573 {
11574 	return is_bpf_rbtree_add_kfunc(btf_id) ||
11575 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11576 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
11577 	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
11578 	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11579 	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
11580 }
11581 
11582 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
11583 {
11584 	return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
11585 	       btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
11586 	       btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
11587 }
11588 
11589 static bool is_bpf_graph_api_kfunc(u32 btf_id)
11590 {
11591 	return is_bpf_list_api_kfunc(btf_id) ||
11592 	       is_bpf_rbtree_api_kfunc(btf_id) ||
11593 	       is_bpf_refcount_acquire_kfunc(btf_id);
11594 }
11595 
11596 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
11597 {
11598 	return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
11599 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
11600 	       btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
11601 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
11602 }
11603 
11604 static bool is_bpf_arena_kfunc(u32 btf_id)
11605 {
11606 	return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] ||
11607 	       btf_id == special_kfunc_list[KF_bpf_arena_free_pages] ||
11608 	       btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages];
11609 }
11610 
11611 static bool is_bpf_stream_kfunc(u32 btf_id)
11612 {
11613 	return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] ||
11614 	       btf_id == special_kfunc_list[KF_bpf_stream_print_stack];
11615 }
11616 
11617 static bool kfunc_spin_allowed(u32 btf_id)
11618 {
11619 	return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
11620 	       is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) ||
11621 	       is_bpf_stream_kfunc(btf_id);
11622 }
11623 
11624 static bool is_sync_callback_calling_kfunc(u32 btf_id)
11625 {
11626 	return is_bpf_rbtree_add_kfunc(btf_id);
11627 }
11628 
11629 static bool is_async_callback_calling_kfunc(u32 btf_id)
11630 {
11631 	return is_bpf_wq_set_callback_kfunc(btf_id) ||
11632 	       is_task_work_add_kfunc(btf_id);
11633 }
11634 
11635 bool bpf_is_throw_kfunc(struct bpf_insn *insn)
11636 {
11637 	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
11638 	       insn->imm == special_kfunc_list[KF_bpf_throw];
11639 }
11640 
11641 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id)
11642 {
11643 	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback];
11644 }
11645 
11646 static bool is_callback_calling_kfunc(u32 btf_id)
11647 {
11648 	return is_sync_callback_calling_kfunc(btf_id) ||
11649 	       is_async_callback_calling_kfunc(btf_id);
11650 }
11651 
11652 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
11653 {
11654 	return is_bpf_rbtree_api_kfunc(btf_id);
11655 }
11656 
11657 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
11658 					  enum btf_field_type head_field_type,
11659 					  u32 kfunc_btf_id)
11660 {
11661 	bool ret;
11662 
11663 	switch (head_field_type) {
11664 	case BPF_LIST_HEAD:
11665 		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
11666 		break;
11667 	case BPF_RB_ROOT:
11668 		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
11669 		break;
11670 	default:
11671 		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
11672 			btf_field_type_name(head_field_type));
11673 		return false;
11674 	}
11675 
11676 	if (!ret)
11677 		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
11678 			btf_field_type_name(head_field_type));
11679 	return ret;
11680 }
11681 
11682 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
11683 					  enum btf_field_type node_field_type,
11684 					  u32 kfunc_btf_id)
11685 {
11686 	bool ret;
11687 
11688 	switch (node_field_type) {
11689 	case BPF_LIST_NODE:
11690 		ret = is_bpf_list_push_kfunc(kfunc_btf_id) ||
11691 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
11692 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_first] ||
11693 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_last];
11694 		break;
11695 	case BPF_RB_NODE:
11696 		ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) ||
11697 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11698 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11699 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
11700 		break;
11701 	default:
11702 		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
11703 			btf_field_type_name(node_field_type));
11704 		return false;
11705 	}
11706 
11707 	if (!ret)
11708 		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
11709 			btf_field_type_name(node_field_type));
11710 	return ret;
11711 }
11712 
11713 static int
11714 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
11715 				   struct bpf_reg_state *reg, argno_t argno,
11716 				   struct bpf_kfunc_call_arg_meta *meta,
11717 				   enum btf_field_type head_field_type,
11718 				   struct btf_field **head_field)
11719 {
11720 	const char *head_type_name;
11721 	struct btf_field *field;
11722 	struct btf_record *rec;
11723 	u32 head_off;
11724 
11725 	if (meta->btf != btf_vmlinux) {
11726 		verifier_bug(env, "unexpected btf mismatch in kfunc call");
11727 		return -EFAULT;
11728 	}
11729 
11730 	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
11731 		return -EFAULT;
11732 
11733 	head_type_name = btf_field_type_name(head_field_type);
11734 	if (!tnum_is_const(reg->var_off)) {
11735 		verbose(env,
11736 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
11737 			reg_arg_name(env, argno), head_type_name);
11738 		return -EINVAL;
11739 	}
11740 
11741 	rec = reg_btf_record(reg);
11742 	head_off = reg->var_off.value;
11743 	field = btf_record_find(rec, head_off, head_field_type);
11744 	if (!field) {
11745 		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
11746 		return -EINVAL;
11747 	}
11748 
11749 	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
11750 	if (check_reg_allocation_locked(env, reg)) {
11751 		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
11752 			rec->spin_lock_off, head_type_name);
11753 		return -EINVAL;
11754 	}
11755 
11756 	if (*head_field) {
11757 		verifier_bug(env, "repeating %s arg", head_type_name);
11758 		return -EFAULT;
11759 	}
11760 	*head_field = field;
11761 	return 0;
11762 }
11763 
11764 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11765 					   struct bpf_reg_state *reg, argno_t argno,
11766 					   struct bpf_kfunc_call_arg_meta *meta)
11767 {
11768 	return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_LIST_HEAD,
11769 							  &meta->arg_list_head.field);
11770 }
11771 
11772 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11773 					     struct bpf_reg_state *reg, argno_t argno,
11774 					     struct bpf_kfunc_call_arg_meta *meta)
11775 {
11776 	return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_RB_ROOT,
11777 							  &meta->arg_rbtree_root.field);
11778 }
11779 
11780 static int
11781 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
11782 				   struct bpf_reg_state *reg, argno_t argno,
11783 				   struct bpf_kfunc_call_arg_meta *meta,
11784 				   enum btf_field_type head_field_type,
11785 				   enum btf_field_type node_field_type,
11786 				   struct btf_field **node_field)
11787 {
11788 	const char *node_type_name;
11789 	const struct btf_type *et, *t;
11790 	struct btf_field *field;
11791 	u32 node_off;
11792 
11793 	if (meta->btf != btf_vmlinux) {
11794 		verifier_bug(env, "unexpected btf mismatch in kfunc call");
11795 		return -EFAULT;
11796 	}
11797 
11798 	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
11799 		return -EFAULT;
11800 
11801 	node_type_name = btf_field_type_name(node_field_type);
11802 	if (!tnum_is_const(reg->var_off)) {
11803 		verbose(env,
11804 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
11805 			reg_arg_name(env, argno), node_type_name);
11806 		return -EINVAL;
11807 	}
11808 
11809 	node_off = reg->var_off.value;
11810 	field = reg_find_field_offset(reg, node_off, node_field_type);
11811 	if (!field) {
11812 		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
11813 		return -EINVAL;
11814 	}
11815 
11816 	field = *node_field;
11817 
11818 	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
11819 	t = btf_type_by_id(reg->btf, reg->btf_id);
11820 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
11821 				  field->graph_root.value_btf_id, true)) {
11822 		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
11823 			"in struct %s, but arg is at offset=%d in struct %s\n",
11824 			btf_field_type_name(head_field_type),
11825 			btf_field_type_name(node_field_type),
11826 			field->graph_root.node_offset,
11827 			btf_name_by_offset(field->graph_root.btf, et->name_off),
11828 			node_off, btf_name_by_offset(reg->btf, t->name_off));
11829 		return -EINVAL;
11830 	}
11831 	meta->arg_btf = reg->btf;
11832 	meta->arg_btf_id = reg->btf_id;
11833 
11834 	if (node_off != field->graph_root.node_offset) {
11835 		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
11836 			node_off, btf_field_type_name(node_field_type),
11837 			field->graph_root.node_offset,
11838 			btf_name_by_offset(field->graph_root.btf, et->name_off));
11839 		return -EINVAL;
11840 	}
11841 
11842 	return 0;
11843 }
11844 
11845 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11846 					   struct bpf_reg_state *reg, argno_t argno,
11847 					   struct bpf_kfunc_call_arg_meta *meta)
11848 {
11849 	return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta,
11850 						  BPF_LIST_HEAD, BPF_LIST_NODE,
11851 						  &meta->arg_list_head.field);
11852 }
11853 
11854 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11855 					     struct bpf_reg_state *reg, argno_t argno,
11856 					     struct bpf_kfunc_call_arg_meta *meta)
11857 {
11858 	return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta,
11859 						  BPF_RB_ROOT, BPF_RB_NODE,
11860 						  &meta->arg_rbtree_root.field);
11861 }
11862 
11863 /*
11864  * css_task iter allowlist is needed to avoid dead locking on css_set_lock.
11865  * LSM hooks and iters (both sleepable and non-sleepable) are safe.
11866  * Any sleepable progs are also safe since bpf_check_attach_target() enforce
11867  * them can only be attached to some specific hook points.
11868  */
11869 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
11870 {
11871 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
11872 
11873 	switch (prog_type) {
11874 	case BPF_PROG_TYPE_LSM:
11875 		return true;
11876 	case BPF_PROG_TYPE_TRACING:
11877 		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
11878 			return true;
11879 		fallthrough;
11880 	default:
11881 		return in_sleepable(env);
11882 	}
11883 }
11884 
11885 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
11886 			    int insn_idx)
11887 {
11888 	const char *func_name = meta->func_name, *ref_tname;
11889 	struct bpf_func_state *caller = cur_func(env);
11890 	struct bpf_reg_state *regs = cur_regs(env);
11891 	const struct btf *btf = meta->btf;
11892 	const struct btf_param *args;
11893 	struct btf_record *rec;
11894 	u32 i, nargs;
11895 	int ret;
11896 
11897 	args = (const struct btf_param *)(meta->func_proto + 1);
11898 	nargs = btf_type_vlen(meta->func_proto);
11899 	if (nargs > MAX_BPF_FUNC_ARGS) {
11900 		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
11901 			MAX_BPF_FUNC_ARGS);
11902 		return -EINVAL;
11903 	}
11904 	if (nargs > MAX_BPF_FUNC_REG_ARGS && !bpf_jit_supports_stack_args()) {
11905 		verbose(env, "JIT does not support kfunc %s() with %d args\n",
11906 			func_name, nargs);
11907 		return -ENOTSUPP;
11908 	}
11909 
11910 	ret = check_outgoing_stack_args(env, caller, nargs);
11911 	if (ret)
11912 		return ret;
11913 
11914 	/* Check that BTF function arguments match actual types that the
11915 	 * verifier sees.
11916 	 */
11917 	for (i = 0; i < nargs; i++) {
11918 		struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i);
11919 		const struct btf_type *t, *ref_t, *resolve_ret;
11920 		enum bpf_arg_type arg_type = ARG_DONTCARE;
11921 		argno_t argno = argno_from_arg(i + 1);
11922 		int regno = reg_from_argno(argno);
11923 		u32 ref_id, type_size;
11924 		bool is_ret_buf_sz = false;
11925 		int kf_arg_type;
11926 
11927 		if (is_kfunc_arg_prog_aux(btf, &args[i])) {
11928 			/* Reject repeated use bpf_prog_aux */
11929 			if (meta->arg_prog) {
11930 				verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
11931 				return -EFAULT;
11932 			}
11933 			if (regno < 0) {
11934 				verbose(env, "%s prog->aux cannot be a stack argument\n",
11935 					reg_arg_name(env, argno));
11936 				return -EINVAL;
11937 			}
11938 			meta->arg_prog = true;
11939 			cur_aux(env)->arg_prog = regno;
11940 			continue;
11941 		}
11942 
11943 		if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i))
11944 			continue;
11945 
11946 		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
11947 
11948 		if (btf_type_is_scalar(t)) {
11949 			if (reg->type != SCALAR_VALUE) {
11950 				verbose(env, "%s is not a scalar\n", reg_arg_name(env, argno));
11951 				return -EINVAL;
11952 			}
11953 
11954 			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
11955 				if (meta->arg_constant.found) {
11956 					verifier_bug(env, "only one constant argument permitted");
11957 					return -EFAULT;
11958 				}
11959 				if (!tnum_is_const(reg->var_off)) {
11960 					verbose(env, "%s must be a known constant\n",
11961 						reg_arg_name(env, argno));
11962 					return -EINVAL;
11963 				}
11964 				if (regno >= 0)
11965 					ret = mark_chain_precision(env, regno);
11966 				else
11967 					ret = mark_stack_arg_precision(env, i);
11968 				if (ret < 0)
11969 					return ret;
11970 				meta->arg_constant.found = true;
11971 				meta->arg_constant.value = reg->var_off.value;
11972 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
11973 				meta->r0_rdonly = true;
11974 				is_ret_buf_sz = true;
11975 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
11976 				is_ret_buf_sz = true;
11977 			}
11978 
11979 			if (is_ret_buf_sz) {
11980 				if (meta->r0_size) {
11981 					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
11982 					return -EINVAL;
11983 				}
11984 
11985 				if (!tnum_is_const(reg->var_off)) {
11986 					verbose(env, "%s is not a const\n",
11987 						reg_arg_name(env, argno));
11988 					return -EINVAL;
11989 				}
11990 
11991 				meta->r0_size = reg->var_off.value;
11992 				if (regno >= 0)
11993 					ret = mark_chain_precision(env, regno);
11994 				else
11995 					ret = mark_stack_arg_precision(env, i);
11996 				if (ret)
11997 					return ret;
11998 			}
11999 			continue;
12000 		}
12001 
12002 		if (!btf_type_is_ptr(t)) {
12003 			verbose(env, "Unrecognized %s type %s\n",
12004 				reg_arg_name(env, argno), btf_type_str(t));
12005 			return -EINVAL;
12006 		}
12007 
12008 		if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) &&
12009 		    !is_kfunc_arg_nullable(meta->btf, &args[i])) {
12010 			verbose(env, "Possibly NULL pointer passed to trusted %s\n",
12011 				reg_arg_name(env, argno));
12012 			return -EACCES;
12013 		}
12014 
12015 		if (regno == meta->release_regno && !is_kfunc_arg_dynptr(meta->btf, &args[i]) &&
12016 		    !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) {
12017 			verbose(env, "release kfunc %s expects referenced PTR_TO_BTF_ID passed to %s\n",
12018 				func_name, reg_arg_name(env, argno));
12019 			return -EINVAL;
12020 		}
12021 
12022 		if (reg_is_referenced(env, reg))
12023 			update_ref_obj(&meta->ref_obj, reg);
12024 
12025 		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
12026 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12027 
12028 		kf_arg_type = get_kfunc_ptr_arg_type(env, caller, regs, meta, t, ref_t, ref_tname,
12029 						     args, i, nargs, argno, reg);
12030 		if (kf_arg_type < 0)
12031 			return kf_arg_type;
12032 
12033 		switch (kf_arg_type) {
12034 		case KF_ARG_PTR_TO_NULL:
12035 			continue;
12036 		case KF_ARG_PTR_TO_MAP:
12037 			if (!reg->map_ptr) {
12038 				verbose(env, "pointer in %s isn't map pointer\n",
12039 					reg_arg_name(env, argno));
12040 				return -EINVAL;
12041 			}
12042 			if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
12043 					      reg->map_ptr->record->task_work_off >= 0)) {
12044 				/* Use map_uid (which is unique id of inner map) to reject:
12045 				 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
12046 				 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
12047 				 * if (inner_map1 && inner_map2) {
12048 				 *     wq = bpf_map_lookup_elem(inner_map1);
12049 				 *     if (wq)
12050 				 *         // mismatch would have been allowed
12051 				 *         bpf_wq_init(wq, inner_map2);
12052 				 * }
12053 				 *
12054 				 * Comparing map_ptr is enough to distinguish normal and outer maps.
12055 				 */
12056 				if (meta->map.ptr != reg->map_ptr ||
12057 				    meta->map.uid != reg->map_uid) {
12058 					if (reg->map_ptr->record->task_work_off >= 0) {
12059 						verbose(env,
12060 							"bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
12061 							meta->map.uid, reg->map_uid);
12062 						return -EINVAL;
12063 					}
12064 					verbose(env,
12065 						"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
12066 						meta->map.uid, reg->map_uid);
12067 					return -EINVAL;
12068 				}
12069 			}
12070 			meta->map.ptr = reg->map_ptr;
12071 			meta->map.uid = reg->map_uid;
12072 			fallthrough;
12073 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12074 		case KF_ARG_PTR_TO_BTF_ID:
12075 			if (!is_trusted_reg(env, reg)) {
12076 				if (!is_kfunc_rcu(meta)) {
12077 					verbose(env, "%s must be referenced or trusted\n",
12078 						reg_arg_name(env, argno));
12079 					return -EINVAL;
12080 				}
12081 				if (!is_rcu_reg(reg)) {
12082 					verbose(env, "%s must be a rcu pointer\n",
12083 						reg_arg_name(env, argno));
12084 					return -EINVAL;
12085 				}
12086 			}
12087 			fallthrough;
12088 		case KF_ARG_PTR_TO_ITER:
12089 		case KF_ARG_PTR_TO_LIST_HEAD:
12090 		case KF_ARG_PTR_TO_LIST_NODE:
12091 		case KF_ARG_PTR_TO_RB_ROOT:
12092 		case KF_ARG_PTR_TO_RB_NODE:
12093 		case KF_ARG_PTR_TO_MEM:
12094 		case KF_ARG_PTR_TO_MEM_SIZE:
12095 		case KF_ARG_PTR_TO_CALLBACK:
12096 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12097 		case KF_ARG_PTR_TO_CONST_STR:
12098 		case KF_ARG_PTR_TO_WORKQUEUE:
12099 		case KF_ARG_PTR_TO_TIMER:
12100 		case KF_ARG_PTR_TO_TASK_WORK:
12101 		case KF_ARG_PTR_TO_IRQ_FLAG:
12102 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
12103 			break;
12104 		case KF_ARG_PTR_TO_DYNPTR:
12105 			arg_type = ARG_PTR_TO_DYNPTR;
12106 			break;
12107 		case KF_ARG_PTR_TO_CTX:
12108 			arg_type = ARG_PTR_TO_CTX;
12109 			break;
12110 		default:
12111 			verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
12112 			return -EFAULT;
12113 		}
12114 
12115 		if (regno == meta->release_regno)
12116 			arg_type |= OBJ_RELEASE;
12117 		ret = check_func_arg_reg_off(env, reg, argno, arg_type);
12118 		if (ret < 0)
12119 			return ret;
12120 
12121 		switch (kf_arg_type) {
12122 		case KF_ARG_PTR_TO_CTX:
12123 			if (reg->type != PTR_TO_CTX) {
12124 				verbose(env, "%s expected pointer to ctx, but got %s\n",
12125 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
12126 				return -EINVAL;
12127 			}
12128 
12129 			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12130 				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
12131 				if (ret < 0)
12132 					return -EINVAL;
12133 				meta->ret_btf_id  = ret;
12134 			}
12135 			break;
12136 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12137 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
12138 				if (!is_bpf_obj_drop_kfunc(meta->func_id)) {
12139 					verbose(env, "%s expected for bpf_obj_drop()\n",
12140 						reg_arg_name(env, argno));
12141 					return -EINVAL;
12142 				}
12143 			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
12144 				if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) {
12145 					verbose(env, "%s expected for bpf_percpu_obj_drop()\n",
12146 						reg_arg_name(env, argno));
12147 					return -EINVAL;
12148 				}
12149 			} else {
12150 				verbose(env, "%s expected pointer to allocated object\n",
12151 					reg_arg_name(env, argno));
12152 				return -EINVAL;
12153 			}
12154 			if (!reg_is_referenced(env, reg)) {
12155 				verbose(env, "allocated object must be referenced\n");
12156 				return -EINVAL;
12157 			}
12158 			if (meta->btf == btf_vmlinux) {
12159 				meta->arg_btf = reg->btf;
12160 				meta->arg_btf_id = reg->btf_id;
12161 			}
12162 			break;
12163 		case KF_ARG_PTR_TO_DYNPTR:
12164 		{
12165 			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
12166 
12167 			if (is_kfunc_arg_uninit(btf, &args[i]))
12168 				dynptr_arg_type |= MEM_UNINIT;
12169 
12170 			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
12171 				dynptr_arg_type |= DYNPTR_TYPE_SKB;
12172 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
12173 				dynptr_arg_type |= DYNPTR_TYPE_XDP;
12174 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
12175 				dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
12176 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
12177 				dynptr_arg_type |= DYNPTR_TYPE_FILE;
12178 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) {
12179 				dynptr_arg_type |= DYNPTR_TYPE_FILE | OBJ_RELEASE;
12180 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
12181 				   (dynptr_arg_type & MEM_UNINIT)) {
12182 				enum bpf_dynptr_type parent_type = meta->dynptr.type;
12183 
12184 				if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
12185 					verifier_bug(env, "no dynptr type for parent of clone");
12186 					return -EFAULT;
12187 				}
12188 
12189 				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
12190 			}
12191 
12192 			ret = process_dynptr_func(env, reg, argno, insn_idx, dynptr_arg_type,
12193 						  &meta->ref_obj, &meta->dynptr);
12194 			if (ret < 0)
12195 				return ret;
12196 			break;
12197 		}
12198 		case KF_ARG_PTR_TO_ITER:
12199 			if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
12200 				if (!check_css_task_iter_allowlist(env)) {
12201 					verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
12202 					return -EINVAL;
12203 				}
12204 			}
12205 			ret = process_iter_arg(env, reg, argno, insn_idx, meta);
12206 			if (ret < 0)
12207 				return ret;
12208 			break;
12209 		case KF_ARG_PTR_TO_LIST_HEAD:
12210 			if (reg->type != PTR_TO_MAP_VALUE &&
12211 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12212 				verbose(env, "%s expected pointer to map value or allocated object\n",
12213 					reg_arg_name(env, argno));
12214 				return -EINVAL;
12215 			}
12216 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) &&
12217 			    !reg_is_referenced(env, reg)) {
12218 				verbose(env, "allocated object must be referenced\n");
12219 				return -EINVAL;
12220 			}
12221 			ret = process_kf_arg_ptr_to_list_head(env, reg, argno, meta);
12222 			if (ret < 0)
12223 				return ret;
12224 			break;
12225 		case KF_ARG_PTR_TO_RB_ROOT:
12226 			if (reg->type != PTR_TO_MAP_VALUE &&
12227 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12228 				verbose(env, "%s expected pointer to map value or allocated object\n",
12229 					reg_arg_name(env, argno));
12230 				return -EINVAL;
12231 			}
12232 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) &&
12233 			    !reg_is_referenced(env, reg)) {
12234 				verbose(env, "allocated object must be referenced\n");
12235 				return -EINVAL;
12236 			}
12237 			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, argno, meta);
12238 			if (ret < 0)
12239 				return ret;
12240 			break;
12241 		case KF_ARG_PTR_TO_LIST_NODE:
12242 			if (is_kfunc_arg_nonown_allowed(btf, &args[i]) &&
12243 			    type_is_non_owning_ref(reg->type) && !reg_is_referenced(env, reg)) {
12244 				/* Allow bpf_list_front/back return value for
12245 				 * __nonown_allowed list-node arguments.
12246 				 */
12247 				goto check_ok;
12248 			}
12249 			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12250 				verbose(env, "%s expected pointer to allocated object\n",
12251 					reg_arg_name(env, argno));
12252 				return -EINVAL;
12253 			}
12254 			if (!reg_is_referenced(env, reg)) {
12255 				verbose(env, "allocated object must be referenced\n");
12256 				return -EINVAL;
12257 			}
12258 check_ok:
12259 			ret = process_kf_arg_ptr_to_list_node(env, reg, argno, meta);
12260 			if (ret < 0)
12261 				return ret;
12262 			break;
12263 		case KF_ARG_PTR_TO_RB_NODE:
12264 			if (is_bpf_rbtree_add_kfunc(meta->func_id)) {
12265 				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12266 					verbose(env, "%s expected pointer to allocated object\n",
12267 						reg_arg_name(env, argno));
12268 					return -EINVAL;
12269 				}
12270 				if (!reg_is_referenced(env, reg)) {
12271 					verbose(env, "allocated object must be referenced\n");
12272 					return -EINVAL;
12273 				}
12274 			} else {
12275 				if (!type_is_non_owning_ref(reg->type) &&
12276 				    !reg_is_referenced(env, reg)) {
12277 					verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
12278 					return -EINVAL;
12279 				}
12280 				if (in_rbtree_lock_required_cb(env)) {
12281 					verbose(env, "%s not allowed in rbtree cb\n", func_name);
12282 					return -EINVAL;
12283 				}
12284 			}
12285 
12286 			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, argno, meta);
12287 			if (ret < 0)
12288 				return ret;
12289 			break;
12290 		case KF_ARG_PTR_TO_MAP:
12291 			/* If argument has '__map' suffix expect 'struct bpf_map *' */
12292 			ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
12293 			ref_t = btf_type_by_id(btf_vmlinux, ref_id);
12294 			ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12295 			fallthrough;
12296 		case KF_ARG_PTR_TO_BTF_ID:
12297 			/* Only base_type is checked, further checks are done here */
12298 			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
12299 			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
12300 			    !reg2btf_ids[base_type(reg->type)]) {
12301 				verbose(env, "%s is %s ", reg_arg_name(env, argno),
12302 					reg_type_str(env, reg->type));
12303 				verbose(env, "expected %s or socket\n",
12304 					reg_type_str(env, base_type(reg->type) |
12305 							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
12306 				return -EINVAL;
12307 			}
12308 			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i, argno);
12309 			if (ret < 0)
12310 				return ret;
12311 			break;
12312 		case KF_ARG_PTR_TO_MEM:
12313 			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
12314 			if (IS_ERR(resolve_ret)) {
12315 				verbose(env, "%s reference type('%s %s') size cannot be determined: %ld\n",
12316 					reg_arg_name(env, argno), btf_type_str(ref_t),
12317 					ref_tname, PTR_ERR(resolve_ret));
12318 				return -EINVAL;
12319 			}
12320 			ret = check_mem_reg(env, reg, argno, type_size);
12321 			if (ret < 0)
12322 				return ret;
12323 			break;
12324 		case KF_ARG_PTR_TO_MEM_SIZE:
12325 		{
12326 			struct bpf_reg_state *buff_reg = reg;
12327 			const struct btf_param *buff_arg = &args[i];
12328 			struct bpf_reg_state *size_reg = get_func_arg_reg(caller, regs, i + 1);
12329 			const struct btf_param *size_arg = &args[i + 1];
12330 			argno_t next_argno = argno_from_arg(i + 2);
12331 
12332 			if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
12333 				ret = check_kfunc_mem_size_reg(env, buff_reg, size_reg,
12334 							       argno, next_argno);
12335 				if (ret < 0) {
12336 					verbose(env, "%s and ", reg_arg_name(env, argno));
12337 					verbose(env, "%s memory, len pair leads to invalid memory access\n",
12338 						reg_arg_name(env, next_argno));
12339 					return ret;
12340 				}
12341 			}
12342 
12343 			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
12344 				if (meta->arg_constant.found) {
12345 					verifier_bug(env, "only one constant argument permitted");
12346 					return -EFAULT;
12347 				}
12348 				if (!tnum_is_const(size_reg->var_off)) {
12349 					verbose(env, "%s must be a known constant\n",
12350 						reg_arg_name(env, next_argno));
12351 					return -EINVAL;
12352 				}
12353 				meta->arg_constant.found = true;
12354 				meta->arg_constant.value = size_reg->var_off.value;
12355 			}
12356 
12357 			/* Skip next '__sz' or '__szk' argument */
12358 			i++;
12359 			break;
12360 		}
12361 		case KF_ARG_PTR_TO_CALLBACK:
12362 			if (reg->type != PTR_TO_FUNC) {
12363 				verbose(env, "%s expected pointer to func\n", reg_arg_name(env, argno));
12364 				return -EINVAL;
12365 			}
12366 			meta->subprogno = reg->subprogno;
12367 			break;
12368 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12369 			if (!type_is_ptr_alloc_obj(reg->type)) {
12370 				verbose(env, "%s is neither owning or non-owning ref\n",
12371 					reg_arg_name(env, argno));
12372 				return -EINVAL;
12373 			}
12374 			if (!type_is_non_owning_ref(reg->type))
12375 				meta->arg_owning_ref = true;
12376 
12377 			rec = reg_btf_record(reg);
12378 			if (!rec) {
12379 				verifier_bug(env, "Couldn't find btf_record");
12380 				return -EFAULT;
12381 			}
12382 
12383 			if (rec->refcount_off < 0) {
12384 				verbose(env, "%s doesn't point to a type with bpf_refcount field\n",
12385 					reg_arg_name(env, argno));
12386 				return -EINVAL;
12387 			}
12388 
12389 			meta->arg_btf = reg->btf;
12390 			meta->arg_btf_id = reg->btf_id;
12391 			break;
12392 		case KF_ARG_PTR_TO_CONST_STR:
12393 			if (reg->type != PTR_TO_MAP_VALUE) {
12394 				verbose(env, "%s doesn't point to a const string\n",
12395 					reg_arg_name(env, argno));
12396 				return -EINVAL;
12397 			}
12398 			ret = check_arg_const_str(env, reg, argno);
12399 			if (ret)
12400 				return ret;
12401 			break;
12402 		case KF_ARG_PTR_TO_WORKQUEUE:
12403 			if (reg->type != PTR_TO_MAP_VALUE) {
12404 				verbose(env, "%s doesn't point to a map value\n",
12405 					reg_arg_name(env, argno));
12406 				return -EINVAL;
12407 			}
12408 			ret = check_map_field_pointer(env, reg, argno, BPF_WORKQUEUE, &meta->map);
12409 			if (ret < 0)
12410 				return ret;
12411 			break;
12412 		case KF_ARG_PTR_TO_TIMER:
12413 			if (reg->type != PTR_TO_MAP_VALUE) {
12414 				verbose(env, "%s doesn't point to a map value\n",
12415 					reg_arg_name(env, argno));
12416 				return -EINVAL;
12417 			}
12418 			ret = process_timer_kfunc(env, reg, argno, meta);
12419 			if (ret < 0)
12420 				return ret;
12421 			break;
12422 		case KF_ARG_PTR_TO_TASK_WORK:
12423 			if (reg->type != PTR_TO_MAP_VALUE) {
12424 				verbose(env, "%s doesn't point to a map value\n",
12425 					reg_arg_name(env, argno));
12426 				return -EINVAL;
12427 			}
12428 			ret = check_map_field_pointer(env, reg, argno, BPF_TASK_WORK, &meta->map);
12429 			if (ret < 0)
12430 				return ret;
12431 			break;
12432 		case KF_ARG_PTR_TO_IRQ_FLAG:
12433 			if (reg->type != PTR_TO_STACK) {
12434 				verbose(env, "%s doesn't point to an irq flag on stack\n",
12435 					reg_arg_name(env, argno));
12436 				return -EINVAL;
12437 			}
12438 			ret = process_irq_flag(env, reg, argno, meta);
12439 			if (ret < 0)
12440 				return ret;
12441 			break;
12442 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
12443 		{
12444 			int flags = PROCESS_RES_LOCK;
12445 
12446 			if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12447 				verbose(env, "%s doesn't point to map value or allocated object\n",
12448 					reg_arg_name(env, argno));
12449 				return -EINVAL;
12450 			}
12451 
12452 			if (!is_bpf_res_spin_lock_kfunc(meta->func_id))
12453 				return -EFAULT;
12454 			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
12455 			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
12456 				flags |= PROCESS_SPIN_LOCK;
12457 			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
12458 			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
12459 				flags |= PROCESS_LOCK_IRQ;
12460 			ret = process_spin_lock(env, reg, argno, flags);
12461 			if (ret < 0)
12462 				return ret;
12463 			break;
12464 		}
12465 		}
12466 	}
12467 
12468 	return 0;
12469 }
12470 
12471 int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
12472 			     s32 func_id,
12473 			     s16 offset,
12474 			     struct bpf_kfunc_call_arg_meta *meta)
12475 {
12476 	struct bpf_kfunc_meta kfunc;
12477 	int err;
12478 
12479 	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
12480 	if (err)
12481 		return err;
12482 
12483 	memset(meta, 0, sizeof(*meta));
12484 	meta->btf = kfunc.btf;
12485 	meta->func_id = kfunc.id;
12486 	meta->func_proto = kfunc.proto;
12487 	meta->func_name = kfunc.name;
12488 
12489 	if (!kfunc.flags || !btf_kfunc_is_allowed(kfunc.btf, kfunc.id, env->prog))
12490 		return -EACCES;
12491 
12492 	meta->kfunc_flags = *kfunc.flags;
12493 
12494 	/* Only support release referenced argument passed by register */
12495 	if (is_kfunc_release(meta))
12496 		meta->release_regno = BPF_REG_1;
12497 
12498 	return 0;
12499 }
12500 
12501 /*
12502  * Determine how many bytes a helper accesses through a stack pointer at
12503  * argument position @arg (0-based, corresponding to R1-R5).
12504  *
12505  * Returns:
12506  *   > 0   known read access size in bytes
12507  *     0   doesn't read anything directly
12508  * S64_MIN unknown
12509  *   < 0   known write access of (-return) bytes
12510  */
12511 s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
12512 				  int arg, int insn_idx)
12513 {
12514 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
12515 	const struct bpf_func_proto *fn;
12516 	enum bpf_arg_type at;
12517 	s64 size;
12518 
12519 	if (bpf_get_helper_proto(env, insn->imm, &fn) < 0)
12520 		return S64_MIN;
12521 
12522 	at = fn->arg_type[arg];
12523 
12524 	switch (base_type(at)) {
12525 	case ARG_PTR_TO_MAP_KEY:
12526 	case ARG_PTR_TO_MAP_VALUE: {
12527 		bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY;
12528 		u64 val;
12529 		int i, map_reg;
12530 
12531 		for (i = 0; i < arg; i++) {
12532 			if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR)
12533 				break;
12534 		}
12535 		if (i >= arg)
12536 			goto scan_all_maps;
12537 
12538 		map_reg = BPF_REG_1 + i;
12539 
12540 		if (!(aux->const_reg_map_mask & BIT(map_reg)))
12541 			goto scan_all_maps;
12542 
12543 		i = aux->const_reg_vals[map_reg];
12544 		if (i < env->used_map_cnt) {
12545 			size = is_key ? env->used_maps[i]->key_size
12546 				      : env->used_maps[i]->value_size;
12547 			goto out;
12548 		}
12549 scan_all_maps:
12550 		/*
12551 		 * Map pointer is not known at this call site (e.g. different
12552 		 * maps on merged paths).  Conservatively return the largest
12553 		 * key_size or value_size across all maps used by the program.
12554 		 */
12555 		val = 0;
12556 		for (i = 0; i < env->used_map_cnt; i++) {
12557 			struct bpf_map *map = env->used_maps[i];
12558 			u32 sz = is_key ? map->key_size : map->value_size;
12559 
12560 			if (sz > val)
12561 				val = sz;
12562 			if (map->inner_map_meta) {
12563 				sz = is_key ? map->inner_map_meta->key_size
12564 					    : map->inner_map_meta->value_size;
12565 				if (sz > val)
12566 					val = sz;
12567 			}
12568 		}
12569 		if (!val)
12570 			return S64_MIN;
12571 		size = val;
12572 		goto out;
12573 	}
12574 	case ARG_PTR_TO_MEM:
12575 		if (at & MEM_FIXED_SIZE) {
12576 			size = fn->arg_size[arg];
12577 			goto out;
12578 		}
12579 		if (arg + 1 < ARRAY_SIZE(fn->arg_type) &&
12580 		    arg_type_is_mem_size(fn->arg_type[arg + 1])) {
12581 			int size_reg = BPF_REG_1 + arg + 1;
12582 
12583 			if (aux->const_reg_mask & BIT(size_reg)) {
12584 				size = (s64)aux->const_reg_vals[size_reg];
12585 				goto out;
12586 			}
12587 			/*
12588 			 * Size arg is const on each path but differs across merged
12589 			 * paths. MAX_BPF_STACK is a safe upper bound for reads.
12590 			 */
12591 			if (at & MEM_UNINIT)
12592 				return 0;
12593 			return MAX_BPF_STACK;
12594 		}
12595 		return S64_MIN;
12596 	case ARG_PTR_TO_DYNPTR:
12597 		size = BPF_DYNPTR_SIZE;
12598 		break;
12599 	case ARG_PTR_TO_STACK:
12600 		/*
12601 		 * Only used by bpf_calls_callback() helpers. The helper itself
12602 		 * doesn't access stack. The callback subprog does and it's
12603 		 * analyzed separately.
12604 		 */
12605 		return 0;
12606 	default:
12607 		return S64_MIN;
12608 	}
12609 out:
12610 	/*
12611 	 * MEM_UNINIT args are write-only: the helper initializes the
12612 	 * buffer without reading it.
12613 	 */
12614 	if (at & MEM_UNINIT)
12615 		return -size;
12616 	return size;
12617 }
12618 
12619 /*
12620  * Determine how many bytes a kfunc accesses through a stack pointer at
12621  * argument position @arg (0-based, corresponding to R1-R5).
12622  *
12623  * Returns:
12624  *   > 0      known read access size in bytes
12625  *     0      doesn't access memory through that argument (ex: not a pointer)
12626  *   S64_MIN  unknown
12627  *   < 0      known write access of (-return) bytes
12628  */
12629 s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
12630 				 int arg, int insn_idx)
12631 {
12632 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
12633 	struct bpf_kfunc_call_arg_meta meta;
12634 	const struct btf_param *args;
12635 	const struct btf_type *t, *ref_t;
12636 	const struct btf *btf;
12637 	u32 nargs, type_size;
12638 	s64 size;
12639 
12640 	if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
12641 		return S64_MIN;
12642 
12643 	btf = meta.btf;
12644 	args = btf_params(meta.func_proto);
12645 	nargs = btf_type_vlen(meta.func_proto);
12646 	if (arg >= nargs)
12647 		return 0;
12648 
12649 	t = btf_type_skip_modifiers(btf, args[arg].type, NULL);
12650 	if (!btf_type_is_ptr(t))
12651 		return 0;
12652 
12653 	/* dynptr: fixed 16-byte on-stack representation */
12654 	if (is_kfunc_arg_dynptr(btf, &args[arg])) {
12655 		size = BPF_DYNPTR_SIZE;
12656 		goto out;
12657 	}
12658 
12659 	/* ptr + __sz/__szk pair: size is in the next register */
12660 	if (arg + 1 < nargs &&
12661 	    (btf_param_match_suffix(btf, &args[arg + 1], "__sz") ||
12662 	     btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) {
12663 		int size_reg = BPF_REG_1 + arg + 1;
12664 
12665 		if (aux->const_reg_mask & BIT(size_reg)) {
12666 			size = (s64)aux->const_reg_vals[size_reg];
12667 			goto out;
12668 		}
12669 		return MAX_BPF_STACK;
12670 	}
12671 
12672 	/* fixed-size pointed-to type: resolve via BTF */
12673 	ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
12674 	if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) {
12675 		size = type_size;
12676 		goto out;
12677 	}
12678 
12679 	return S64_MIN;
12680 out:
12681 	/* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */
12682 	if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW)
12683 		return -size;
12684 	if (is_kfunc_arg_uninit(btf, &args[arg]))
12685 		return -size;
12686 	return size;
12687 }
12688 
12689 /* check special kfuncs and return:
12690  *  1  - not fall-through to 'else' branch, continue verification
12691  *  0  - fall-through to 'else' branch
12692  * < 0 - not fall-through to 'else' branch, return error
12693  */
12694 static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
12695 			       struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
12696 			       const struct btf_type *ptr_type, struct btf *desc_btf)
12697 {
12698 	const struct btf_type *ret_t;
12699 	int err = 0;
12700 
12701 	if (meta->btf != btf_vmlinux)
12702 		return 0;
12703 
12704 	if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12705 		struct btf_struct_meta *struct_meta;
12706 		struct btf *ret_btf;
12707 		u32 ret_btf_id;
12708 
12709 		if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set)
12710 			return -ENOMEM;
12711 
12712 		if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
12713 			verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
12714 			return -EINVAL;
12715 		}
12716 
12717 		ret_btf = env->prog->aux->btf;
12718 		ret_btf_id = meta->arg_constant.value;
12719 
12720 		/* This may be NULL due to user not supplying a BTF */
12721 		if (!ret_btf) {
12722 			verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
12723 			return -EINVAL;
12724 		}
12725 
12726 		ret_t = btf_type_by_id(ret_btf, ret_btf_id);
12727 		if (!ret_t || !__btf_type_is_struct(ret_t)) {
12728 			verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
12729 			return -EINVAL;
12730 		}
12731 
12732 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12733 			if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
12734 				verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
12735 					ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
12736 				return -EINVAL;
12737 			}
12738 
12739 			if (!bpf_global_percpu_ma_set) {
12740 				mutex_lock(&bpf_percpu_ma_lock);
12741 				if (!bpf_global_percpu_ma_set) {
12742 					/* Charge memory allocated with bpf_global_percpu_ma to
12743 					 * root memcg. The obj_cgroup for root memcg is NULL.
12744 					 */
12745 					err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
12746 					if (!err)
12747 						bpf_global_percpu_ma_set = true;
12748 				}
12749 				mutex_unlock(&bpf_percpu_ma_lock);
12750 				if (err)
12751 					return err;
12752 			}
12753 
12754 			mutex_lock(&bpf_percpu_ma_lock);
12755 			err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
12756 			mutex_unlock(&bpf_percpu_ma_lock);
12757 			if (err)
12758 				return err;
12759 		}
12760 
12761 		struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
12762 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12763 			if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
12764 				verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
12765 				return -EINVAL;
12766 			}
12767 
12768 			if (struct_meta) {
12769 				verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
12770 				return -EINVAL;
12771 			}
12772 		}
12773 
12774 		mark_reg_known_zero(env, regs, BPF_REG_0);
12775 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12776 		regs[BPF_REG_0].btf = ret_btf;
12777 		regs[BPF_REG_0].btf_id = ret_btf_id;
12778 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id))
12779 			regs[BPF_REG_0].type |= MEM_PERCPU;
12780 
12781 		insn_aux->obj_new_size = ret_t->size;
12782 		insn_aux->kptr_struct_meta = struct_meta;
12783 	} else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) {
12784 		mark_reg_known_zero(env, regs, BPF_REG_0);
12785 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12786 		regs[BPF_REG_0].btf = meta->arg_btf;
12787 		regs[BPF_REG_0].btf_id = meta->arg_btf_id;
12788 
12789 		insn_aux->kptr_struct_meta =
12790 			btf_find_struct_meta(meta->arg_btf,
12791 					     meta->arg_btf_id);
12792 	} else if (is_list_node_type(ptr_type)) {
12793 		struct btf_field *field = meta->arg_list_head.field;
12794 
12795 		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12796 	} else if (is_rbtree_node_type(ptr_type)) {
12797 		struct btf_field *field = meta->arg_rbtree_root.field;
12798 
12799 		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12800 	} else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12801 		mark_reg_known_zero(env, regs, BPF_REG_0);
12802 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
12803 		regs[BPF_REG_0].btf = desc_btf;
12804 		regs[BPF_REG_0].btf_id = meta->ret_btf_id;
12805 	} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
12806 		ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
12807 		if (!ret_t) {
12808 			verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
12809 				meta->arg_constant.value);
12810 			return -EINVAL;
12811 		} else if (btf_type_is_struct(ret_t)) {
12812 			mark_reg_known_zero(env, regs, BPF_REG_0);
12813 			regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
12814 			regs[BPF_REG_0].btf = desc_btf;
12815 			regs[BPF_REG_0].btf_id = meta->arg_constant.value;
12816 		} else if (btf_type_is_void(ret_t)) {
12817 			mark_reg_known_zero(env, regs, BPF_REG_0);
12818 			regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
12819 			regs[BPF_REG_0].mem_size = 0;
12820 		} else {
12821 			verbose(env,
12822 				"kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
12823 			return -EINVAL;
12824 		}
12825 	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
12826 		   meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
12827 		enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->dynptr.type);
12828 
12829 		mark_reg_known_zero(env, regs, BPF_REG_0);
12830 
12831 		if (!meta->arg_constant.found) {
12832 			verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size");
12833 			return -EFAULT;
12834 		}
12835 
12836 		regs[BPF_REG_0].mem_size = meta->arg_constant.value;
12837 
12838 		/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
12839 		regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
12840 
12841 		if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
12842 			regs[BPF_REG_0].type |= MEM_RDONLY;
12843 		} else {
12844 			/* this will set env->seen_direct_write to true */
12845 			if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
12846 				verbose(env, "the prog does not allow writes to packet data\n");
12847 				return -EINVAL;
12848 			}
12849 		}
12850 
12851 		if (!meta->dynptr.id) {
12852 			verifier_bug(env, "no dynptr id");
12853 			return -EFAULT;
12854 		}
12855 		regs[BPF_REG_0].parent_id = meta->dynptr.id;
12856 	} else {
12857 		return 0;
12858 	}
12859 
12860 	return 1;
12861 }
12862 
12863 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
12864 
/*
 * check_kfunc_call() - verify one call instruction that targets a kernel
 * function (kfunc) and model its effect on the current verifier state.
 *
 * @env:        verifier environment; the current state and its register file
 *              are updated in place.
 * @insn:       the call instruction; insn->imm and insn->off are used to look
 *              up the kfunc's metadata.
 * @insn_idx_p: points at the index of @insn; it is only read here.
 *
 * In order, this function: fetches the kfunc metadata, forks a "lock failed"
 * state for the bpf_res_spin_lock kfuncs, applies capability and sleepable
 * checks, checks the arguments, pushes callback verification where needed,
 * updates the RCU/preempt nesting counters, performs the release for release
 * kfuncs, clobbers the caller-saved registers, derives the state of R0 from
 * the BTF return type, records the register size of each argument and runs
 * the iterator-next processing.
 *
 * Return: 0 on success (also when insn->imm is 0; per the comment below that
 * case is reported by fixup_kfunc_call), a negative errno when the call is
 * rejected, or - for bpf_throw - whatever process_bpf_exit_full() returns.
 */
12865 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
12866 			    int *insn_idx_p)
12867 {
12868 	bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
12869 	struct bpf_reg_state *regs = cur_regs(env);
12870 	const char *func_name, *ptr_type_name;
12871 	const struct btf_type *t, *ptr_type;
12872 	struct bpf_kfunc_call_arg_meta meta;
12873 	struct bpf_insn_aux_data *insn_aux;
12874 	int err, insn_idx = *insn_idx_p;
12875 	const struct btf_param *args;
12876 	u32 i, nargs, ptr_type_id;
12877 	struct btf *desc_btf;
12878 	int id;
12879 
12880 	/* skip for now, but return error when we find this in fixup_kfunc_call */
12881 	if (!insn->imm)
12882 		return 0;
12883 
12884 	err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
12885 	if (err == -EACCES && meta.func_name)
12886 		verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
12887 	if (err)
12888 		return err;
12889 	desc_btf = meta.btf;
12890 	func_name = meta.func_name;
12891 	insn_aux = &env->insn_aux_data[insn_idx];
12892 
12893 	insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta);
12894 
	/*
	 * For the bpf_res_spin_lock kfuncs, fork a second state that resumes at
	 * the next instruction with R0 constrained to [-MAX_ERRNO, -1]. The
	 * current state gets R0 == 0 further below, in the scalar-return
	 * handling.
	 */
12895 	if (!insn->off &&
12896 	    (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
12897 	     insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) {
12898 		struct bpf_verifier_state *branch;
		/* NOTE: shadows the function-scope 'regs'; refers to the forked state. */
12899 		struct bpf_reg_state *regs;
12900 
12901 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
12902 		if (IS_ERR(branch)) {
12903 			verbose(env, "failed to push state for failed lock acquisition\n");
12904 			return PTR_ERR(branch);
12905 		}
12906 
12907 		regs = branch->frame[branch->curframe]->regs;
12908 
12909 		/* Clear r0-r5 registers in forked state */
12910 		for (i = 0; i < CALLER_SAVED_REGS; i++)
12911 			bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
12912 
12913 		mark_reg_unknown(env, regs, BPF_REG_0);
12914 		err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
12915 		if (err) {
12916 			verbose(env, "failed to mark s32 range for retval in forked state for lock\n");
12917 			return err;
12918 		}
12919 		__mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
12920 	} else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
12921 		verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
12922 		return -EFAULT;
12923 	}
12924 
12925 	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
12926 		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
12927 		return -EACCES;
12928 	}
12929 
12930 	sleepable = bpf_is_kfunc_sleepable(&meta);
12931 	if (sleepable && !in_sleepable(env)) {
12932 		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
12933 		return -EACCES;
12934 	}
12935 
12936 	/* Track non-sleepable context for kfuncs, same as for helpers. */
12937 	if (!in_sleepable_context(env))
12938 		insn_aux->non_sleepable = true;
12939 
12940 	/* Check the arguments */
12941 	err = check_kfunc_args(env, &meta, insn_idx);
12942 	if (err < 0)
12943 		return err;
12944 
12945 	if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
12946 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12947 					 set_rbtree_add_callback_state);
12948 		if (err) {
12949 			verbose(env, "kfunc %s#%d failed callback verification\n",
12950 				func_name, meta.func_id);
12951 			return err;
12952 		}
12953 	}
12954 
	/* Consumed below when R0 is set up as PTR_TO_MEM for a non-struct pointer return. */
12955 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
12956 		meta.r0_size = sizeof(u64);
12957 		meta.r0_rdonly = false;
12958 	}
12959 
12960 	if (is_bpf_wq_set_callback_kfunc(meta.func_id)) {
12961 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12962 					 set_timer_callback_state);
12963 		if (err) {
12964 			verbose(env, "kfunc %s#%d failed callback verification\n",
12965 				func_name, meta.func_id);
12966 			return err;
12967 		}
12968 	}
12969 
12970 	if (is_task_work_add_kfunc(meta.func_id)) {
12971 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12972 					 set_task_work_schedule_callback_state);
12973 		if (err) {
12974 			verbose(env, "kfunc %s#%d failed callback verification\n",
12975 				func_name, meta.func_id);
12976 			return err;
12977 		}
12978 	}
12979 
12980 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
12981 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
12982 
12983 	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
12984 	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
12985 
	/*
	 * Maintain the RCU and preempt nesting counters of the current state.
	 * An unlock/enable with a zero counter is rejected; dropping the last
	 * RCU lock invalidates the RCU protected references.
	 */
12986 	if (rcu_lock) {
12987 		env->cur_state->active_rcu_locks++;
12988 	} else if (rcu_unlock) {
12989 		if (env->cur_state->active_rcu_locks == 0) {
12990 			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
12991 			return -EINVAL;
12992 		}
12993 		if (--env->cur_state->active_rcu_locks == 0)
12994 			invalidate_rcu_protected_refs(env);
12995 	} else if (preempt_disable) {
12996 		env->cur_state->active_preempt_locks++;
12997 	} else if (preempt_enable) {
12998 		if (env->cur_state->active_preempt_locks == 0) {
12999 			verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
13000 			return -EINVAL;
13001 		}
13002 		env->cur_state->active_preempt_locks--;
13003 	}
13004 
	/* Evaluated after the counters above have been updated for this call. */
13005 	if (sleepable && !in_sleepable_context(env)) {
13006 		verbose(env, "kernel func %s is sleepable within %s\n",
13007 			func_name, non_sleepable_context_description(env));
13008 		return -EACCES;
13009 	}
13010 
13011 	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
13012 		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
13013 		return -EACCES;
13014 	}
13015 
13016 	if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
13017 		verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
13018 		return -EACCES;
13019 	}
13020 
13021 	/* In case of release function, we get register number of refcounted
13022 	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
13023 	 */
13024 	if (meta.release_regno) {
13025 		err = release_reg(env, &regs[meta.release_regno], false, !!meta.dynptr.id);
13026 		if (err)
13027 			return err;
13028 	}
13029 
	/*
	 * List push / rbtree add: record the insert offset and struct meta for
	 * this instruction, then convert the reference identified by R2's id
	 * from owning to non-owning.
	 */
13030 	if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) {
13031 		id = regs[BPF_REG_2].id;
13032 		insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
13033 		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
13034 		ref_convert_owning_non_owning(env, id);
13035 	}
13036 
13037 	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
13038 		if (!bpf_jit_supports_exceptions()) {
13039 			verbose(env, "JIT does not support calling kfunc %s#%d\n",
13040 				func_name, meta.func_id);
13041 			return -ENOTSUPP;
13042 		}
13043 		env->seen_exception = true;
13044 
13045 		/* In the case of the default callback, the cookie value passed
13046 		 * to bpf_throw becomes the return value of the program.
13047 		 */
13048 		if (!env->exception_callback_subprog) {
13049 			err = check_return_code(env, BPF_REG_1, "R1");
13050 			if (err < 0)
13051 				return err;
13052 		}
13053 	}
13054 
	/* The call clobbers every caller-saved register; R0 is rebuilt below. */
13055 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
13056 		u32 regno = caller_saved[i];
13057 
13058 		bpf_mark_reg_not_init(env, &regs[regno]);
13059 		regs[regno].subreg_def = DEF_NOT_SUBREG;
13060 	}
13061 	invalidate_outgoing_stack_args(env, cur_func(env));
13062 
13063 	/* Check return type */
13064 	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
13065 
	/*
	 * An acquire kfunc must return a pointer to a struct, except for the
	 * vmlinux obj_new / percpu_obj_new / refcount_acquire kfuncs.
	 */
13066 	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
13067 		if (meta.btf != btf_vmlinux ||
13068 		    (!is_bpf_obj_new_kfunc(meta.func_id) &&
13069 		     !is_bpf_percpu_obj_new_kfunc(meta.func_id) &&
13070 		     !is_bpf_refcount_acquire_kfunc(meta.func_id))) {
13071 			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
13072 			return -EINVAL;
13073 		}
13074 	}
13075 
13076 	if (btf_type_is_scalar(t)) {
13077 		mark_reg_unknown(env, regs, BPF_REG_0);
		/* Success path of bpf_res_spin_lock: the failure path was forked above. */
13078 		if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
13079 		    meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]))
13080 			__mark_reg_const_zero(env, &regs[BPF_REG_0]);
13081 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
13082 	} else if (btf_type_is_ptr(t)) {
13083 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
		/*
		 * check_special_kfunc() returns a positive value when it has
		 * already set up R0, 0 when the kfunc needs the generic handling
		 * below, and a negative errno on failure.
		 */
13084 		err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
13085 		if (err) {
13086 			if (err < 0)
13087 				return err;
13088 		} else if (btf_type_is_void(ptr_type)) {
13089 			/* kfunc returning 'void *' is equivalent to returning scalar */
13090 			mark_reg_unknown(env, regs, BPF_REG_0);
13091 		} else if (!__btf_type_is_struct(ptr_type)) {
			/*
			 * Pointer to a non-struct type: R0 becomes PTR_TO_MEM. If no
			 * size was recorded in meta, fall back to the BTF size of the
			 * pointee and make the memory read-only.
			 */
13092 			if (!meta.r0_size) {
13093 				__u32 sz;
13094 
13095 				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
13096 					meta.r0_size = sz;
13097 					meta.r0_rdonly = true;
13098 				}
13099 			}
13100 			if (!meta.r0_size) {
13101 				ptr_type_name = btf_name_by_offset(desc_btf,
13102 								   ptr_type->name_off);
13103 				verbose(env,
13104 					"kernel function %s returns pointer type %s %s is not supported\n",
13105 					func_name,
13106 					btf_type_str(ptr_type),
13107 					ptr_type_name);
13108 				return -EINVAL;
13109 			}
13110 
13111 			mark_reg_known_zero(env, regs, BPF_REG_0);
13112 			regs[BPF_REG_0].type = PTR_TO_MEM;
13113 			regs[BPF_REG_0].mem_size = meta.r0_size;
13114 
13115 			if (meta.r0_rdonly)
13116 				regs[BPF_REG_0].type |= MEM_RDONLY;
13117 
13118 			/* Ensures we don't access the memory after a release_reference() */
13119 			if (meta.ref_obj.id) {
13120 				err = validate_ref_obj(env, &meta.ref_obj);
13121 				if (err)
13122 					return err;
13123 				regs[BPF_REG_0].parent_id = meta.ref_obj.id;
13124 			}
13125 
13126 			if (is_kfunc_rcu_protected(&meta))
13127 				regs[BPF_REG_0].type |= MEM_RCU;
13128 		} else {
			/* Pointer to a struct: R0 becomes PTR_TO_BTF_ID plus one trust flag. */
13129 			enum bpf_reg_type type = PTR_TO_BTF_ID;
13130 
13131 			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
13132 				type |= PTR_UNTRUSTED;
13133 			else if (is_kfunc_rcu_protected(&meta) ||
13134 				 (bpf_is_iter_next_kfunc(&meta) &&
13135 				  (get_iter_from_state(env->cur_state, &meta)
13136 					   ->type & MEM_RCU))) {
13137 				/*
13138 				 * If the iterator's constructor (the _new
13139 				 * function e.g., bpf_iter_task_new) has been
13140 				 * annotated with BPF kfunc flag
13141 				 * KF_RCU_PROTECTED and was called within a RCU
13142 				 * read-side critical section, also propagate
13143 				 * the MEM_RCU flag to the pointer returned from
13144 				 * the iterator's next function (e.g.,
13145 				 * bpf_iter_task_next).
13146 				 */
13147 				type |= MEM_RCU;
13148 			} else {
13149 				/*
13150 				 * Any PTR_TO_BTF_ID that is returned from a BPF
13151 				 * kfunc should by default be treated as
13152 				 * implicitly trusted.
13153 				 */
13154 				type |= PTR_TRUSTED;
13155 			}
13156 
13157 			mark_reg_known_zero(env, regs, BPF_REG_0);
13158 			regs[BPF_REG_0].btf = desc_btf;
13159 			regs[BPF_REG_0].type = type;
13160 			regs[BPF_REG_0].btf_id = ptr_type_id;
13161 		}
13162 
13163 		if (is_kfunc_ret_null(&meta)) {
13164 			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
13165 			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
13166 			regs[BPF_REG_0].id = ++env->id_gen;
13167 		}
13168 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
		/* For acquire kfuncs the id of the new reference replaces any id set above. */
13169 		if (is_kfunc_acquire(&meta)) {
13170 			id = acquire_reference(env, insn_idx, 0);
13171 			if (id < 0)
13172 				return id;
13173 			regs[BPF_REG_0].id = id;
13174 		} else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
13175 			ref_set_non_owning(env, &regs[BPF_REG_0]);
13176 		}
13177 
13178 		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
13179 			regs[BPF_REG_0].id = ++env->id_gen;
13180 	} else if (btf_type_is_void(t)) {
13181 		if (meta.btf == btf_vmlinux) {
13182 			if (is_bpf_obj_drop_kfunc(meta.func_id) ||
13183 			    is_bpf_percpu_obj_drop_kfunc(meta.func_id)) {
13184 				insn_aux->kptr_struct_meta =
13185 					btf_find_struct_meta(meta.arg_btf,
13186 							     meta.arg_btf_id);
13187 			}
13188 		}
13189 	}
13190 
13191 	if (bpf_is_kfunc_pkt_changing(&meta))
13192 		clear_all_pkt_pointers(env);
13193 
13194 	nargs = btf_type_vlen(meta.func_proto);
	/*
	 * More parameters than MAX_BPF_FUNC_REG_ARGS: raise the caller
	 * subprog's stack_arg_cnt if this call needs more than is recorded.
	 * NOTE(review): presumably the excess arguments are passed on the
	 * stack - confirm against bpf_in_stack_arg_cnt().
	 */
13195 	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
13196 		struct bpf_func_state *caller = cur_func(env);
13197 		struct bpf_subprog_info *caller_info = &env->subprog_info[caller->subprogno];
13198 		u16 out_stack_arg_cnt = nargs - MAX_BPF_FUNC_REG_ARGS;
13199 		u16 stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + out_stack_arg_cnt;
13200 
13201 		if (stack_arg_cnt > caller_info->stack_arg_cnt)
13202 			caller_info->stack_arg_cnt = stack_arg_cnt;
13203 	}
13204 
	/* Record the register size of each of the first MAX_BPF_FUNC_REG_ARGS arguments (R1 upwards). */
13205 	args = (const struct btf_param *)(meta.func_proto + 1);
13206 	for (i = 0; i < min_t(int, nargs, MAX_BPF_FUNC_REG_ARGS); i++) {
13207 		u32 regno = i + 1;
13208 
13209 		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
13210 		if (btf_type_is_ptr(t))
13211 			mark_btf_func_reg_size(env, regno, sizeof(void *));
13212 		else
13213 			/* scalar. ensured by check_kfunc_args() */
13214 			mark_btf_func_reg_size(env, regno, t->size);
13215 	}
13216 
13217 	if (bpf_is_iter_next_kfunc(&meta)) {
13218 		err = process_iter_next_call(env, insn_idx, &meta);
13219 		if (err)
13220 			return err;
13221 	}
13222 
13223 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
13224 		env->prog->call_session_cookie = true;
13225 
	/* For bpf_throw the result of process_bpf_exit_full() is returned directly. */
13226 	if (bpf_is_throw_kfunc(insn))
13227 		return process_bpf_exit_full(env, NULL, true);
13228 
13229 	return 0;
13230 }
13231 
13232 static bool check_reg_sane_offset_scalar(struct bpf_verifier_env *env,
13233 					 const struct bpf_reg_state *reg,
13234 					 enum bpf_reg_type type)
13235 {
13236 	bool known = tnum_is_const(reg->var_off);
13237 	s64 val = reg->var_off.value;
13238 	s64 smin = reg_smin(reg);
13239 
13240 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13241 		verbose(env, "math between %s pointer and %lld is not allowed\n",
13242 			reg_type_str(env, type), val);
13243 		return false;
13244 	}
13245 
13246 	if (smin == S64_MIN) {
13247 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
13248 			reg_type_str(env, type));
13249 		return false;
13250 	}
13251 
13252 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13253 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
13254 			smin, reg_type_str(env, type));
13255 		return false;
13256 	}
13257 
13258 	return true;
13259 }
13260 
13261 static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
13262 				      const struct bpf_reg_state *reg,
13263 				      enum bpf_reg_type type)
13264 {
13265 	bool known = tnum_is_const(reg->var_off);
13266 	s64 val = reg->var_off.value;
13267 	s64 smin = reg_smin(reg);
13268 
13269 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13270 		verbose(env, "%s pointer offset %lld is not allowed\n",
13271 			reg_type_str(env, type), val);
13272 		return false;
13273 	}
13274 
13275 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13276 		verbose(env, "%s pointer offset %lld is not allowed\n",
13277 			reg_type_str(env, type), smin);
13278 		return false;
13279 	}
13280 
13281 	return true;
13282 }
13283 
/*
 * Negative reason codes produced by the pointer-ALU sanitation helpers in
 * this file. sanitize_err() turns each of them into a verifier log message
 * and an errno.
 */
13284 enum {
	/* Non-constant offset whose signed min and max differ in sign (sanitize_ptr_alu()). */
13285 	REASON_BOUNDS	= -1,
	/* Pointer type for which retrieve_ptr_limit() has no limit. */
13286 	REASON_TYPE	= -2,
	/* Instruction already recorded with a different alu_state/alu_limit (update_alu_sanitation_state()). */
13287 	REASON_PATHS	= -3,
	/* Computed pointer limit is not below the allowed maximum (retrieve_ptr_limit()). */
13288 	REASON_LIMIT	= -4,
	/* Pushing the speculative path failed (sanitize_ptr_alu()); sanitize_err() maps it to -ENOMEM. */
13289 	REASON_STACK	= -5,
13290 };
13291 
13292 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
13293 			      u32 *alu_limit, bool mask_to_left)
13294 {
13295 	u32 max = 0, ptr_limit = 0;
13296 
13297 	switch (ptr_reg->type) {
13298 	case PTR_TO_STACK:
13299 		/* Offset 0 is out-of-bounds, but acceptable start for the
13300 		 * left direction, see BPF_REG_FP. Also, unknown scalar
13301 		 * offset where we would need to deal with min/max bounds is
13302 		 * currently prohibited for unprivileged.
13303 		 */
13304 		max = MAX_BPF_STACK + mask_to_left;
13305 		ptr_limit = -ptr_reg->var_off.value;
13306 		break;
13307 	case PTR_TO_MAP_VALUE:
13308 		max = ptr_reg->map_ptr->value_size;
13309 		ptr_limit = mask_to_left ? reg_smin(ptr_reg) : reg_umax(ptr_reg);
13310 		break;
13311 	default:
13312 		return REASON_TYPE;
13313 	}
13314 
13315 	if (ptr_limit >= max)
13316 		return REASON_LIMIT;
13317 	*alu_limit = ptr_limit;
13318 	return 0;
13319 }
13320 
13321 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
13322 				    const struct bpf_insn *insn)
13323 {
13324 	return env->bypass_spec_v1 ||
13325 		BPF_SRC(insn->code) == BPF_K ||
13326 		cur_aux(env)->nospec;
13327 }
13328 
13329 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
13330 				       u32 alu_state, u32 alu_limit)
13331 {
13332 	/* If we arrived here from different branches with different
13333 	 * state or limits to sanitize, then this won't work.
13334 	 */
13335 	if (aux->alu_state &&
13336 	    (aux->alu_state != alu_state ||
13337 	     aux->alu_limit != alu_limit))
13338 		return REASON_PATHS;
13339 
13340 	/* Corresponding fixup done in do_misc_fixups(). */
13341 	aux->alu_state = alu_state;
13342 	aux->alu_limit = alu_limit;
13343 	return 0;
13344 }
13345 
13346 static int sanitize_val_alu(struct bpf_verifier_env *env,
13347 			    struct bpf_insn *insn)
13348 {
13349 	struct bpf_insn_aux_data *aux = cur_aux(env);
13350 
13351 	if (can_skip_alu_sanitation(env, insn))
13352 		return 0;
13353 
13354 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
13355 }
13356 
13357 static bool sanitize_needed(u8 opcode)
13358 {
13359 	return opcode == BPF_ADD || opcode == BPF_SUB;
13360 }
13361 
/*
 * Scratch data shared between the two sanitize_ptr_alu() passes
 * (commit_window false, then true) for one instruction.
 */
13362 struct bpf_sanitize_info {
	/* Receives alu_state/alu_limit in the first pass; read back in the commit pass. */
13363 	struct bpf_insn_aux_data aux;
	/* Masking direction chosen in the first pass and handed to retrieve_ptr_limit(). */
13364 	bool mask_to_left;
13365 };
13366 
13367 static int sanitize_speculative_path(struct bpf_verifier_env *env,
13368 				     const struct bpf_insn *insn,
13369 				     u32 next_idx, u32 curr_idx)
13370 {
13371 	struct bpf_verifier_state *branch;
13372 	struct bpf_reg_state *regs;
13373 
13374 	branch = push_stack(env, next_idx, curr_idx, true);
13375 	if (!IS_ERR(branch) && insn) {
13376 		regs = branch->frame[branch->curframe]->regs;
13377 		if (BPF_SRC(insn->code) == BPF_K) {
13378 			mark_reg_unknown(env, regs, insn->dst_reg);
13379 		} else if (BPF_SRC(insn->code) == BPF_X) {
13380 			mark_reg_unknown(env, regs, insn->dst_reg);
13381 			mark_reg_unknown(env, regs, insn->src_reg);
13382 		}
13383 	}
13384 	return PTR_ERR_OR_ZERO(branch);
13385 }
13386 
13387 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
13388 			    struct bpf_insn *insn,
13389 			    const struct bpf_reg_state *ptr_reg,
13390 			    const struct bpf_reg_state *off_reg,
13391 			    struct bpf_reg_state *dst_reg,
13392 			    struct bpf_sanitize_info *info,
13393 			    const bool commit_window)
13394 {
13395 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
13396 	struct bpf_verifier_state *vstate = env->cur_state;
13397 	bool off_is_imm = tnum_is_const(off_reg->var_off);
13398 	bool off_is_neg = reg_smin(off_reg) < 0;
13399 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
13400 	u8 opcode = BPF_OP(insn->code);
13401 	u32 alu_state, alu_limit;
13402 	struct bpf_reg_state tmp;
13403 	int err;
13404 
13405 	if (can_skip_alu_sanitation(env, insn))
13406 		return 0;
13407 
13408 	/* We already marked aux for masking from non-speculative
13409 	 * paths, thus we got here in the first place. We only care
13410 	 * to explore bad access from here.
13411 	 */
13412 	if (vstate->speculative)
13413 		goto do_sim;
13414 
13415 	if (!commit_window) {
13416 		if (!tnum_is_const(off_reg->var_off) &&
13417 		    (reg_smin(off_reg) < 0) != (reg_smax(off_reg) < 0))
13418 			return REASON_BOUNDS;
13419 
13420 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
13421 				     (opcode == BPF_SUB && !off_is_neg);
13422 	}
13423 
13424 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
13425 	if (err < 0)
13426 		return err;
13427 
13428 	if (commit_window) {
13429 		/* In commit phase we narrow the masking window based on
13430 		 * the observed pointer move after the simulated operation.
13431 		 */
13432 		alu_state = info->aux.alu_state;
13433 		alu_limit = abs(info->aux.alu_limit - alu_limit);
13434 	} else {
13435 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
13436 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
13437 		alu_state |= ptr_is_dst_reg ?
13438 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
13439 
13440 		/* Limit pruning on unknown scalars to enable deep search for
13441 		 * potential masking differences from other program paths.
13442 		 */
13443 		if (!off_is_imm)
13444 			env->explore_alu_limits = true;
13445 	}
13446 
13447 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
13448 	if (err < 0)
13449 		return err;
13450 do_sim:
13451 	/* If we're in commit phase, we're done here given we already
13452 	 * pushed the truncated dst_reg into the speculative verification
13453 	 * stack.
13454 	 *
13455 	 * Also, when register is a known constant, we rewrite register-based
13456 	 * operation to immediate-based, and thus do not need masking (and as
13457 	 * a consequence, do not need to simulate the zero-truncation either).
13458 	 */
13459 	if (commit_window || off_is_imm)
13460 		return 0;
13461 
13462 	/* Simulate and find potential out-of-bounds access under
13463 	 * speculative execution from truncation as a result of
13464 	 * masking when off was not within expected range. If off
13465 	 * sits in dst, then we temporarily need to move ptr there
13466 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
13467 	 * for cases where we use K-based arithmetic in one direction
13468 	 * and truncated reg-based in the other in order to explore
13469 	 * bad access.
13470 	 */
13471 	if (!ptr_is_dst_reg) {
13472 		tmp = *dst_reg;
13473 		*dst_reg = *ptr_reg;
13474 	}
13475 	err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
13476 	if (err < 0)
13477 		return REASON_STACK;
13478 	if (!ptr_is_dst_reg)
13479 		*dst_reg = tmp;
13480 	return 0;
13481 }
13482 
13483 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
13484 {
13485 	struct bpf_verifier_state *vstate = env->cur_state;
13486 
13487 	/* If we simulate paths under speculation, we don't update the
13488 	 * insn as 'seen' such that when we verify unreachable paths in
13489 	 * the non-speculative domain, sanitize_dead_code() can still
13490 	 * rewrite/sanitize them.
13491 	 */
13492 	if (!vstate->speculative)
13493 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
13494 }
13495 
13496 static int sanitize_err(struct bpf_verifier_env *env,
13497 			const struct bpf_insn *insn, int reason,
13498 			const struct bpf_reg_state *off_reg,
13499 			const struct bpf_reg_state *dst_reg)
13500 {
13501 	static const char *err = "pointer arithmetic with it prohibited for !root";
13502 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
13503 	u32 dst = insn->dst_reg, src = insn->src_reg;
13504 
13505 	switch (reason) {
13506 	case REASON_BOUNDS:
13507 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
13508 			off_reg == dst_reg ? dst : src, err);
13509 		break;
13510 	case REASON_TYPE:
13511 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
13512 			off_reg == dst_reg ? src : dst, err);
13513 		break;
13514 	case REASON_PATHS:
13515 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
13516 			dst, op, err);
13517 		break;
13518 	case REASON_LIMIT:
13519 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
13520 			dst, op, err);
13521 		break;
13522 	case REASON_STACK:
13523 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
13524 			dst, err);
13525 		return -ENOMEM;
13526 	default:
13527 		verifier_bug(env, "unknown reason (%d)", reason);
13528 		break;
13529 	}
13530 
13531 	return -EACCES;
13532 }
13533 
13534 /* check that stack access falls within stack limits and that 'reg' doesn't
13535  * have a variable offset.
13536  *
13537  * Variable offset is prohibited for unprivileged mode for simplicity since it
13538  * requires corresponding support in Spectre masking for stack ALU.  See also
13539  * retrieve_ptr_limit().
13540  */
13541 static int check_stack_access_for_ptr_arithmetic(
13542 				struct bpf_verifier_env *env,
13543 				int regno,
13544 				const struct bpf_reg_state *reg,
13545 				int off)
13546 {
13547 	if (!tnum_is_const(reg->var_off)) {
13548 		char tn_buf[48];
13549 
13550 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
13551 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
13552 			regno, tn_buf, off);
13553 		return -EACCES;
13554 	}
13555 
13556 	if (off >= 0 || off < -MAX_BPF_STACK) {
13557 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
13558 			"prohibited for !root; off=%d\n", regno, off);
13559 		return -EACCES;
13560 	}
13561 
13562 	return 0;
13563 }
13564 
13565 static int sanitize_check_bounds(struct bpf_verifier_env *env,
13566 				 const struct bpf_insn *insn,
13567 				 struct bpf_reg_state *dst_reg)
13568 {
13569 	u32 dst = insn->dst_reg;
13570 
13571 	/* For unprivileged we require that resulting offset must be in bounds
13572 	 * in order to be able to sanitize access later on.
13573 	 */
13574 	if (env->bypass_spec_v1)
13575 		return 0;
13576 
13577 	switch (dst_reg->type) {
13578 	case PTR_TO_STACK:
13579 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
13580 							  dst_reg->var_off.value))
13581 			return -EACCES;
13582 		break;
13583 	case PTR_TO_MAP_VALUE:
13584 		if (check_map_access(env, dst_reg, argno_from_reg(dst), 0, 1, false, ACCESS_HELPER)) {
13585 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
13586 				"prohibited for !root\n", dst);
13587 			return -EACCES;
13588 		}
13589 		break;
13590 	default:
13591 		return -EOPNOTSUPP;
13592 	}
13593 
13594 	return 0;
13595 }
13596 
13597 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
13598  * Caller should also handle BPF_MOV case separately.
13599  * If we return -EACCES, caller may want to try again treating pointer as a
13600  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
13601  */
13602 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
13603 				   struct bpf_insn *insn,
13604 				   const struct bpf_reg_state *ptr_reg,
13605 				   const struct bpf_reg_state *off_reg)
13606 {
13607 	struct bpf_verifier_state *vstate = env->cur_state;
13608 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
13609 	struct bpf_reg_state *regs = state->regs, *dst_reg;
13610 	bool known = tnum_is_const(off_reg->var_off);
13611 	s64 smin_val = reg_smin(off_reg), smax_val = reg_smax(off_reg);
13612 	u64 umin_val = reg_umin(off_reg), umax_val = reg_umax(off_reg);
13613 	struct bpf_sanitize_info info = {};
13614 	u8 opcode = BPF_OP(insn->code);
13615 	u32 dst = insn->dst_reg;
13616 	int ret, bounds_ret;
13617 
13618 	dst_reg = &regs[dst];
13619 
13620 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
13621 	    smin_val > smax_val || umin_val > umax_val) {
13622 		/* Taint dst register if offset had invalid bounds derived from
13623 		 * e.g. dead branches.
13624 		 */
13625 		__mark_reg_unknown(env, dst_reg);
13626 		return 0;
13627 	}
13628 
13629 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
13630 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
13631 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13632 			__mark_reg_unknown(env, dst_reg);
13633 			return 0;
13634 		}
13635 
13636 		verbose(env,
13637 			"R%d 32-bit pointer arithmetic prohibited\n",
13638 			dst);
13639 		return -EACCES;
13640 	}
13641 
13642 	if (ptr_reg->type & PTR_MAYBE_NULL) {
13643 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
13644 			dst, reg_type_str(env, ptr_reg->type));
13645 		return -EACCES;
13646 	}
13647 
13648 	/*
13649 	 * Accesses to untrusted PTR_TO_MEM are done through probe
13650 	 * instructions, hence no need to track offsets.
13651 	 */
13652 	if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
13653 		return 0;
13654 
13655 	switch (base_type(ptr_reg->type)) {
13656 	case PTR_TO_CTX:
13657 	case PTR_TO_MAP_VALUE:
13658 	case PTR_TO_MAP_KEY:
13659 	case PTR_TO_STACK:
13660 	case PTR_TO_PACKET_META:
13661 	case PTR_TO_PACKET:
13662 	case PTR_TO_TP_BUFFER:
13663 	case PTR_TO_BTF_ID:
13664 	case PTR_TO_MEM:
13665 	case PTR_TO_BUF:
13666 	case PTR_TO_FUNC:
13667 	case CONST_PTR_TO_DYNPTR:
13668 		break;
13669 	case PTR_TO_FLOW_KEYS:
13670 		if (known)
13671 			break;
13672 		fallthrough;
13673 	case CONST_PTR_TO_MAP:
13674 		/* smin_val represents the known value */
13675 		if (known && smin_val == 0 && opcode == BPF_ADD)
13676 			break;
13677 		fallthrough;
13678 	default:
13679 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
13680 			dst, reg_type_str(env, ptr_reg->type));
13681 		return -EACCES;
13682 	}
13683 
13684 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
13685 	 * The id may be overwritten later if we create a new variable offset.
13686 	 */
13687 	dst_reg->type = ptr_reg->type;
13688 	dst_reg->id = ptr_reg->id;
13689 
13690 	if (!check_reg_sane_offset_scalar(env, off_reg, ptr_reg->type) ||
13691 	    !check_reg_sane_offset_ptr(env, ptr_reg, ptr_reg->type))
13692 		return -EINVAL;
13693 
13694 	/* pointer types do not carry 32-bit bounds at the moment. */
13695 	__mark_reg32_unbounded(dst_reg);
13696 
13697 	if (sanitize_needed(opcode)) {
13698 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
13699 				       &info, false);
13700 		if (ret < 0)
13701 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13702 	}
13703 
13704 	switch (opcode) {
13705 	case BPF_ADD:
13706 		/*
13707 		 * dst_reg gets the pointer type and since some positive
13708 		 * integer value was added to the pointer, give it a new 'id'
13709 		 * if it's a PTR_TO_PACKET.
13710 		 * this creates a new 'base' pointer, off_reg (variable) gets
13711 		 * added into the variable offset, and we copy the fixed offset
13712 		 * from ptr_reg.
13713 		 */
13714 		dst_reg->r64 = cnum64_add(ptr_reg->r64, off_reg->r64);
13715 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
13716 		dst_reg->raw = ptr_reg->raw;
13717 		if (reg_is_pkt_pointer(ptr_reg)) {
13718 			if (!known)
13719 				dst_reg->id = ++env->id_gen;
13720 			/*
13721 			 * Clear range for unknown addends since we can't know
13722 			 * where the pkt pointer ended up. Also clear AT_PKT_END /
13723 			 * BEYOND_PKT_END from prior comparison as any pointer
13724 			 * arithmetic invalidates them.
13725 			 */
13726 			if (!known || dst_reg->range < 0)
13727 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13728 		}
13729 		break;
13730 	case BPF_SUB:
13731 		if (dst_reg == off_reg) {
13732 			/* scalar -= pointer.  Creates an unknown scalar */
13733 			verbose(env, "R%d tried to subtract pointer from scalar\n",
13734 				dst);
13735 			return -EACCES;
13736 		}
13737 		/* We don't allow subtraction from FP, because (according to
13738 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
13739 		 * be able to deal with it.
13740 		 */
13741 		if (ptr_reg->type == PTR_TO_STACK) {
13742 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
13743 				dst);
13744 			return -EACCES;
13745 		}
13746 		dst_reg->r64 = cnum64_add(ptr_reg->r64, cnum64_negate(off_reg->r64));
13747 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
13748 		dst_reg->raw = ptr_reg->raw;
13749 		if (reg_is_pkt_pointer(ptr_reg)) {
13750 			if (!known)
13751 				dst_reg->id = ++env->id_gen;
13752 			/*
13753 			 * Clear range if the subtrahend may be negative since
13754 			 * pkt pointer could move past its bounds. A positive
13755 			 * subtrahend moves it backwards keeping positive range
13756 			 * intact. Also clear AT_PKT_END / BEYOND_PKT_END from
13757 			 * prior comparison as arithmetic invalidates them.
13758 			 */
13759 			if ((!known && smin_val < 0) || dst_reg->range < 0)
13760 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13761 		}
13762 		break;
13763 	case BPF_AND:
13764 	case BPF_OR:
13765 	case BPF_XOR:
13766 		/* bitwise ops on pointers are troublesome, prohibit. */
13767 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
13768 			dst, bpf_alu_string[opcode >> 4]);
13769 		return -EACCES;
13770 	default:
13771 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
13772 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
13773 			dst, bpf_alu_string[opcode >> 4]);
13774 		return -EACCES;
13775 	}
13776 
13777 	if (!check_reg_sane_offset_ptr(env, dst_reg, ptr_reg->type))
13778 		return -EINVAL;
13779 	reg_bounds_sync(dst_reg);
13780 	bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
13781 	if (bounds_ret == -EACCES)
13782 		return bounds_ret;
13783 	if (sanitize_needed(opcode)) {
13784 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
13785 				       &info, true);
13786 		if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
13787 				    && !env->cur_state->speculative
13788 				    && bounds_ret
13789 				    && !ret,
13790 				    env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
13791 			return -EFAULT;
13792 		}
13793 		if (ret < 0)
13794 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13795 	}
13796 
13797 	return 0;
13798 }
13799 
/*
 * ALU32 addition: dst_reg's 32-bit range (r32) becomes the cnum32 sum of
 * the 32-bit ranges of dst_reg and src_reg. No other field of dst_reg is
 * written here.
 */
static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	dst_reg->r32 = cnum32_add(dst_reg->r32, src_reg->r32);
}
13805 
13806 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
13807 			       struct bpf_reg_state *src_reg)
13808 {
13809 	dst_reg->r64 = cnum64_add(dst_reg->r64, src_reg->r64);
13810 }
13811 
/*
 * ALU32 subtraction: expressed as addition of the negated subtrahend, i.e.
 * dst_reg's 32-bit range becomes cnum32_add(dst, cnum32_negate(src)).
 * Only dst_reg->r32 is written here.
 */
static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	dst_reg->r32 = cnum32_add(dst_reg->r32, cnum32_negate(src_reg->r32));
}
13817 
13818 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
13819 			       struct bpf_reg_state *src_reg)
13820 {
13821 	dst_reg->r64 = cnum64_add(dst_reg->r64, cnum64_negate(src_reg->r64));
13822 }
13823 
13824 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
13825 				 struct bpf_reg_state *src_reg)
13826 {
13827 	s32 smin = reg_s32_min(dst_reg);
13828 	s32 smax = reg_s32_max(dst_reg);
13829 	u32 umin = reg_u32_min(dst_reg);
13830 	u32 umax = reg_u32_max(dst_reg);
13831 	s32 tmp_prod[4];
13832 
13833 	if (check_mul_overflow(umax, reg_u32_max(src_reg), &umax) ||
13834 	    check_mul_overflow(umin, reg_u32_min(src_reg), &umin)) {
13835 		/* Overflow possible, we know nothing */
13836 		umin = 0;
13837 		umax = U32_MAX;
13838 	}
13839 	if (check_mul_overflow(smin, reg_s32_min(src_reg), &tmp_prod[0]) ||
13840 	    check_mul_overflow(smin, reg_s32_max(src_reg), &tmp_prod[1]) ||
13841 	    check_mul_overflow(smax, reg_s32_min(src_reg), &tmp_prod[2]) ||
13842 	    check_mul_overflow(smax, reg_s32_max(src_reg), &tmp_prod[3])) {
13843 		/* Overflow possible, we know nothing */
13844 		smin = S32_MIN;
13845 		smax = S32_MAX;
13846 	} else {
13847 		smin = min_array(tmp_prod, 4);
13848 		smax = max_array(tmp_prod, 4);
13849 	}
13850 
13851 	dst_reg->r32 = cnum32_intersect(cnum32_from_urange(umin, umax),
13852 					cnum32_from_srange(smin, smax));
13853 }
13854 
13855 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
13856 			       struct bpf_reg_state *src_reg)
13857 {
13858 	s64 smin = reg_smin(dst_reg);
13859 	s64 smax = reg_smax(dst_reg);
13860 	u64 umin = reg_umin(dst_reg);
13861 	u64 umax = reg_umax(dst_reg);
13862 	s64 tmp_prod[4];
13863 
13864 	if (check_mul_overflow(umax, reg_umax(src_reg), &umax) ||
13865 	    check_mul_overflow(umin, reg_umin(src_reg), &umin)) {
13866 		/* Overflow possible, we know nothing */
13867 		umin = 0;
13868 		umax = U64_MAX;
13869 	}
13870 	if (check_mul_overflow(smin, reg_smin(src_reg), &tmp_prod[0]) ||
13871 	    check_mul_overflow(smin, reg_smax(src_reg), &tmp_prod[1]) ||
13872 	    check_mul_overflow(smax, reg_smin(src_reg), &tmp_prod[2]) ||
13873 	    check_mul_overflow(smax, reg_smax(src_reg), &tmp_prod[3])) {
13874 		/* Overflow possible, we know nothing */
13875 		smin = S64_MIN;
13876 		smax = S64_MAX;
13877 	} else {
13878 		smin = min_array(tmp_prod, 4);
13879 		smax = max_array(tmp_prod, 4);
13880 	}
13881 
13882 	dst_reg->r64 = cnum64_intersect(cnum64_from_urange(umin, umax),
13883 					cnum64_from_srange(smin, smax));
13884 }
13885 
13886 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg,
13887 				  struct bpf_reg_state *src_reg)
13888 {
13889 	u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */
13890 
13891 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) / src_val,
13892 			 reg_u32_max(dst_reg) / src_val);
13893 
13894 	/* Reset other ranges/tnum to unbounded/unknown. */
13895 	reset_reg64_and_tnum(dst_reg);
13896 }
13897 
13898 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg,
13899 				struct bpf_reg_state *src_reg)
13900 {
13901 	u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */
13902 
13903 	reg_set_urange64(dst_reg, div64_u64(reg_umin(dst_reg), src_val),
13904 			 div64_u64(reg_umax(dst_reg), src_val));
13905 
13906 	/* Reset other ranges/tnum to unbounded/unknown. */
13907 	reset_reg32_and_tnum(dst_reg);
13908 }
13909 
13910 static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg,
13911 				  struct bpf_reg_state *src_reg)
13912 {
13913 	s32 smin = reg_s32_min(dst_reg);
13914 	s32 smax = reg_s32_max(dst_reg);
13915 	s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */
13916 	s32 res1, res2;
13917 
13918 	/* BPF div specification: S32_MIN / -1 = S32_MIN */
13919 	if (smin == S32_MIN && src_val == -1) {
13920 		/*
13921 		 * If the dividend range contains more than just S32_MIN,
13922 		 * we cannot precisely track the result, so it becomes unbounded.
13923 		 * e.g., [S32_MIN, S32_MIN+10]/(-1),
13924 		 *     = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)]
13925 		 *     = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX]
13926 		 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN.
13927 		 */
13928 		if (smax != S32_MIN) {
13929 			smin = S32_MIN;
13930 			smax = S32_MAX;
13931 		}
13932 		goto reset;
13933 	}
13934 
13935 	res1 = smin / src_val;
13936 	res2 = smax / src_val;
13937 	smin = min(res1, res2);
13938 	smax = max(res1, res2);
13939 
13940 reset:
13941 	reg_set_srange32(dst_reg, smin, smax);
13942 	/* Reset other ranges/tnum to unbounded/unknown. */
13943 	reset_reg64_and_tnum(dst_reg);
13944 }
13945 
13946 static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg,
13947 				struct bpf_reg_state *src_reg)
13948 {
13949 	s64 smin = reg_smin(dst_reg);
13950 	s64 smax = reg_smax(dst_reg);
13951 	s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */
13952 	s64 res1, res2;
13953 
13954 	/* BPF div specification: S64_MIN / -1 = S64_MIN */
13955 	if (smin == S64_MIN && src_val == -1) {
13956 		/*
13957 		 * If the dividend range contains more than just S64_MIN,
13958 		 * we cannot precisely track the result, so it becomes unbounded.
13959 		 * e.g., [S64_MIN, S64_MIN+10]/(-1),
13960 		 *     = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)]
13961 		 *     = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX]
13962 		 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN.
13963 		 */
13964 		if (smax != S64_MIN) {
13965 			smin = S64_MIN;
13966 			smax = S64_MAX;
13967 		}
13968 		goto reset;
13969 	}
13970 
13971 	res1 = div64_s64(smin, src_val);
13972 	res2 = div64_s64(smax, src_val);
13973 	smin = min(res1, res2);
13974 	smax = max(res1, res2);
13975 
13976 reset:
13977 	reg_set_srange64(dst_reg, smin, smax);
13978 	/* Reset other ranges/tnum to unbounded/unknown. */
13979 	reset_reg32_and_tnum(dst_reg);
13980 }
13981 
13982 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg,
13983 				  struct bpf_reg_state *src_reg)
13984 {
13985 	u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */
13986 	u32 res_max = src_val - 1;
13987 
13988 	/*
13989 	 * If dst_umax <= res_max, the result remains unchanged.
13990 	 * e.g., [2, 5] % 10 = [2, 5].
13991 	 */
13992 	if (reg_u32_max(dst_reg) <= res_max)
13993 		return;
13994 
13995 	reg_set_urange32(dst_reg, 0, min(reg_u32_max(dst_reg), res_max));
13996 
13997 	/* Reset other ranges/tnum to unbounded/unknown. */
13998 	reset_reg64_and_tnum(dst_reg);
13999 }
14000 
14001 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg,
14002 				struct bpf_reg_state *src_reg)
14003 {
14004 	u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */
14005 	u64 res_max = src_val - 1;
14006 
14007 	/*
14008 	 * If dst_umax <= res_max, the result remains unchanged.
14009 	 * e.g., [2, 5] % 10 = [2, 5].
14010 	 */
14011 	if (reg_umax(dst_reg) <= res_max)
14012 		return;
14013 
14014 	reg_set_urange64(dst_reg, 0, min(reg_umax(dst_reg), res_max));
14015 
14016 	/* Reset other ranges/tnum to unbounded/unknown. */
14017 	reset_reg32_and_tnum(dst_reg);
14018 }
14019 
14020 static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg,
14021 				  struct bpf_reg_state *src_reg)
14022 {
14023 	s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */
14024 
14025 	/*
14026 	 * Safe absolute value calculation:
14027 	 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648.
14028 	 * Here use unsigned integer to avoid overflow.
14029 	 */
14030 	u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val;
14031 
14032 	/*
14033 	 * Calculate the maximum possible absolute value of the result.
14034 	 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives
14035 	 * 2147483647 (S32_MAX), which fits perfectly in s32.
14036 	 */
14037 	s32 res_max_abs = src_abs - 1;
14038 
14039 	/*
14040 	 * If the dividend is already within the result range,
14041 	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
14042 	 */
14043 	if (reg_s32_min(dst_reg) >= -res_max_abs && reg_s32_max(dst_reg) <= res_max_abs)
14044 		return;
14045 
14046 	/* General case: result has the same sign as the dividend. */
14047 	if (reg_s32_min(dst_reg) >= 0) {
14048 		reg_set_srange32(dst_reg, 0, min(reg_s32_max(dst_reg), res_max_abs));
14049 	} else if (reg_s32_max(dst_reg) <= 0) {
14050 		reg_set_srange32(dst_reg, max(reg_s32_min(dst_reg), -res_max_abs), 0);
14051 	} else {
14052 		reg_set_srange32(dst_reg, -res_max_abs, res_max_abs);
14053 	}
14054 
14055 	/* Reset other ranges/tnum to unbounded/unknown. */
14056 	reset_reg64_and_tnum(dst_reg);
14057 }
14058 
14059 static void scalar_min_max_smod(struct bpf_reg_state *dst_reg,
14060 				struct bpf_reg_state *src_reg)
14061 {
14062 	s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */
14063 
14064 	/*
14065 	 * Safe absolute value calculation:
14066 	 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63.
14067 	 * Here use unsigned integer to avoid overflow.
14068 	 */
14069 	u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val;
14070 
14071 	/*
14072 	 * Calculate the maximum possible absolute value of the result.
14073 	 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives
14074 	 * 2^63 - 1 (S64_MAX), which fits perfectly in s64.
14075 	 */
14076 	s64 res_max_abs = src_abs - 1;
14077 
14078 	/*
14079 	 * If the dividend is already within the result range,
14080 	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
14081 	 */
14082 	if (reg_smin(dst_reg) >= -res_max_abs && reg_smax(dst_reg) <= res_max_abs)
14083 		return;
14084 
14085 	/* General case: result has the same sign as the dividend. */
14086 	if (reg_smin(dst_reg) >= 0) {
14087 		reg_set_srange64(dst_reg, 0, min(reg_smax(dst_reg), res_max_abs));
14088 	} else if (reg_smax(dst_reg) <= 0) {
14089 		reg_set_srange64(dst_reg, max(reg_smin(dst_reg), -res_max_abs), 0);
14090 	} else {
14091 		reg_set_srange64(dst_reg, -res_max_abs, res_max_abs);
14092 	}
14093 
14094 	/* Reset other ranges/tnum to unbounded/unknown. */
14095 	reset_reg32_and_tnum(dst_reg);
14096 }
14097 
14098 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
14099 				 struct bpf_reg_state *src_reg)
14100 {
14101 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14102 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14103 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14104 	u32 umax_val = reg_u32_max(src_reg);
14105 
14106 	if (src_known && dst_known) {
14107 		__mark_reg32_known(dst_reg, var32_off.value);
14108 		return;
14109 	}
14110 
14111 	/* We get our minimum from the var_off, since that's inherently
14112 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14113 	 */
14114 	reg_set_urange32(dst_reg,
14115 			 var32_off.value,
14116 			 min(reg_u32_max(dst_reg), umax_val));
14117 }
14118 
14119 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
14120 			       struct bpf_reg_state *src_reg)
14121 {
14122 	bool src_known = tnum_is_const(src_reg->var_off);
14123 	bool dst_known = tnum_is_const(dst_reg->var_off);
14124 	u64 umax_val = reg_umax(src_reg);
14125 
14126 	if (src_known && dst_known) {
14127 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14128 		return;
14129 	}
14130 
14131 	/* We get our minimum from the var_off, since that's inherently
14132 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14133 	 */
14134 	reg_set_urange64(dst_reg,
14135 			 dst_reg->var_off.value,
14136 			 min(reg_umax(dst_reg), umax_val));
14137 
14138 	/* We may learn something more from the var_off */
14139 	__update_reg_bounds(dst_reg);
14140 }
14141 
14142 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
14143 				struct bpf_reg_state *src_reg)
14144 {
14145 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14146 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14147 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14148 	u32 umin_val = reg_u32_min(src_reg);
14149 
14150 	if (src_known && dst_known) {
14151 		__mark_reg32_known(dst_reg, var32_off.value);
14152 		return;
14153 	}
14154 
14155 	/* We get our maximum from the var_off, and our minimum is the
14156 	 * maximum of the operands' minima
14157 	 */
14158 	reg_set_urange32(dst_reg,
14159 			 max(reg_u32_min(dst_reg), umin_val),
14160 			 var32_off.value | var32_off.mask);
14161 }
14162 
14163 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
14164 			      struct bpf_reg_state *src_reg)
14165 {
14166 	bool src_known = tnum_is_const(src_reg->var_off);
14167 	bool dst_known = tnum_is_const(dst_reg->var_off);
14168 	u64 umin_val = reg_umin(src_reg);
14169 
14170 	if (src_known && dst_known) {
14171 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14172 		return;
14173 	}
14174 
14175 	/* We get our maximum from the var_off, and our minimum is the
14176 	 * maximum of the operands' minima
14177 	 */
14178 	reg_set_urange64(dst_reg,
14179 			 max(reg_umin(dst_reg), umin_val),
14180 			 dst_reg->var_off.value | dst_reg->var_off.mask);
14181 
14182 	/* We may learn something more from the var_off */
14183 	__update_reg_bounds(dst_reg);
14184 }
14185 
14186 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
14187 				 struct bpf_reg_state *src_reg)
14188 {
14189 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14190 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14191 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14192 
14193 	if (src_known && dst_known) {
14194 		__mark_reg32_known(dst_reg, var32_off.value);
14195 		return;
14196 	}
14197 
14198 	/* We get both minimum and maximum from the var32_off. */
14199 	reg_set_urange32(dst_reg, var32_off.value, var32_off.value | var32_off.mask);
14200 }
14201 
14202 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
14203 			       struct bpf_reg_state *src_reg)
14204 {
14205 	bool src_known = tnum_is_const(src_reg->var_off);
14206 	bool dst_known = tnum_is_const(dst_reg->var_off);
14207 
14208 	if (src_known && dst_known) {
14209 		/* dst_reg->var_off.value has been updated earlier */
14210 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14211 		return;
14212 	}
14213 
14214 	/* We get both minimum and maximum from the var_off. */
14215 	reg_set_urange64(dst_reg,
14216 			 dst_reg->var_off.value,
14217 			 dst_reg->var_off.value | dst_reg->var_off.mask);
14218 }
14219 
14220 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14221 				   u64 umin_val, u64 umax_val)
14222 {
14223 	/* If we might shift our top bit out, then we know nothing */
14224 	if (umax_val > 31 || reg_u32_max(dst_reg) > 1ULL << (31 - umax_val))
14225 		reg_set_urange32(dst_reg, 0, U32_MAX);
14226 	else
14227 		/* We lose all sign bit information (except what we can pick
14228 		 * up from var_off)
14229 		 */
14230 		reg_set_urange32(dst_reg, reg_u32_min(dst_reg) << umin_val,
14231 				 reg_u32_max(dst_reg) << umax_val);
14232 }
14233 
14234 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14235 				 struct bpf_reg_state *src_reg)
14236 {
14237 	u32 umax_val = reg_u32_max(src_reg);
14238 	u32 umin_val = reg_u32_min(src_reg);
14239 	/* u32 alu operation will zext upper bits */
14240 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14241 
14242 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14243 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
14244 	/* Not required but being careful mark reg64 bounds as unknown so
14245 	 * that we are forced to pick them up from tnum and zext later and
14246 	 * if some path skips this step we are still safe.
14247 	 */
14248 	__mark_reg64_unbounded(dst_reg);
14249 	__update_reg32_bounds(dst_reg);
14250 }
14251 
14252 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
14253 				   u64 umin_val, u64 umax_val)
14254 {
14255 	struct cnum64 u, s;
14256 
14257 	/* Special case <<32 because it is a common compiler pattern to sign
14258 	 * extend subreg by doing <<32 s>>32. smin/smax assignments are correct
14259 	 * because s32 bounds don't flip sign when shifting to the left by
14260 	 * 32bits.
14261 	 */
14262 	if (umin_val == 32 && umax_val == 32)
14263 		s = cnum64_from_srange((s64)reg_s32_min(dst_reg) << 32,
14264 				       (s64)reg_s32_max(dst_reg) << 32);
14265 	else
14266 		s = CNUM64_UNBOUNDED;
14267 
14268 	/* If we might shift our top bit out, then we know nothing */
14269 	if (reg_umax(dst_reg) > 1ULL << (63 - umax_val))
14270 		u = CNUM64_UNBOUNDED;
14271 	else
14272 		u = cnum64_from_urange(reg_umin(dst_reg) << umin_val,
14273 				       reg_umax(dst_reg) << umax_val);
14274 
14275 	dst_reg->r64 = cnum64_intersect(u, s);
14276 }
14277 
14278 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
14279 			       struct bpf_reg_state *src_reg)
14280 {
14281 	u64 umax_val = reg_umax(src_reg);
14282 	u64 umin_val = reg_umin(src_reg);
14283 
14284 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
14285 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
14286 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14287 
14288 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
14289 	/* We may learn something more from the var_off */
14290 	__update_reg_bounds(dst_reg);
14291 }
14292 
14293 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
14294 				 struct bpf_reg_state *src_reg)
14295 {
14296 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14297 	u32 umax_val = reg_u32_max(src_reg);
14298 	u32 umin_val = reg_u32_min(src_reg);
14299 
14300 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14301 	 * be negative, then either:
14302 	 * 1) src_reg might be zero, so the sign bit of the result is
14303 	 *    unknown, so we lose our signed bounds
14304 	 * 2) it's known negative, thus the unsigned bounds capture the
14305 	 *    signed bounds
14306 	 * 3) the signed bounds cross zero, so they tell us nothing
14307 	 *    about the result
14308 	 * If the value in dst_reg is known nonnegative, then again the
14309 	 * unsigned bounds capture the signed bounds.
14310 	 * Thus, in all cases it suffices to blow away our signed bounds
14311 	 * and rely on inferring new ones from the unsigned bounds and
14312 	 * var_off of the result.
14313 	 */
14314 
14315 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
14316 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) >> umax_val,
14317 			 reg_u32_max(dst_reg) >> umin_val);
14318 
14319 	__mark_reg64_unbounded(dst_reg);
14320 	__update_reg32_bounds(dst_reg);
14321 }
14322 
14323 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
14324 			       struct bpf_reg_state *src_reg)
14325 {
14326 	u64 umax_val = reg_umax(src_reg);
14327 	u64 umin_val = reg_umin(src_reg);
14328 
14329 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14330 	 * be negative, then either:
14331 	 * 1) src_reg might be zero, so the sign bit of the result is
14332 	 *    unknown, so we lose our signed bounds
14333 	 * 2) it's known negative, thus the unsigned bounds capture the
14334 	 *    signed bounds
14335 	 * 3) the signed bounds cross zero, so they tell us nothing
14336 	 *    about the result
14337 	 * If the value in dst_reg is known nonnegative, then again the
14338 	 * unsigned bounds capture the signed bounds.
14339 	 * Thus, in all cases it suffices to blow away our signed bounds
14340 	 * and rely on inferring new ones from the unsigned bounds and
14341 	 * var_off of the result.
14342 	 */
14343 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
14344 	reg_set_urange64(dst_reg, reg_umin(dst_reg) >> umax_val,
14345 			 reg_umax(dst_reg) >> umin_val);
14346 
14347 	/* Its not easy to operate on alu32 bounds here because it depends
14348 	 * on bits being shifted in. Take easy way out and mark unbounded
14349 	 * so we can recalculate later from tnum.
14350 	 */
14351 	__mark_reg32_unbounded(dst_reg);
14352 	__update_reg_bounds(dst_reg);
14353 }
14354 
14355 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
14356 				  struct bpf_reg_state *src_reg)
14357 {
14358 	u64 umin_val = reg_u32_min(src_reg);
14359 
14360 	/* Upon reaching here, src_known is true and
14361 	 * umax_val is equal to umin_val.
14362 	 * Blow away the dst_reg umin_value/umax_value and rely on
14363 	 * dst_reg var_off to refine the result.
14364 	 */
14365 	reg_set_srange32(dst_reg,
14366 			 (u32)(((s32)reg_s32_min(dst_reg)) >> umin_val),
14367 			 (u32)(((s32)reg_s32_max(dst_reg)) >> umin_val));
14368 
14369 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
14370 
14371 	__mark_reg64_unbounded(dst_reg);
14372 	__update_reg32_bounds(dst_reg);
14373 }
14374 
14375 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
14376 				struct bpf_reg_state *src_reg)
14377 {
14378 	u64 umin_val = reg_umin(src_reg);
14379 
14380 	/* Upon reaching here, src_known is true and umax_val is equal
14381 	 * to umin_val.
14382 	 */
14383 	reg_set_srange64(dst_reg, reg_smin(dst_reg) >> umin_val,
14384 			 reg_smax(dst_reg) >> umin_val);
14385 
14386 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
14387 
14388 	/* Its not easy to operate on alu32 bounds here because it depends
14389 	 * on bits being shifted in from upper 32-bits. Take easy way out
14390 	 * and mark unbounded so we can recalculate later from tnum.
14391 	 */
14392 	__mark_reg32_unbounded(dst_reg);
14393 	__update_reg_bounds(dst_reg);
14394 }
14395 
14396 static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
14397 {
14398 	/*
14399 	 * Byte swap operation - update var_off using tnum_bswap.
14400 	 * Three cases:
14401 	 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
14402 	 *    unconditional swap
14403 	 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
14404 	 *    swap on big-endian, truncation or no-op on little-endian
14405 	 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
14406 	 *    swap on little-endian, truncation or no-op on big-endian
14407 	 */
14408 
14409 	bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
14410 	bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
14411 	bool is_big_endian;
14412 #ifdef CONFIG_CPU_BIG_ENDIAN
14413 	is_big_endian = true;
14414 #else
14415 	is_big_endian = false;
14416 #endif
14417 	/* Apply bswap if alu64 or switch between big-endian and little-endian machines */
14418 	bool need_bswap = alu64 || (to_le == is_big_endian);
14419 
14420 	/*
14421 	 * If the register is mutated, manually reset its scalar ID to break
14422 	 * any existing ties and avoid incorrect bounds propagation.
14423 	 */
14424 	if (need_bswap || insn->imm == 16 || insn->imm == 32)
14425 		clear_scalar_id(dst_reg);
14426 
14427 	if (need_bswap) {
14428 		if (insn->imm == 16)
14429 			dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
14430 		else if (insn->imm == 32)
14431 			dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
14432 		else if (insn->imm == 64)
14433 			dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
14434 		/*
14435 		 * Byteswap scrambles the range, so we must reset bounds.
14436 		 * Bounds will be re-derived from the new tnum later.
14437 		 */
14438 		__mark_reg_unbounded(dst_reg);
14439 	}
14440 	/* For bswap16/32, truncate dst register to match the swapped size */
14441 	if (insn->imm == 16 || insn->imm == 32)
14442 		coerce_reg_to_size(dst_reg, insn->imm / 8);
14443 }
14444 
14445 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
14446 					     const struct bpf_reg_state *src_reg)
14447 {
14448 	bool src_is_const = false;
14449 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
14450 
14451 	if (insn_bitness == 32) {
14452 		if (tnum_subreg_is_const(src_reg->var_off)
14453 		    && reg_s32_min(src_reg) == reg_s32_max(src_reg)
14454 		    && reg_u32_min(src_reg) == reg_u32_max(src_reg))
14455 			src_is_const = true;
14456 	} else {
14457 		if (tnum_is_const(src_reg->var_off)
14458 		    && reg_smin(src_reg) == reg_smax(src_reg)
14459 		    && reg_umin(src_reg) == reg_umax(src_reg))
14460 			src_is_const = true;
14461 	}
14462 
14463 	switch (BPF_OP(insn->code)) {
14464 	case BPF_ADD:
14465 	case BPF_SUB:
14466 	case BPF_NEG:
14467 	case BPF_AND:
14468 	case BPF_XOR:
14469 	case BPF_OR:
14470 	case BPF_MUL:
14471 	case BPF_END:
14472 		return true;
14473 
14474 	/*
14475 	 * Division and modulo operators range is only safe to compute when the
14476 	 * divisor is a constant.
14477 	 */
14478 	case BPF_DIV:
14479 	case BPF_MOD:
14480 		return src_is_const;
14481 
14482 	/* Shift operators range is only computable if shift dimension operand
14483 	 * is a constant. Shifts greater than 31 or 63 are undefined. This
14484 	 * includes shifts by a negative number.
14485 	 */
14486 	case BPF_LSH:
14487 	case BPF_RSH:
14488 	case BPF_ARSH:
14489 		return (src_is_const && reg_umax(src_reg) < insn_bitness);
14490 	default:
14491 		return false;
14492 	}
14493 }
14494 
14495 static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn,
14496 			      struct bpf_reg_state *dst_reg)
14497 {
14498 	struct bpf_verifier_state *branch;
14499 	struct bpf_reg_state *regs;
14500 	bool alu32;
14501 
14502 	if (reg_smin(dst_reg) == -1 && reg_smax(dst_reg) == 0)
14503 		alu32 = false;
14504 	else if (reg_s32_min(dst_reg) == -1 && reg_s32_max(dst_reg) == 0)
14505 		alu32 = true;
14506 	else
14507 		return 0;
14508 
14509 	branch = push_stack(env, env->insn_idx, env->insn_idx, false);
14510 	if (IS_ERR(branch))
14511 		return PTR_ERR(branch);
14512 
14513 	regs = branch->frame[branch->curframe]->regs;
14514 	if (alu32) {
14515 		__mark_reg32_known(&regs[insn->dst_reg], 0);
14516 		__mark_reg32_known(dst_reg, -1ull);
14517 	} else {
14518 		__mark_reg_known(&regs[insn->dst_reg], 0);
14519 		__mark_reg_known(dst_reg, -1ull);
14520 	}
14521 	return 0;
14522 }
14523 
14524 /* WARNING: This function does calculations on 64-bit values, but the actual
14525  * execution may occur on 32-bit values. Therefore, things like bitshifts
14526  * need extra checks in the 32-bit case.
14527  */
14528 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
14529 				      struct bpf_insn *insn,
14530 				      struct bpf_reg_state *dst_reg,
14531 				      struct bpf_reg_state src_reg)
14532 {
14533 	u8 opcode = BPF_OP(insn->code);
14534 	s16 off = insn->off;
14535 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14536 	int ret;
14537 
14538 	if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
14539 		__mark_reg_unknown(env, dst_reg);
14540 		return 0;
14541 	}
14542 
14543 	if (sanitize_needed(opcode)) {
14544 		ret = sanitize_val_alu(env, insn);
14545 		if (ret < 0)
14546 			return sanitize_err(env, insn, ret, NULL, NULL);
14547 	}
14548 
14549 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
14550 	 * There are two classes of instructions: The first class we track both
14551 	 * alu32 and alu64 sign/unsigned bounds independently this provides the
14552 	 * greatest amount of precision when alu operations are mixed with jmp32
14553 	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
14554 	 * and BPF_OR. This is possible because these ops have fairly easy to
14555 	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
14556 	 * See alu32 verifier tests for examples. The second class of
14557 	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
14558 	 * with regards to tracking sign/unsigned bounds because the bits may
14559 	 * cross subreg boundaries in the alu64 case. When this happens we mark
14560 	 * the reg unbounded in the subreg bound space and use the resulting
14561 	 * tnum to calculate an approximation of the sign/unsigned bounds.
14562 	 */
14563 	switch (opcode) {
14564 	case BPF_ADD:
14565 		scalar32_min_max_add(dst_reg, &src_reg);
14566 		scalar_min_max_add(dst_reg, &src_reg);
14567 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
14568 		break;
14569 	case BPF_SUB:
14570 		scalar32_min_max_sub(dst_reg, &src_reg);
14571 		scalar_min_max_sub(dst_reg, &src_reg);
14572 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
14573 		break;
14574 	case BPF_NEG:
14575 		env->fake_reg[0] = *dst_reg;
14576 		__mark_reg_known(dst_reg, 0);
14577 		scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
14578 		scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
14579 		dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
14580 		break;
14581 	case BPF_MUL:
14582 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
14583 		scalar32_min_max_mul(dst_reg, &src_reg);
14584 		scalar_min_max_mul(dst_reg, &src_reg);
14585 		break;
14586 	case BPF_DIV:
14587 		/* BPF div specification: x / 0 = 0 */
14588 		if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0)) {
14589 			___mark_reg_known(dst_reg, 0);
14590 			break;
14591 		}
14592 		if (alu32)
14593 			if (off == 1)
14594 				scalar32_min_max_sdiv(dst_reg, &src_reg);
14595 			else
14596 				scalar32_min_max_udiv(dst_reg, &src_reg);
14597 		else
14598 			if (off == 1)
14599 				scalar_min_max_sdiv(dst_reg, &src_reg);
14600 			else
14601 				scalar_min_max_udiv(dst_reg, &src_reg);
14602 		break;
14603 	case BPF_MOD:
14604 		/* BPF mod specification: x % 0 = x */
14605 		if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0))
14606 			break;
14607 		if (alu32)
14608 			if (off == 1)
14609 				scalar32_min_max_smod(dst_reg, &src_reg);
14610 			else
14611 				scalar32_min_max_umod(dst_reg, &src_reg);
14612 		else
14613 			if (off == 1)
14614 				scalar_min_max_smod(dst_reg, &src_reg);
14615 			else
14616 				scalar_min_max_umod(dst_reg, &src_reg);
14617 		break;
14618 	case BPF_AND:
14619 		if (tnum_is_const(src_reg.var_off)) {
14620 			ret = maybe_fork_scalars(env, insn, dst_reg);
14621 			if (ret)
14622 				return ret;
14623 		}
14624 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
14625 		scalar32_min_max_and(dst_reg, &src_reg);
14626 		scalar_min_max_and(dst_reg, &src_reg);
14627 		break;
14628 	case BPF_OR:
14629 		if (tnum_is_const(src_reg.var_off)) {
14630 			ret = maybe_fork_scalars(env, insn, dst_reg);
14631 			if (ret)
14632 				return ret;
14633 		}
14634 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
14635 		scalar32_min_max_or(dst_reg, &src_reg);
14636 		scalar_min_max_or(dst_reg, &src_reg);
14637 		break;
14638 	case BPF_XOR:
14639 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
14640 		scalar32_min_max_xor(dst_reg, &src_reg);
14641 		scalar_min_max_xor(dst_reg, &src_reg);
14642 		break;
14643 	case BPF_LSH:
14644 		if (alu32)
14645 			scalar32_min_max_lsh(dst_reg, &src_reg);
14646 		else
14647 			scalar_min_max_lsh(dst_reg, &src_reg);
14648 		break;
14649 	case BPF_RSH:
14650 		if (alu32)
14651 			scalar32_min_max_rsh(dst_reg, &src_reg);
14652 		else
14653 			scalar_min_max_rsh(dst_reg, &src_reg);
14654 		break;
14655 	case BPF_ARSH:
14656 		if (alu32)
14657 			scalar32_min_max_arsh(dst_reg, &src_reg);
14658 		else
14659 			scalar_min_max_arsh(dst_reg, &src_reg);
14660 		break;
14661 	case BPF_END:
14662 		scalar_byte_swap(dst_reg, insn);
14663 		break;
14664 	default:
14665 		break;
14666 	}
14667 
14668 	/*
14669 	 * ALU32 ops are zero extended into 64bit register.
14670 	 *
14671 	 * BPF_END is already handled inside the helper (truncation),
14672 	 * so skip zext here to avoid unexpected zero extension.
14673 	 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
14674 	 * This is a 64bit byte swap operation with alu32==true,
14675 	 * but we should not zero extend the result.
14676 	 */
14677 	if (alu32 && opcode != BPF_END)
14678 		zext_32_to_64(dst_reg);
14679 	reg_bounds_sync(dst_reg);
14680 	return 0;
14681 }
14682 
14683 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
14684  * and var_off.
14685  */
14686 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
14687 				   struct bpf_insn *insn)
14688 {
14689 	struct bpf_verifier_state *vstate = env->cur_state;
14690 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
14691 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
14692 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
14693 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14694 	u8 opcode = BPF_OP(insn->code);
14695 	int err;
14696 
14697 	dst_reg = &regs[insn->dst_reg];
14698 	if (BPF_SRC(insn->code) == BPF_X)
14699 		src_reg = &regs[insn->src_reg];
14700 	else
14701 		src_reg = NULL;
14702 
14703 	/* Case where at least one operand is an arena. */
14704 	if (dst_reg->type == PTR_TO_ARENA || (src_reg && src_reg->type == PTR_TO_ARENA)) {
14705 		struct bpf_insn_aux_data *aux = cur_aux(env);
14706 
14707 		if (dst_reg->type != PTR_TO_ARENA)
14708 			*dst_reg = *src_reg;
14709 
14710 		dst_reg->subreg_def = env->insn_idx + 1;
14711 
14712 		if (BPF_CLASS(insn->code) == BPF_ALU64)
14713 			/*
14714 			 * 32-bit operations zero upper bits automatically.
14715 			 * 64-bit operations need to be converted to 32.
14716 			 */
14717 			aux->needs_zext = true;
14718 
14719 		/* Any arithmetic operations are allowed on arena pointers */
14720 		return 0;
14721 	}
14722 
14723 	if (dst_reg->type != SCALAR_VALUE)
14724 		ptr_reg = dst_reg;
14725 
14726 	if (BPF_SRC(insn->code) == BPF_X) {
14727 		if (src_reg->type != SCALAR_VALUE) {
14728 			if (dst_reg->type != SCALAR_VALUE) {
14729 				/* Combining two pointers by any ALU op yields
14730 				 * an arbitrary scalar. Disallow all math except
14731 				 * pointer subtraction
14732 				 */
14733 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
14734 					mark_reg_unknown(env, regs, insn->dst_reg);
14735 					return 0;
14736 				}
14737 				verbose(env, "R%d pointer %s pointer prohibited\n",
14738 					insn->dst_reg,
14739 					bpf_alu_string[opcode >> 4]);
14740 				return -EACCES;
14741 			} else {
14742 				/* scalar += pointer
14743 				 * This is legal, but we have to reverse our
14744 				 * src/dest handling in computing the range
14745 				 */
14746 				err = mark_chain_precision(env, insn->dst_reg);
14747 				if (err)
14748 					return err;
14749 				return adjust_ptr_min_max_vals(env, insn,
14750 							       src_reg, dst_reg);
14751 			}
14752 		} else if (ptr_reg) {
14753 			/* pointer += scalar */
14754 			err = mark_chain_precision(env, insn->src_reg);
14755 			if (err)
14756 				return err;
14757 			return adjust_ptr_min_max_vals(env, insn,
14758 						       dst_reg, src_reg);
14759 		} else if (dst_reg->precise) {
14760 			/* if dst_reg is precise, src_reg should be precise as well */
14761 			err = mark_chain_precision(env, insn->src_reg);
14762 			if (err)
14763 				return err;
14764 		}
14765 	} else {
14766 		/* Pretend the src is a reg with a known value, since we only
14767 		 * need to be able to read from this state.
14768 		 */
14769 		off_reg.type = SCALAR_VALUE;
14770 		__mark_reg_known(&off_reg, insn->imm);
14771 		src_reg = &off_reg;
14772 		if (ptr_reg) /* pointer += K */
14773 			return adjust_ptr_min_max_vals(env, insn,
14774 						       ptr_reg, src_reg);
14775 	}
14776 
14777 	/* Got here implies adding two SCALAR_VALUEs */
14778 	if (WARN_ON_ONCE(ptr_reg)) {
14779 		print_verifier_state(env, vstate, vstate->curframe, true);
14780 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
14781 		return -EFAULT;
14782 	}
14783 	if (WARN_ON(!src_reg)) {
14784 		print_verifier_state(env, vstate, vstate->curframe, true);
14785 		verbose(env, "verifier internal error: no src_reg\n");
14786 		return -EFAULT;
14787 	}
14788 	/*
14789 	 * For alu32 linked register tracking, we need to check dst_reg's
14790 	 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(),
14791 	 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX.
14792 	 */
14793 	u64 dst_umax = reg_umax(dst_reg);
14794 
14795 	err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
14796 	if (err)
14797 		return err;
14798 	/*
14799 	 * Compilers can generate the code
14800 	 * r1 = r2
14801 	 * r1 += 0x1
14802 	 * if r2 < 1000 goto ...
14803 	 * use r1 in memory access
14804 	 * So remember constant delta between r2 and r1 and update r1 after
14805 	 * 'if' condition.
14806 	 */
14807 	if (env->bpf_capable &&
14808 	    (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
14809 	    dst_reg->id && is_reg_const(src_reg, alu32) &&
14810 	    !(BPF_SRC(insn->code) == BPF_X && insn->src_reg == insn->dst_reg)) {
14811 		u64 val = reg_const_value(src_reg, alu32);
14812 		s32 off;
14813 
14814 		if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX))
14815 			goto clear_id;
14816 
14817 		if (alu32 && (dst_umax > U32_MAX))
14818 			goto clear_id;
14819 
14820 		off = (s32)val;
14821 
14822 		if (BPF_OP(insn->code) == BPF_SUB) {
14823 			/* Negating S32_MIN would overflow */
14824 			if (off == S32_MIN)
14825 				goto clear_id;
14826 			off = -off;
14827 		}
14828 
14829 		if (dst_reg->id & BPF_ADD_CONST) {
14830 			/*
14831 			 * If the register already went through rX += val
14832 			 * we cannot accumulate another val into rx->off.
14833 			 */
14834 clear_id:
14835 			clear_scalar_id(dst_reg);
14836 		} else {
14837 			if (alu32)
14838 				dst_reg->id |= BPF_ADD_CONST32;
14839 			else
14840 				dst_reg->id |= BPF_ADD_CONST64;
14841 			dst_reg->delta = off;
14842 		}
14843 	} else {
14844 		/*
14845 		 * Make sure ID is cleared otherwise dst_reg min/max could be
14846 		 * incorrectly propagated into other registers by sync_linked_regs()
14847 		 */
14848 		clear_scalar_id(dst_reg);
14849 	}
14850 	return 0;
14851 }
14852 
14853 /* check validity of 32-bit and 64-bit arithmetic operations */
14854 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
14855 {
14856 	struct bpf_reg_state *regs = cur_regs(env);
14857 	u8 opcode = BPF_OP(insn->code);
14858 	int err;
14859 
14860 	if (opcode == BPF_END || opcode == BPF_NEG) {
14861 		/* check src operand */
14862 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14863 		if (err)
14864 			return err;
14865 
14866 		if (is_pointer_value(env, insn->dst_reg)) {
14867 			verbose(env, "R%d pointer arithmetic prohibited\n",
14868 				insn->dst_reg);
14869 			return -EACCES;
14870 		}
14871 
14872 		/* check dest operand */
14873 		if (regs[insn->dst_reg].type == SCALAR_VALUE) {
14874 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14875 			err = err ?: adjust_scalar_min_max_vals(env, insn,
14876 							 &regs[insn->dst_reg],
14877 							 regs[insn->dst_reg]);
14878 		} else {
14879 			err = check_reg_arg(env, insn->dst_reg, DST_OP);
14880 		}
14881 		if (err)
14882 			return err;
14883 
14884 	} else if (opcode == BPF_MOV) {
14885 
14886 		if (BPF_SRC(insn->code) == BPF_X) {
14887 			if (insn->off == BPF_ADDR_SPACE_CAST) {
14888 				if (!env->prog->aux->arena) {
14889 					verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
14890 					return -EINVAL;
14891 				}
14892 			}
14893 
14894 			/* check src operand */
14895 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
14896 			if (err)
14897 				return err;
14898 		}
14899 
14900 		/* check dest operand, mark as required later */
14901 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14902 		if (err)
14903 			return err;
14904 
14905 		if (BPF_SRC(insn->code) == BPF_X) {
14906 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
14907 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
14908 
14909 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
14910 				if (insn->imm) {
14911 					/* off == BPF_ADDR_SPACE_CAST */
14912 					mark_reg_unknown(env, regs, insn->dst_reg);
14913 					if (insn->imm == 1) { /* cast from as(1) to as(0) */
14914 						dst_reg->type = PTR_TO_ARENA;
14915 						/* PTR_TO_ARENA is 32-bit */
14916 						dst_reg->subreg_def = env->insn_idx + 1;
14917 					}
14918 				} else if (insn->off == 0) {
14919 					/* case: R1 = R2
14920 					 * copy register state to dest reg
14921 					 */
14922 					assign_scalar_id_before_mov(env, src_reg);
14923 					*dst_reg = *src_reg;
14924 					dst_reg->subreg_def = DEF_NOT_SUBREG;
14925 				} else {
14926 					/* case: R1 = (s8, s16 s32)R2 */
14927 					if (is_pointer_value(env, insn->src_reg)) {
14928 						verbose(env,
14929 							"R%d sign-extension part of pointer\n",
14930 							insn->src_reg);
14931 						return -EACCES;
14932 					} else if (src_reg->type == SCALAR_VALUE) {
14933 						bool no_sext;
14934 
14935 						no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1));
14936 						if (no_sext)
14937 							assign_scalar_id_before_mov(env, src_reg);
14938 						*dst_reg = *src_reg;
14939 						if (!no_sext)
14940 							clear_scalar_id(dst_reg);
14941 						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
14942 						dst_reg->subreg_def = DEF_NOT_SUBREG;
14943 					} else {
14944 						mark_reg_unknown(env, regs, insn->dst_reg);
14945 					}
14946 				}
14947 			} else {
14948 				/* R1 = (u32) R2 */
14949 				if (is_pointer_value(env, insn->src_reg)) {
14950 					verbose(env,
14951 						"R%d partial copy of pointer\n",
14952 						insn->src_reg);
14953 					return -EACCES;
14954 				} else if (src_reg->type == SCALAR_VALUE) {
14955 					if (insn->off == 0) {
14956 						bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
14957 
14958 						if (is_src_reg_u32)
14959 							assign_scalar_id_before_mov(env, src_reg);
14960 						*dst_reg = *src_reg;
14961 						/* Make sure ID is cleared if src_reg is not in u32
14962 						 * range otherwise dst_reg min/max could be incorrectly
14963 						 * propagated into src_reg by sync_linked_regs()
14964 						 */
14965 						if (!is_src_reg_u32)
14966 							clear_scalar_id(dst_reg);
14967 						dst_reg->subreg_def = env->insn_idx + 1;
14968 					} else {
14969 						/* case: W1 = (s8, s16)W2 */
14970 						bool no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1));
14971 
14972 						if (no_sext)
14973 							assign_scalar_id_before_mov(env, src_reg);
14974 						*dst_reg = *src_reg;
14975 						if (!no_sext)
14976 							clear_scalar_id(dst_reg);
14977 						dst_reg->subreg_def = env->insn_idx + 1;
14978 						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
14979 					}
14980 				} else {
14981 					mark_reg_unknown(env, regs,
14982 							 insn->dst_reg);
14983 				}
14984 				zext_32_to_64(dst_reg);
14985 				reg_bounds_sync(dst_reg);
14986 			}
14987 		} else {
14988 			/* case: R = imm
14989 			 * remember the value we stored into this reg
14990 			 */
14991 			/* clear any state __mark_reg_known doesn't set */
14992 			mark_reg_unknown(env, regs, insn->dst_reg);
14993 			regs[insn->dst_reg].type = SCALAR_VALUE;
14994 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
14995 				__mark_reg_known(regs + insn->dst_reg,
14996 						 insn->imm);
14997 			} else {
14998 				__mark_reg_known(regs + insn->dst_reg,
14999 						 (u32)insn->imm);
15000 			}
15001 		}
15002 
15003 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
15004 
15005 		if (BPF_SRC(insn->code) == BPF_X) {
15006 			/* check src1 operand */
15007 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15008 			if (err)
15009 				return err;
15010 		}
15011 
15012 		/* check src2 operand */
15013 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15014 		if (err)
15015 			return err;
15016 
15017 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
15018 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
15019 			verbose(env, "div by zero\n");
15020 			return -EINVAL;
15021 		}
15022 
15023 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
15024 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
15025 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
15026 
15027 			if (insn->imm < 0 || insn->imm >= size) {
15028 				verbose(env, "invalid shift %d\n", insn->imm);
15029 				return -EINVAL;
15030 			}
15031 		}
15032 
15033 		/* check dest operand */
15034 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15035 		err = err ?: adjust_reg_min_max_vals(env, insn);
15036 		if (err)
15037 			return err;
15038 	}
15039 
15040 	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
15041 }
15042 
15043 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
15044 				   struct bpf_reg_state *dst_reg,
15045 				   enum bpf_reg_type type,
15046 				   bool range_right_open)
15047 {
15048 	struct bpf_func_state *state;
15049 	struct bpf_reg_state *reg;
15050 	int new_range;
15051 
15052 	if (reg_umax(dst_reg) == 0 && range_right_open)
15053 		/* This doesn't give us any range */
15054 		return;
15055 
15056 	if (reg_umax(dst_reg) > MAX_PACKET_OFF)
15057 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
15058 		 * than pkt_end, but that's because it's also less than pkt.
15059 		 */
15060 		return;
15061 
15062 	new_range = reg_umax(dst_reg);
15063 	if (range_right_open)
15064 		new_range++;
15065 
15066 	/* Examples for register markings:
15067 	 *
15068 	 * pkt_data in dst register:
15069 	 *
15070 	 *   r2 = r3;
15071 	 *   r2 += 8;
15072 	 *   if (r2 > pkt_end) goto <handle exception>
15073 	 *   <access okay>
15074 	 *
15075 	 *   r2 = r3;
15076 	 *   r2 += 8;
15077 	 *   if (r2 < pkt_end) goto <access okay>
15078 	 *   <handle exception>
15079 	 *
15080 	 *   Where:
15081 	 *     r2 == dst_reg, pkt_end == src_reg
15082 	 *     r2=pkt(id=n,off=8,r=0)
15083 	 *     r3=pkt(id=n,off=0,r=0)
15084 	 *
15085 	 * pkt_data in src register:
15086 	 *
15087 	 *   r2 = r3;
15088 	 *   r2 += 8;
15089 	 *   if (pkt_end >= r2) goto <access okay>
15090 	 *   <handle exception>
15091 	 *
15092 	 *   r2 = r3;
15093 	 *   r2 += 8;
15094 	 *   if (pkt_end <= r2) goto <handle exception>
15095 	 *   <access okay>
15096 	 *
15097 	 *   Where:
15098 	 *     pkt_end == dst_reg, r2 == src_reg
15099 	 *     r2=pkt(id=n,off=8,r=0)
15100 	 *     r3=pkt(id=n,off=0,r=0)
15101 	 *
15102 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
15103 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
15104 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
15105 	 * the check.
15106 	 */
15107 
15108 	/* If our ids match, then we must have the same max_value.  And we
15109 	 * don't care about the other reg's fixed offset, since if it's too big
15110 	 * the range won't allow anything.
15111 	 * reg_umax(dst_reg) is known < MAX_PACKET_OFF, therefore it fits in a u16.
15112 	 */
15113 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15114 		if (reg->type == type && reg->id == dst_reg->id)
15115 			/* keep the maximum range already checked */
15116 			reg->range = max(reg->range, new_range);
15117 	}));
15118 }
15119 
15120 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15121 				u8 opcode, bool is_jmp32);
15122 static u8 rev_opcode(u8 opcode);
15123 
15124 /*
15125  * Learn more information about live branches by simulating refinement on both branches.
15126  * regs_refine_cond_op() is sound, so producing ill-formed register bounds for the branch means
15127  * that branch is dead.
15128  */
15129 static int simulate_both_branches_taken(struct bpf_verifier_env *env, u8 opcode, bool is_jmp32)
15130 {
15131 	/* Fallthrough (FALSE) branch */
15132 	regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32);
15133 	reg_bounds_sync(&env->false_reg1);
15134 	reg_bounds_sync(&env->false_reg2);
15135 	/*
15136 	 * If there is a range bounds violation in *any* of the abstract values in either
15137 	 * reg_states in the FALSE branch (i.e. reg1, reg2), the FALSE branch must be dead. Only
15138 	 * TRUE branch will be taken.
15139 	 */
15140 	if (range_bounds_violation(&env->false_reg1) || range_bounds_violation(&env->false_reg2))
15141 		return 1;
15142 
15143 	/* Jump (TRUE) branch */
15144 	regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32);
15145 	reg_bounds_sync(&env->true_reg1);
15146 	reg_bounds_sync(&env->true_reg2);
15147 	/*
15148 	 * If there is a range bounds violation in *any* of the abstract values in either
15149 	 * reg_states in the TRUE branch (i.e. true_reg1, true_reg2), the TRUE branch must be dead.
15150 	 * Only FALSE branch will be taken.
15151 	 */
15152 	if (range_bounds_violation(&env->true_reg1) || range_bounds_violation(&env->true_reg2))
15153 		return 0;
15154 
15155 	/* Both branches are possible, we can't determine which one will be taken. */
15156 	return -1;
15157 }
15158 
15159 /*
15160  * <reg1> <op> <reg2>, currently assuming reg2 is a constant
15161  */
15162 static int is_scalar_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15163 				  struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15164 {
15165 	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
15166 	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
15167 	u64 umin1 = is_jmp32 ? (u64)reg_u32_min(reg1) : reg_umin(reg1);
15168 	u64 umax1 = is_jmp32 ? (u64)reg_u32_max(reg1) : reg_umax(reg1);
15169 	s64 smin1 = is_jmp32 ? (s64)reg_s32_min(reg1) : reg_smin(reg1);
15170 	s64 smax1 = is_jmp32 ? (s64)reg_s32_max(reg1) : reg_smax(reg1);
15171 	u64 umin2 = is_jmp32 ? (u64)reg_u32_min(reg2) : reg_umin(reg2);
15172 	u64 umax2 = is_jmp32 ? (u64)reg_u32_max(reg2) : reg_umax(reg2);
15173 	s64 smin2 = is_jmp32 ? (s64)reg_s32_min(reg2) : reg_smin(reg2);
15174 	s64 smax2 = is_jmp32 ? (s64)reg_s32_max(reg2) : reg_smax(reg2);
15175 
15176 	if (reg1 == reg2) {
15177 		switch (opcode) {
15178 		case BPF_JGE:
15179 		case BPF_JLE:
15180 		case BPF_JSGE:
15181 		case BPF_JSLE:
15182 		case BPF_JEQ:
15183 			return 1;
15184 		case BPF_JGT:
15185 		case BPF_JLT:
15186 		case BPF_JSGT:
15187 		case BPF_JSLT:
15188 		case BPF_JNE:
15189 			return 0;
15190 		case BPF_JSET:
15191 			if (tnum_is_const(t1))
15192 				return t1.value != 0;
15193 			else
15194 				return (smin1 <= 0 && smax1 >= 0) ? -1 : 1;
15195 		default:
15196 			return -1;
15197 		}
15198 	}
15199 
15200 	switch (opcode) {
15201 	case BPF_JEQ:
15202 		/* constants, umin/umax and smin/smax checks would be
15203 		 * redundant in this case because they all should match
15204 		 */
15205 		if (tnum_is_const(t1) && tnum_is_const(t2))
15206 			return t1.value == t2.value;
15207 		if (!tnum_overlap(t1, t2))
15208 			return 0;
15209 		/* non-overlapping ranges */
15210 		if (umin1 > umax2 || umax1 < umin2)
15211 			return 0;
15212 		if (smin1 > smax2 || smax1 < smin2)
15213 			return 0;
15214 		if (!is_jmp32) {
15215 			/* if 64-bit ranges are inconclusive, see if we can
15216 			 * utilize 32-bit subrange knowledge to eliminate
15217 			 * branches that can't be taken a priori
15218 			 */
15219 			if (reg_u32_min(reg1) > reg_u32_max(reg2) ||
15220 			    reg_u32_max(reg1) < reg_u32_min(reg2))
15221 				return 0;
15222 			if (reg_s32_min(reg1) > reg_s32_max(reg2) ||
15223 			    reg_s32_max(reg1) < reg_s32_min(reg2))
15224 				return 0;
15225 		}
15226 		break;
15227 	case BPF_JNE:
15228 		/* constants, umin/umax and smin/smax checks would be
15229 		 * redundant in this case because they all should match
15230 		 */
15231 		if (tnum_is_const(t1) && tnum_is_const(t2))
15232 			return t1.value != t2.value;
15233 		if (!tnum_overlap(t1, t2))
15234 			return 1;
15235 		/* non-overlapping ranges */
15236 		if (umin1 > umax2 || umax1 < umin2)
15237 			return 1;
15238 		if (smin1 > smax2 || smax1 < smin2)
15239 			return 1;
15240 		if (!is_jmp32) {
15241 			/* if 64-bit ranges are inconclusive, see if we can
15242 			 * utilize 32-bit subrange knowledge to eliminate
15243 			 * branches that can't be taken a priori
15244 			 */
15245 			if (reg_u32_min(reg1) > reg_u32_max(reg2) ||
15246 			    reg_u32_max(reg1) < reg_u32_min(reg2))
15247 				return 1;
15248 			if (reg_s32_min(reg1) > reg_s32_max(reg2) ||
15249 			    reg_s32_max(reg1) < reg_s32_min(reg2))
15250 				return 1;
15251 		}
15252 		break;
15253 	case BPF_JSET:
15254 		if (!is_reg_const(reg2, is_jmp32)) {
15255 			swap(reg1, reg2);
15256 			swap(t1, t2);
15257 		}
15258 		if (!is_reg_const(reg2, is_jmp32))
15259 			return -1;
15260 		if ((~t1.mask & t1.value) & t2.value)
15261 			return 1;
15262 		if (!((t1.mask | t1.value) & t2.value))
15263 			return 0;
15264 		break;
15265 	case BPF_JGT:
15266 		if (umin1 > umax2)
15267 			return 1;
15268 		else if (umax1 <= umin2)
15269 			return 0;
15270 		break;
15271 	case BPF_JSGT:
15272 		if (smin1 > smax2)
15273 			return 1;
15274 		else if (smax1 <= smin2)
15275 			return 0;
15276 		break;
15277 	case BPF_JLT:
15278 		if (umax1 < umin2)
15279 			return 1;
15280 		else if (umin1 >= umax2)
15281 			return 0;
15282 		break;
15283 	case BPF_JSLT:
15284 		if (smax1 < smin2)
15285 			return 1;
15286 		else if (smin1 >= smax2)
15287 			return 0;
15288 		break;
15289 	case BPF_JGE:
15290 		if (umin1 >= umax2)
15291 			return 1;
15292 		else if (umax1 < umin2)
15293 			return 0;
15294 		break;
15295 	case BPF_JSGE:
15296 		if (smin1 >= smax2)
15297 			return 1;
15298 		else if (smax1 < smin2)
15299 			return 0;
15300 		break;
15301 	case BPF_JLE:
15302 		if (umax1 <= umin2)
15303 			return 1;
15304 		else if (umin1 > umax2)
15305 			return 0;
15306 		break;
15307 	case BPF_JSLE:
15308 		if (smax1 <= smin2)
15309 			return 1;
15310 		else if (smin1 > smax2)
15311 			return 0;
15312 		break;
15313 	}
15314 
15315 	return simulate_both_branches_taken(env, opcode, is_jmp32);
15316 }
15317 
15318 static int flip_opcode(u32 opcode)
15319 {
15320 	/* How can we transform "a <op> b" into "b <op> a"? */
15321 	static const u8 opcode_flip[16] = {
15322 		/* these stay the same */
15323 		[BPF_JEQ  >> 4] = BPF_JEQ,
15324 		[BPF_JNE  >> 4] = BPF_JNE,
15325 		[BPF_JSET >> 4] = BPF_JSET,
15326 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
15327 		[BPF_JGE  >> 4] = BPF_JLE,
15328 		[BPF_JGT  >> 4] = BPF_JLT,
15329 		[BPF_JLE  >> 4] = BPF_JGE,
15330 		[BPF_JLT  >> 4] = BPF_JGT,
15331 		[BPF_JSGE >> 4] = BPF_JSLE,
15332 		[BPF_JSGT >> 4] = BPF_JSLT,
15333 		[BPF_JSLE >> 4] = BPF_JSGE,
15334 		[BPF_JSLT >> 4] = BPF_JSGT
15335 	};
15336 	return opcode_flip[opcode >> 4];
15337 }
15338 
15339 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
15340 				   struct bpf_reg_state *src_reg,
15341 				   u8 opcode)
15342 {
15343 	struct bpf_reg_state *pkt;
15344 
15345 	if (src_reg->type == PTR_TO_PACKET_END) {
15346 		pkt = dst_reg;
15347 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
15348 		pkt = src_reg;
15349 		opcode = flip_opcode(opcode);
15350 	} else {
15351 		return -1;
15352 	}
15353 
15354 	if (pkt->range >= 0)
15355 		return -1;
15356 
15357 	switch (opcode) {
15358 	case BPF_JLE:
15359 		/* pkt <= pkt_end */
15360 		fallthrough;
15361 	case BPF_JGT:
15362 		/* pkt > pkt_end */
15363 		if (pkt->range == BEYOND_PKT_END)
15364 			/* pkt has at last one extra byte beyond pkt_end */
15365 			return opcode == BPF_JGT;
15366 		break;
15367 	case BPF_JLT:
15368 		/* pkt < pkt_end */
15369 		fallthrough;
15370 	case BPF_JGE:
15371 		/* pkt >= pkt_end */
15372 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
15373 			return opcode == BPF_JGE;
15374 		break;
15375 	}
15376 	return -1;
15377 }
15378 
15379 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
15380  * and return:
15381  *  1 - branch will be taken and "goto target" will be executed
15382  *  0 - branch will not be taken and fall-through to next insn
15383  * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
15384  *      range [0,10]
15385  */
15386 static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15387 			   struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15388 {
15389 	if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
15390 		return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
15391 
15392 	if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
15393 		u64 val;
15394 
15395 		/* arrange that reg2 is a scalar, and reg1 is a pointer */
15396 		if (!is_reg_const(reg2, is_jmp32)) {
15397 			opcode = flip_opcode(opcode);
15398 			swap(reg1, reg2);
15399 		}
15400 		/* and ensure that reg2 is a constant */
15401 		if (!is_reg_const(reg2, is_jmp32))
15402 			return -1;
15403 
15404 		if (!reg_not_null(env, reg1))
15405 			return -1;
15406 
15407 		/* If pointer is valid tests against zero will fail so we can
15408 		 * use this to direct branch taken.
15409 		 */
15410 		val = reg_const_value(reg2, is_jmp32);
15411 		if (val != 0)
15412 			return -1;
15413 
15414 		switch (opcode) {
15415 		case BPF_JEQ:
15416 			return 0;
15417 		case BPF_JNE:
15418 			return 1;
15419 		default:
15420 			return -1;
15421 		}
15422 	}
15423 
15424 	/* now deal with two scalars, but not necessarily constants */
15425 	return is_scalar_branch_taken(env, reg1, reg2, opcode, is_jmp32);
15426 }
15427 
15428 /* Opcode that corresponds to a *false* branch condition.
15429  * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
15430  */
15431 static u8 rev_opcode(u8 opcode)
15432 {
15433 	switch (opcode) {
15434 	case BPF_JEQ:		return BPF_JNE;
15435 	case BPF_JNE:		return BPF_JEQ;
15436 	/* JSET doesn't have it's reverse opcode in BPF, so add
15437 	 * BPF_X flag to denote the reverse of that operation
15438 	 */
15439 	case BPF_JSET:		return BPF_JSET | BPF_X;
15440 	case BPF_JSET | BPF_X:	return BPF_JSET;
15441 	case BPF_JGE:		return BPF_JLT;
15442 	case BPF_JGT:		return BPF_JLE;
15443 	case BPF_JLE:		return BPF_JGT;
15444 	case BPF_JLT:		return BPF_JGE;
15445 	case BPF_JSGE:		return BPF_JSLT;
15446 	case BPF_JSGT:		return BPF_JSLE;
15447 	case BPF_JSLE:		return BPF_JSGT;
15448 	case BPF_JSLT:		return BPF_JSGE;
15449 	default:		return 0;
15450 	}
15451 }
15452 
/* Refine range knowledge for <reg1> <op> <reg2> conditional operation. */
15454 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15455 				u8 opcode, bool is_jmp32)
15456 {
15457 	struct tnum t;
15458 	u64 val;
15459 
	/* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below */
15461 	switch (opcode) {
15462 	case BPF_JGE:
15463 	case BPF_JGT:
15464 	case BPF_JSGE:
15465 	case BPF_JSGT:
15466 		opcode = flip_opcode(opcode);
15467 		swap(reg1, reg2);
15468 		break;
15469 	default:
15470 		break;
15471 	}
15472 
15473 	switch (opcode) {
15474 	case BPF_JEQ:
15475 		if (is_jmp32) {
15476 			reg1->r32 = cnum32_intersect(reg1->r32, reg2->r32);
15477 			reg2->r32 = reg1->r32;
15478 
15479 			t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
15480 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15481 			reg2->var_off = tnum_with_subreg(reg2->var_off, t);
15482 		} else {
15483 			reg1->r64 = cnum64_intersect(reg1->r64, reg2->r64);
15484 			reg2->r64 = reg1->r64;
15485 
15486 			reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
15487 			reg2->var_off = reg1->var_off;
15488 		}
15489 		break;
15490 	case BPF_JNE:
15491 		if (!is_reg_const(reg2, is_jmp32))
15492 			swap(reg1, reg2);
15493 		if (!is_reg_const(reg2, is_jmp32))
15494 			break;
15495 
15496 		/* try to recompute the bound of reg1 if reg2 is a const and
15497 		 * is exactly the edge of reg1.
15498 		 */
15499 		val = reg_const_value(reg2, is_jmp32);
15500 		if (is_jmp32) {
15501 			/* Complement of the range [val, val] as cnum32. */
15502 			cnum32_intersect_with(&reg1->r32, (struct cnum32){ val + 1, U32_MAX - 1 });
15503 		} else {
15504 			/* Complement of the range [val, val] as cnum64. */
15505 			cnum64_intersect_with(&reg1->r64, (struct cnum64){ val + 1, U64_MAX - 1 });
15506 		}
15507 		break;
15508 	case BPF_JSET:
15509 		if (!is_reg_const(reg2, is_jmp32))
15510 			swap(reg1, reg2);
15511 		if (!is_reg_const(reg2, is_jmp32))
15512 			break;
15513 		val = reg_const_value(reg2, is_jmp32);
15514 		/* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
15515 		 * requires single bit to learn something useful. E.g., if we
15516 		 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
15517 		 * are actually set? We can learn something definite only if
15518 		 * it's a single-bit value to begin with.
15519 		 *
15520 		 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
15521 		 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
15522 		 * bit 1 is set, which we can readily use in adjustments.
15523 		 */
15524 		if (!is_power_of_2(val))
15525 			break;
15526 		if (is_jmp32) {
15527 			t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
15528 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15529 		} else {
15530 			reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
15531 		}
15532 		break;
15533 	case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
15534 		if (!is_reg_const(reg2, is_jmp32))
15535 			swap(reg1, reg2);
15536 		if (!is_reg_const(reg2, is_jmp32))
15537 			break;
15538 		val = reg_const_value(reg2, is_jmp32);
15539 		/* Forget the ranges before narrowing tnums, to avoid invariant
15540 		 * violations if we're on a dead branch.
15541 		 */
15542 		__mark_reg_unbounded(reg1);
15543 		if (is_jmp32) {
15544 			t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
15545 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15546 		} else {
15547 			reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
15548 		}
15549 		break;
15550 	case BPF_JLE:
15551 		if (is_jmp32) {
15552 			cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2));
15553 			cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1), U32_MAX);
15554 		} else {
15555 			cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2));
15556 			cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1), U64_MAX);
15557 		}
15558 		break;
15559 	case BPF_JLT:
15560 		if (is_jmp32) {
15561 			cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2) - 1);
15562 			cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1) + 1, U32_MAX);
15563 		} else {
15564 			cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2) - 1);
15565 			cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1) + 1, U64_MAX);
15566 		}
15567 		break;
15568 	case BPF_JSLE:
15569 		if (is_jmp32) {
15570 			cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2));
15571 			cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1), S32_MAX);
15572 		} else {
15573 			cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2));
15574 			cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1), S64_MAX);
15575 		}
15576 		break;
15577 	case BPF_JSLT:
15578 		if (is_jmp32) {
15579 			cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2) - 1);
15580 			cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1) + 1, S32_MAX);
15581 		} else {
15582 			cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2) - 1);
15583 			cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1) + 1, S64_MAX);
15584 		}
15585 		break;
15586 	default:
15587 		return;
15588 	}
15589 }
15590 
15591 /* Check for invariant violations on the registers for both branches of a condition */
15592 static int regs_bounds_sanity_check_branches(struct bpf_verifier_env *env)
15593 {
15594 	int err;
15595 
15596 	err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1");
15597 	err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2");
15598 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1");
15599 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2");
15600 	return err;
15601 }
15602 
15603 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
15604 				 struct bpf_reg_state *reg, u32 id,
15605 				 bool is_null)
15606 {
15607 	if (type_may_be_null(reg->type) && reg->id == id &&
15608 	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
15609 		/* Old offset should have been known-zero, because we don't
15610 		 * allow pointer arithmetic on pointers that might be NULL.
15611 		 * If we see this happening, don't convert the register.
15612 		 *
15613 		 * But in some cases, some helpers that return local kptrs
15614 		 * advance offset for the returned pointer. In those cases,
15615 		 * it is fine to expect to see reg->var_off.
15616 		 */
15617 		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
15618 		    WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0)))
15619 			return;
15620 		if (is_null) {
15621 			/* We don't need id from this point
15622 			 * onwards anymore, thus we should better reset it,
15623 			 * so that state pruning has chances to take effect.
15624 			 */
15625 			__mark_reg_known_zero(reg);
15626 			reg->type = SCALAR_VALUE;
15627 
15628 			return;
15629 		}
15630 
15631 		mark_ptr_not_null_reg(reg);
15632 
15633 		/*
15634 		 * reg->id is preserved for object relationship tracking
15635 		 * and spin_lock lock state tracking
15636 		 */
15637 	}
15638 }
15639 
15640 /* The logic is similar to find_good_pkt_pointers(), both could eventually
15641  * be folded together at some point.
15642  */
15643 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
15644 				  bool is_null)
15645 {
15646 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
15647 	struct bpf_reg_state *regs = state->regs, *reg;
15648 	u32 id = regs[regno].id;
15649 
15650 	if (is_null && find_reference_state(vstate, id))
15651 		/* regs[regno] is in the " == NULL" branch.
15652 		 * No one could have freed the reference state before
15653 		 * doing the NULL check.
15654 		 */
15655 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
15656 
15657 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15658 		mark_ptr_or_null_reg(state, reg, id, is_null);
15659 	}));
15660 }
15661 
15662 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
15663 				   struct bpf_reg_state *dst_reg,
15664 				   struct bpf_reg_state *src_reg,
15665 				   struct bpf_verifier_state *this_branch,
15666 				   struct bpf_verifier_state *other_branch)
15667 {
15668 	if (BPF_SRC(insn->code) != BPF_X)
15669 		return false;
15670 
15671 	/* Pointers are always 64-bit. */
15672 	if (BPF_CLASS(insn->code) == BPF_JMP32)
15673 		return false;
15674 
15675 	switch (BPF_OP(insn->code)) {
15676 	case BPF_JGT:
15677 		if ((dst_reg->type == PTR_TO_PACKET &&
15678 		     src_reg->type == PTR_TO_PACKET_END) ||
15679 		    (dst_reg->type == PTR_TO_PACKET_META &&
15680 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15681 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
15682 			find_good_pkt_pointers(this_branch, dst_reg,
15683 					       dst_reg->type, false);
15684 			mark_pkt_end(other_branch, insn->dst_reg, true);
15685 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15686 			    src_reg->type == PTR_TO_PACKET) ||
15687 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15688 			    src_reg->type == PTR_TO_PACKET_META)) {
15689 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
15690 			find_good_pkt_pointers(other_branch, src_reg,
15691 					       src_reg->type, true);
15692 			mark_pkt_end(this_branch, insn->src_reg, false);
15693 		} else {
15694 			return false;
15695 		}
15696 		break;
15697 	case BPF_JLT:
15698 		if ((dst_reg->type == PTR_TO_PACKET &&
15699 		     src_reg->type == PTR_TO_PACKET_END) ||
15700 		    (dst_reg->type == PTR_TO_PACKET_META &&
15701 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15702 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
15703 			find_good_pkt_pointers(other_branch, dst_reg,
15704 					       dst_reg->type, true);
15705 			mark_pkt_end(this_branch, insn->dst_reg, false);
15706 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15707 			    src_reg->type == PTR_TO_PACKET) ||
15708 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15709 			    src_reg->type == PTR_TO_PACKET_META)) {
15710 			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
15711 			find_good_pkt_pointers(this_branch, src_reg,
15712 					       src_reg->type, false);
15713 			mark_pkt_end(other_branch, insn->src_reg, true);
15714 		} else {
15715 			return false;
15716 		}
15717 		break;
15718 	case BPF_JGE:
15719 		if ((dst_reg->type == PTR_TO_PACKET &&
15720 		     src_reg->type == PTR_TO_PACKET_END) ||
15721 		    (dst_reg->type == PTR_TO_PACKET_META &&
15722 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15723 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
15724 			find_good_pkt_pointers(this_branch, dst_reg,
15725 					       dst_reg->type, true);
15726 			mark_pkt_end(other_branch, insn->dst_reg, false);
15727 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15728 			    src_reg->type == PTR_TO_PACKET) ||
15729 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15730 			    src_reg->type == PTR_TO_PACKET_META)) {
15731 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
15732 			find_good_pkt_pointers(other_branch, src_reg,
15733 					       src_reg->type, false);
15734 			mark_pkt_end(this_branch, insn->src_reg, true);
15735 		} else {
15736 			return false;
15737 		}
15738 		break;
15739 	case BPF_JLE:
15740 		if ((dst_reg->type == PTR_TO_PACKET &&
15741 		     src_reg->type == PTR_TO_PACKET_END) ||
15742 		    (dst_reg->type == PTR_TO_PACKET_META &&
15743 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15744 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
15745 			find_good_pkt_pointers(other_branch, dst_reg,
15746 					       dst_reg->type, false);
15747 			mark_pkt_end(this_branch, insn->dst_reg, true);
15748 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15749 			    src_reg->type == PTR_TO_PACKET) ||
15750 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15751 			    src_reg->type == PTR_TO_PACKET_META)) {
15752 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
15753 			find_good_pkt_pointers(this_branch, src_reg,
15754 					       src_reg->type, true);
15755 			mark_pkt_end(other_branch, insn->src_reg, false);
15756 		} else {
15757 			return false;
15758 		}
15759 		break;
15760 	default:
15761 		return false;
15762 	}
15763 
15764 	return true;
15765 }
15766 
15767 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
15768 				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
15769 {
15770 	struct linked_reg *e;
15771 
15772 	if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
15773 		return;
15774 
15775 	e = linked_regs_push(reg_set);
15776 	if (e) {
15777 		e->frameno = frameno;
15778 		e->is_reg = is_reg;
15779 		e->regno = spi_or_reg;
15780 	} else {
15781 		clear_scalar_id(reg);
15782 	}
15783 }
15784 
15785 /* For all R being scalar registers or spilled scalar registers
15786  * in verifier state, save R in linked_regs if R->id == id.
15787  * If there are too many Rs sharing same id, reset id for leftover Rs.
15788  */
15789 static void collect_linked_regs(struct bpf_verifier_env *env,
15790 				struct bpf_verifier_state *vstate,
15791 				u32 id,
15792 				struct linked_regs *linked_regs)
15793 {
15794 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
15795 	struct bpf_func_state *func;
15796 	struct bpf_reg_state *reg;
15797 	u16 live_regs;
15798 	int i, j;
15799 
15800 	id = id & ~BPF_ADD_CONST;
15801 	for (i = vstate->curframe; i >= 0; i--) {
15802 		live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before;
15803 		func = vstate->frame[i];
15804 		for (j = 0; j < BPF_REG_FP; j++) {
15805 			if (!(live_regs & BIT(j)))
15806 				continue;
15807 			reg = &func->regs[j];
15808 			__collect_linked_regs(linked_regs, reg, id, i, j, true);
15809 		}
15810 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
15811 			if (!bpf_is_spilled_reg(&func->stack[j]))
15812 				continue;
15813 			reg = &func->stack[j].spilled_ptr;
15814 			__collect_linked_regs(linked_regs, reg, id, i, j, false);
15815 		}
15816 	}
15817 }
15818 
15819 /* For all R in linked_regs, copy known_reg range into R
15820  * if R->id == known_reg->id.
15821  */
15822 static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
15823 			     struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
15824 {
15825 	struct bpf_reg_state fake_reg;
15826 	struct bpf_reg_state *reg;
15827 	struct linked_reg *e;
15828 	int i;
15829 
15830 	for (i = 0; i < linked_regs->cnt; ++i) {
15831 		e = &linked_regs->entries[i];
15832 		reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
15833 				: &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
15834 		if (reg->type != SCALAR_VALUE || reg == known_reg)
15835 			continue;
15836 		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
15837 			continue;
15838 		/*
15839 		 * Skip mixed 32/64-bit links: the delta relationship doesn't
15840 		 * hold across different ALU widths.
15841 		 */
15842 		if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
15843 			continue;
15844 		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
15845 		    reg->delta == known_reg->delta) {
15846 			s32 saved_subreg_def = reg->subreg_def;
15847 
15848 			*reg = *known_reg;
15849 			reg->subreg_def = saved_subreg_def;
15850 		} else {
15851 			s32 saved_subreg_def = reg->subreg_def;
15852 			s32 saved_off = reg->delta;
15853 			u32 saved_id = reg->id;
15854 
15855 			fake_reg.type = SCALAR_VALUE;
15856 			__mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta);
15857 
15858 			/* reg = known_reg; reg += delta */
15859 			*reg = *known_reg;
15860 			/*
15861 			 * Must preserve off, id and subreg_def flag,
15862 			 * otherwise another sync_linked_regs() will be incorrect.
15863 			 */
15864 			reg->delta = saved_off;
15865 			reg->id = saved_id;
15866 			reg->subreg_def = saved_subreg_def;
15867 
15868 			scalar32_min_max_add(reg, &fake_reg);
15869 			scalar_min_max_add(reg, &fake_reg);
15870 			reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
15871 			if ((reg->id | known_reg->id) & BPF_ADD_CONST32)
15872 				zext_32_to_64(reg);
15873 			reg_bounds_sync(reg);
15874 		}
15875 		if (e->is_reg)
15876 			mark_reg_scratched(env, e->regno);
15877 		else
15878 			mark_stack_slot_scratched(env, e->spi);
15879 	}
15880 }
15881 
15882 static int check_cond_jmp_op(struct bpf_verifier_env *env,
15883 			     struct bpf_insn *insn, int *insn_idx)
15884 {
15885 	struct bpf_verifier_state *this_branch = env->cur_state;
15886 	struct bpf_verifier_state *other_branch;
15887 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
15888 	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
15889 	struct bpf_reg_state *eq_branch_regs;
15890 	struct linked_regs linked_regs = {};
15891 	u8 opcode = BPF_OP(insn->code);
15892 	int insn_flags = 0;
15893 	bool is_jmp32;
15894 	int pred = -1;
15895 	int err;
15896 
15897 	/* Only conditional jumps are expected to reach here. */
15898 	if (opcode == BPF_JA || opcode > BPF_JCOND) {
15899 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
15900 		return -EINVAL;
15901 	}
15902 
15903 	if (opcode == BPF_JCOND) {
15904 		struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
15905 		int idx = *insn_idx;
15906 
15907 		prev_st = find_prev_entry(env, cur_st->parent, idx);
15908 
15909 		/* branch out 'fallthrough' insn as a new state to explore */
15910 		queued_st = push_stack(env, idx + 1, idx, false);
15911 		if (IS_ERR(queued_st))
15912 			return PTR_ERR(queued_st);
15913 
15914 		queued_st->may_goto_depth++;
15915 		if (prev_st)
15916 			widen_imprecise_scalars(env, prev_st, queued_st);
15917 		*insn_idx += insn->off;
15918 		return 0;
15919 	}
15920 
15921 	/* check src2 operand */
15922 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15923 	if (err)
15924 		return err;
15925 
15926 	dst_reg = &regs[insn->dst_reg];
15927 	if (BPF_SRC(insn->code) == BPF_X) {
15928 		/* check src1 operand */
15929 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
15930 		if (err)
15931 			return err;
15932 
15933 		src_reg = &regs[insn->src_reg];
15934 		if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
15935 		    is_pointer_value(env, insn->src_reg)) {
15936 			verbose(env, "R%d pointer comparison prohibited\n",
15937 				insn->src_reg);
15938 			return -EACCES;
15939 		}
15940 
15941 		if (src_reg->type == PTR_TO_STACK)
15942 			insn_flags |= INSN_F_SRC_REG_STACK;
15943 		if (dst_reg->type == PTR_TO_STACK)
15944 			insn_flags |= INSN_F_DST_REG_STACK;
15945 	} else {
15946 		src_reg = &env->fake_reg[0];
15947 		memset(src_reg, 0, sizeof(*src_reg));
15948 		src_reg->type = SCALAR_VALUE;
15949 		__mark_reg_known(src_reg, insn->imm);
15950 
15951 		if (dst_reg->type == PTR_TO_STACK)
15952 			insn_flags |= INSN_F_DST_REG_STACK;
15953 	}
15954 
15955 	if (insn_flags) {
15956 		err = bpf_push_jmp_history(env, this_branch, insn_flags, 0, 0, 0);
15957 		if (err)
15958 			return err;
15959 	}
15960 
15961 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
15962 	env->false_reg1 = *dst_reg;
15963 	env->false_reg2 = *src_reg;
15964 	env->true_reg1 = *dst_reg;
15965 	env->true_reg2 = *src_reg;
15966 	pred = is_branch_taken(env, dst_reg, src_reg, opcode, is_jmp32);
15967 	if (pred >= 0) {
15968 		/* If we get here with a dst_reg pointer type it is because
15969 		 * above is_branch_taken() special cased the 0 comparison.
15970 		 */
15971 		if (!__is_pointer_value(false, dst_reg))
15972 			err = mark_chain_precision(env, insn->dst_reg);
15973 		if (BPF_SRC(insn->code) == BPF_X && !err &&
15974 		    !__is_pointer_value(false, src_reg))
15975 			err = mark_chain_precision(env, insn->src_reg);
15976 		if (err)
15977 			return err;
15978 	}
15979 
15980 	if (pred == 1) {
15981 		/* Only follow the goto, ignore fall-through. If needed, push
15982 		 * the fall-through branch for simulation under speculative
15983 		 * execution.
15984 		 */
15985 		if (!env->bypass_spec_v1) {
15986 			err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx);
15987 			if (err < 0)
15988 				return err;
15989 		}
15990 		if (env->log.level & BPF_LOG_LEVEL)
15991 			print_insn_state(env, this_branch, this_branch->curframe);
15992 		*insn_idx += insn->off;
15993 		return 0;
15994 	} else if (pred == 0) {
15995 		/* Only follow the fall-through branch, since that's where the
15996 		 * program will go. If needed, push the goto branch for
15997 		 * simulation under speculative execution.
15998 		 */
15999 		if (!env->bypass_spec_v1) {
16000 			err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1,
16001 							*insn_idx);
16002 			if (err < 0)
16003 				return err;
16004 		}
16005 		if (env->log.level & BPF_LOG_LEVEL)
16006 			print_insn_state(env, this_branch, this_branch->curframe);
16007 		return 0;
16008 	}
16009 
16010 	/* Push scalar registers sharing same ID to jump history,
16011 	 * do this before creating 'other_branch', so that both
16012 	 * 'this_branch' and 'other_branch' share this history
16013 	 * if parent state is created.
16014 	 */
16015 	if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
16016 		collect_linked_regs(env, this_branch, src_reg->id, &linked_regs);
16017 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
16018 		collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
16019 	if (linked_regs.cnt > 1) {
16020 		err = bpf_push_jmp_history(env, this_branch, 0, 0, 0, linked_regs_pack(&linked_regs));
16021 		if (err)
16022 			return err;
16023 	}
16024 
16025 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
16026 	if (IS_ERR(other_branch))
16027 		return PTR_ERR(other_branch);
16028 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
16029 
16030 	err = regs_bounds_sanity_check_branches(env);
16031 	if (err)
16032 		return err;
16033 
16034 	*dst_reg = env->false_reg1;
16035 	*src_reg = env->false_reg2;
16036 	other_branch_regs[insn->dst_reg] = env->true_reg1;
16037 	if (BPF_SRC(insn->code) == BPF_X)
16038 		other_branch_regs[insn->src_reg] = env->true_reg2;
16039 
16040 	if (BPF_SRC(insn->code) == BPF_X &&
16041 	    src_reg->type == SCALAR_VALUE && src_reg->id &&
16042 	    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
16043 		sync_linked_regs(env, this_branch, src_reg, &linked_regs);
16044 		sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg],
16045 				 &linked_regs);
16046 	}
16047 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
16048 	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
16049 		sync_linked_regs(env, this_branch, dst_reg, &linked_regs);
16050 		sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg],
16051 				 &linked_regs);
16052 	}
16053 
16054 	/* if one pointer register is compared to another pointer
16055 	 * register check if PTR_MAYBE_NULL could be lifted.
16056 	 * E.g. register A - maybe null
16057 	 *      register B - not null
16058 	 * for JNE A, B, ... - A is not null in the false branch;
16059 	 * for JEQ A, B, ... - A is not null in the true branch.
16060 	 *
16061 	 * Since PTR_TO_BTF_ID points to a kernel struct that does
16062 	 * not need to be null checked by the BPF program, i.e.,
16063 	 * could be null even without PTR_MAYBE_NULL marking, so
16064 	 * only propagate nullness when neither reg is that type.
16065 	 */
16066 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
16067 	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
16068 	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
16069 	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
16070 	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
16071 		eq_branch_regs = NULL;
16072 		switch (opcode) {
16073 		case BPF_JEQ:
16074 			eq_branch_regs = other_branch_regs;
16075 			break;
16076 		case BPF_JNE:
16077 			eq_branch_regs = regs;
16078 			break;
16079 		default:
16080 			/* do nothing */
16081 			break;
16082 		}
16083 		if (eq_branch_regs) {
16084 			if (type_may_be_null(src_reg->type))
16085 				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
16086 			else
16087 				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
16088 		}
16089 	}
16090 
16091 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
16092 	 * Also does the same detection for a register whose the value is
16093 	 * known to be 0.
16094 	 * NOTE: these optimizations below are related with pointer comparison
16095 	 *       which will never be JMP32.
16096 	 */
16097 	if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
16098 	    type_may_be_null(dst_reg->type) &&
16099 	    ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) ||
16100 	     (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) {
16101 		/* Mark all identical registers in each branch as either
16102 		 * safe or unknown depending R == 0 or R != 0 conditional.
16103 		 */
16104 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
16105 				      opcode == BPF_JNE);
16106 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
16107 				      opcode == BPF_JEQ);
16108 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
16109 					   this_branch, other_branch) &&
16110 		   is_pointer_value(env, insn->dst_reg)) {
16111 		verbose(env, "R%d pointer comparison prohibited\n",
16112 			insn->dst_reg);
16113 		return -EACCES;
16114 	}
16115 	if (env->log.level & BPF_LOG_LEVEL)
16116 		print_insn_state(env, this_branch, this_branch->curframe);
16117 	return 0;
16118 }
16119 
16120 /* verify BPF_LD_IMM64 instruction */
16121 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
16122 {
16123 	struct bpf_insn_aux_data *aux = cur_aux(env);
16124 	struct bpf_reg_state *regs = cur_regs(env);
16125 	struct bpf_reg_state *dst_reg;
16126 	struct bpf_map *map;
16127 	int err;
16128 
16129 	if (BPF_SIZE(insn->code) != BPF_DW) {
16130 		verbose(env, "invalid BPF_LD_IMM insn\n");
16131 		return -EINVAL;
16132 	}
16133 
16134 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
16135 	if (err)
16136 		return err;
16137 
16138 	dst_reg = &regs[insn->dst_reg];
16139 	if (insn->src_reg == 0) {
16140 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
16141 
16142 		dst_reg->type = SCALAR_VALUE;
16143 		__mark_reg_known(&regs[insn->dst_reg], imm);
16144 		return 0;
16145 	}
16146 
16147 	/* All special src_reg cases are listed below. From this point onwards
16148 	 * we either succeed and assign a corresponding dst_reg->type after
16149 	 * zeroing the offset, or fail and reject the program.
16150 	 */
16151 	mark_reg_known_zero(env, regs, insn->dst_reg);
16152 
16153 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
16154 		dst_reg->type = aux->btf_var.reg_type;
16155 		switch (base_type(dst_reg->type)) {
16156 		case PTR_TO_MEM:
16157 			dst_reg->mem_size = aux->btf_var.mem_size;
16158 			break;
16159 		case PTR_TO_BTF_ID:
16160 			dst_reg->btf = aux->btf_var.btf;
16161 			dst_reg->btf_id = aux->btf_var.btf_id;
16162 			break;
16163 		default:
16164 			verifier_bug(env, "pseudo btf id: unexpected dst reg type");
16165 			return -EFAULT;
16166 		}
16167 		return 0;
16168 	}
16169 
16170 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
16171 		struct bpf_prog_aux *aux = env->prog->aux;
16172 		u32 subprogno = bpf_find_subprog(env,
16173 						 env->insn_idx + insn->imm + 1);
16174 
16175 		if (!aux->func_info) {
16176 			verbose(env, "missing btf func_info\n");
16177 			return -EINVAL;
16178 		}
16179 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
16180 			verbose(env, "callback function not static\n");
16181 			return -EINVAL;
16182 		}
16183 
16184 		dst_reg->type = PTR_TO_FUNC;
16185 		dst_reg->subprogno = subprogno;
16186 		return 0;
16187 	}
16188 
16189 	map = env->used_maps[aux->map_index];
16190 
16191 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
16192 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
16193 		if (map->map_type == BPF_MAP_TYPE_ARENA) {
16194 			__mark_reg_unknown(env, dst_reg);
16195 			dst_reg->map_ptr = map;
16196 			return 0;
16197 		}
16198 		__mark_reg_known(dst_reg, aux->map_off);
16199 		dst_reg->type = PTR_TO_MAP_VALUE;
16200 		dst_reg->map_ptr = map;
16201 		WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
16202 			     map->max_entries != 1);
16203 		/* We want reg->id to be same (0) as map_value is not distinct */
16204 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
16205 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
16206 		dst_reg->type = CONST_PTR_TO_MAP;
16207 		dst_reg->map_ptr = map;
16208 	} else {
16209 		verifier_bug(env, "unexpected src reg value for ldimm64");
16210 		return -EFAULT;
16211 	}
16212 
16213 	return 0;
16214 }
16215 
16216 static bool may_access_skb(enum bpf_prog_type type)
16217 {
16218 	switch (type) {
16219 	case BPF_PROG_TYPE_SOCKET_FILTER:
16220 	case BPF_PROG_TYPE_SCHED_CLS:
16221 	case BPF_PROG_TYPE_SCHED_ACT:
16222 		return true;
16223 	default:
16224 		return false;
16225 	}
16226 }
16227 
16228 /* verify safety of LD_ABS|LD_IND instructions:
16229  * - they can only appear in the programs where ctx == skb
16230  * - since they are wrappers of function calls, they scratch R1-R5 registers,
16231  *   preserve R6-R9, and store return value into R0
16232  *
16233  * Implicit input:
16234  *   ctx == skb == R6 == CTX
16235  *
16236  * Explicit input:
16237  *   SRC == any register
16238  *   IMM == 32-bit immediate
16239  *
16240  * Output:
16241  *   R0 - 8/16/32-bit skb data converted to cpu endianness
16242  */
16243 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
16244 {
16245 	struct bpf_reg_state *regs = cur_regs(env);
16246 	static const int ctx_reg = BPF_REG_6;
16247 	u8 mode = BPF_MODE(insn->code);
16248 	int i, err;
16249 
16250 	if (!may_access_skb(resolve_prog_type(env->prog))) {
16251 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
16252 		return -EINVAL;
16253 	}
16254 
16255 	if (!env->ops->gen_ld_abs) {
16256 		verifier_bug(env, "gen_ld_abs is null");
16257 		return -EFAULT;
16258 	}
16259 
16260 	/* check whether implicit source operand (register R6) is readable */
16261 	err = check_reg_arg(env, ctx_reg, SRC_OP);
16262 	if (err)
16263 		return err;
16264 
16265 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
16266 	 * gen_ld_abs() may terminate the program at runtime, leading to
16267 	 * reference leak.
16268 	 */
16269 	err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
16270 	if (err)
16271 		return err;
16272 
16273 	if (regs[ctx_reg].type != PTR_TO_CTX) {
16274 		verbose(env,
16275 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
16276 		return -EINVAL;
16277 	}
16278 
16279 	if (mode == BPF_IND) {
16280 		/* check explicit source operand */
16281 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
16282 		if (err)
16283 			return err;
16284 	}
16285 
16286 	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
16287 	if (err < 0)
16288 		return err;
16289 
16290 	/* reset caller saved regs to unreadable */
16291 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
16292 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
16293 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
16294 	}
16295 
16296 	/* mark destination R0 register as readable, since it contains
16297 	 * the value fetched from the packet.
16298 	 * Already marked as written above.
16299 	 */
16300 	mark_reg_unknown(env, regs, BPF_REG_0);
16301 	/* ld_abs load up to 32-bit skb data. */
16302 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
16303 	/*
16304 	 * See bpf_gen_ld_abs() which emits a hidden BPF_EXIT with r0=0
16305 	 * which must be explored by the verifier when in a subprog.
16306 	 */
16307 	if (env->cur_state->curframe) {
16308 		struct bpf_verifier_state *branch;
16309 
16310 		mark_reg_scratched(env, BPF_REG_0);
16311 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
16312 		if (IS_ERR(branch))
16313 			return PTR_ERR(branch);
16314 		mark_reg_known_zero(env, regs, BPF_REG_0);
16315 		err = prepare_func_exit(env, &env->insn_idx);
16316 		if (err)
16317 			return err;
16318 		env->insn_idx--;
16319 	}
16320 	return 0;
16321 }
16322 
16323 
16324 static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
16325 {
16326 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16327 
16328 	/* Default return value range. */
16329 	*range = retval_range(0, 1);
16330 
16331 	switch (prog_type) {
16332 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
16333 		switch (env->prog->expected_attach_type) {
16334 		case BPF_CGROUP_UDP4_RECVMSG:
16335 		case BPF_CGROUP_UDP6_RECVMSG:
16336 		case BPF_CGROUP_UNIX_RECVMSG:
16337 		case BPF_CGROUP_INET4_GETPEERNAME:
16338 		case BPF_CGROUP_INET6_GETPEERNAME:
16339 		case BPF_CGROUP_UNIX_GETPEERNAME:
16340 		case BPF_CGROUP_INET4_GETSOCKNAME:
16341 		case BPF_CGROUP_INET6_GETSOCKNAME:
16342 		case BPF_CGROUP_UNIX_GETSOCKNAME:
16343 			*range = retval_range(1, 1);
16344 			break;
16345 		case BPF_CGROUP_INET4_BIND:
16346 		case BPF_CGROUP_INET6_BIND:
16347 			*range = retval_range(0, 3);
16348 			break;
16349 		default:
16350 			break;
16351 		}
16352 		break;
16353 	case BPF_PROG_TYPE_CGROUP_SKB:
16354 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)
16355 			*range = retval_range(0, 3);
16356 		break;
16357 	case BPF_PROG_TYPE_CGROUP_SOCK:
16358 	case BPF_PROG_TYPE_SOCK_OPS:
16359 	case BPF_PROG_TYPE_CGROUP_DEVICE:
16360 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
16361 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
16362 		break;
16363 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
16364 		if (!env->prog->aux->attach_btf_id)
16365 			return false;
16366 		*range = retval_range(0, 0);
16367 		break;
16368 	case BPF_PROG_TYPE_TRACING:
16369 		switch (env->prog->expected_attach_type) {
16370 		case BPF_TRACE_FENTRY:
16371 		case BPF_TRACE_FEXIT:
16372 		case BPF_TRACE_FSESSION:
16373 			*range = retval_range(0, 0);
16374 			break;
16375 		case BPF_TRACE_RAW_TP:
16376 		case BPF_MODIFY_RETURN:
16377 			return false;
16378 		case BPF_TRACE_ITER:
16379 		default:
16380 			break;
16381 		}
16382 		break;
16383 	case BPF_PROG_TYPE_KPROBE:
16384 		switch (env->prog->expected_attach_type) {
16385 		case BPF_TRACE_KPROBE_SESSION:
16386 		case BPF_TRACE_UPROBE_SESSION:
16387 			break;
16388 		default:
16389 			return false;
16390 		}
16391 		break;
16392 	case BPF_PROG_TYPE_SK_LOOKUP:
16393 		*range = retval_range(SK_DROP, SK_PASS);
16394 		break;
16395 
16396 	case BPF_PROG_TYPE_LSM:
16397 		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
16398 			/* no range found, any return value is allowed */
16399 			if (!get_func_retval_range(env->prog, range))
16400 				return false;
16401 			/* no restricted range, any return value is allowed */
16402 			if (range->minval == S32_MIN && range->maxval == S32_MAX)
16403 				return false;
16404 			range->return_32bit = true;
16405 		} else if (!env->prog->aux->attach_func_proto->type) {
16406 			/* Make sure programs that attach to void
16407 			 * hooks don't try to modify return value.
16408 			 */
16409 			*range = retval_range(1, 1);
16410 		}
16411 		break;
16412 
16413 	case BPF_PROG_TYPE_NETFILTER:
16414 		*range = retval_range(NF_DROP, NF_ACCEPT);
16415 		break;
16416 	case BPF_PROG_TYPE_STRUCT_OPS:
16417 		*range = retval_range(0, 0);
16418 		break;
16419 	case BPF_PROG_TYPE_EXT:
16420 		/* freplace program can return anything as its return value
16421 		 * depends on the to-be-replaced kernel func or bpf program.
16422 		 */
16423 	default:
16424 		return false;
16425 	}
16426 
16427 	/* Continue calculating. */
16428 
16429 	return true;
16430 }
16431 
16432 static bool program_returns_void(struct bpf_verifier_env *env)
16433 {
16434 	const struct bpf_prog *prog = env->prog;
16435 	enum bpf_prog_type prog_type = prog->type;
16436 
16437 	switch (prog_type) {
16438 	case BPF_PROG_TYPE_LSM:
16439 		/* See return_retval_range, for BPF_LSM_CGROUP can be 0 or 0-1 depending on hook. */
16440 		if (prog->expected_attach_type != BPF_LSM_CGROUP &&
16441 		    !prog->aux->attach_func_proto->type)
16442 			return true;
16443 		break;
16444 	case BPF_PROG_TYPE_STRUCT_OPS:
16445 		if (!prog->aux->attach_func_proto->type)
16446 			return true;
16447 		break;
16448 	case BPF_PROG_TYPE_EXT:
16449 		/*
16450 		 * If the actual program is an extension, let it
16451 		 * return void - attaching will succeed only if the
16452 		 * program being replaced also returns void, and since
16453 		 * it has passed verification its actual type doesn't matter.
16454 		 */
16455 		if (subprog_returns_void(env, 0))
16456 			return true;
16457 		break;
16458 	default:
16459 		break;
16460 	}
16461 	return false;
16462 }
16463 
/*
 * Validate the value held in register @regno at program exit.
 *
 * @env:      verifier environment
 * @regno:    register that carries the return value
 * @reg_name: register name handed to verbose_invalid_scalar() for the
 *            diagnostic message
 *
 * The accepted range comes from frame 0's callback_ret_range when frame 0
 * is an async callback, and from return_retval_range() otherwise. No range
 * is enforced when return_retval_range() reports that none applies, or
 * when program_returns_void() holds outside of an async callback.
 *
 * Return: 0 if the value is acceptable, a negative errno otherwise.
 */
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
{
	const char *exit_ctx = "At program exit";
	struct tnum enforce_attach_type_range = tnum_unknown;
	const struct bpf_prog *prog = env->prog;
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_retval_range range = retval_range(0, 1);
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	struct bpf_func_state *frame = env->cur_state->frame[0];
	const struct btf_type *reg_type, *ret_type = NULL;
	int err;

	/* LSM and struct_ops func-ptr's return type could be "void" */
	if (!frame->in_async_callback_fn && program_returns_void(env))
		return 0;

	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
		/* Allow a struct_ops program to return a referenced kptr if it
		 * matches the operator's return type and is in its unmodified
		 * form. A scalar zero (i.e., a null pointer) is also allowed.
		 */
		reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
		ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
						prog->aux->attach_func_proto->type,
						NULL);
		if (ret_type && ret_type == reg_type && reg_is_referenced(env, reg))
			return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false);
	}

	/* eBPF calling convention is such that R0 is used
	 * to return the value from eBPF program.
	 * Make sure that it's readable at this time
	 * of bpf_exit, which means that program wrote
	 * something into it earlier
	 */
	err = check_reg_arg(env, regno, SRC_OP);
	if (err)
		return err;

	if (is_pointer_value(env, regno)) {
		verbose(env, "R%d leaks addr as return value\n", regno);
		return -EACCES;
	}

	/* Async callbacks carry their own range; skip the per-program-type lookup. */
	if (frame->in_async_callback_fn) {
		exit_ctx = "At async callback return";
		range = frame->callback_ret_range;
		goto enforce_retval;
	}

	/* struct_ops with no pointer return type resolved above: no range is enforced here */
	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type)
		return 0;

	/* Remember the [2, 3] sub-range so the attach type can be pinned below. */
	if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS))
		enforce_attach_type_range = tnum_range(2, 3);

	/* false means any return value is allowed for this program */
	if (!return_retval_range(env, &range))
		return 0;

enforce_retval:
	if (reg->type != SCALAR_VALUE) {
		verbose(env, "%s the register R%d is not a known value (%s)\n",
			exit_ctx, regno, reg_type_str(env, reg->type));
		return -EINVAL;
	}

	err = mark_chain_precision(env, regno);
	if (err)
		return err;

	if (!retval_range_within(range, reg)) {
		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
		/* extra hint for BPF_LSM_CGROUP programs attached to void hooks */
		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
		    prog_type == BPF_PROG_TYPE_LSM &&
		    !prog->aux->attach_func_proto->type)
			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
		return -EINVAL;
	}

	/* The return value is known to stay inside the attach-type specific
	 * range recorded above: flag the program accordingly.
	 */
	if (!tnum_is_unknown(enforce_attach_type_range) &&
	    tnum_in(enforce_attach_type_range, reg->var_off))
		env->prog->enforce_expected_attach_type = 1;
	return 0;
}
16548 
16549 static int check_global_subprog_return_code(struct bpf_verifier_env *env)
16550 {
16551 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
16552 	struct bpf_func_state *cur_frame = cur_func(env);
16553 	int err;
16554 
16555 	if (subprog_returns_void(env, cur_frame->subprogno))
16556 		return 0;
16557 
16558 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
16559 	if (err)
16560 		return err;
16561 
16562 	/* Pointers to arena are safe to pass between subprograms. */
16563 	if (is_arena_reg(env, BPF_REG_0))
16564 		return 0;
16565 
16566 	if (is_pointer_value(env, BPF_REG_0)) {
16567 		verbose(env, "R%d leaks addr as return value\n", BPF_REG_0);
16568 		return -EACCES;
16569 	}
16570 
16571 	if (reg->type != SCALAR_VALUE) {
16572 		verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
16573 			reg_type_str(env, reg->type));
16574 		return -EINVAL;
16575 	}
16576 
16577 	return 0;
16578 }
16579 
/* Bitmask with 1s for all caller saved registers: the low
 * CALLER_SAVED_REGS bits are set, one bit per register number
 * (mark_fastcall_pattern_for_call() tests it against BIT(regno)).
 */
#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
16582 
16583 /* True if do_misc_fixups() replaces calls to helper number 'imm',
16584  * replacement patch is presumed to follow bpf_fastcall contract
16585  * (see mark_fastcall_pattern_for_call() below).
16586  */
16587 bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
16588 {
16589 	switch (imm) {
16590 #ifdef CONFIG_X86_64
16591 	case BPF_FUNC_get_smp_processor_id:
16592 #ifdef CONFIG_SMP
16593 	case BPF_FUNC_get_current_task_btf:
16594 	case BPF_FUNC_get_current_task:
16595 #endif
16596 		return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
16597 #endif
16598 	default:
16599 		return false;
16600 	}
16601 }
16602 
16603 /* If @call is a kfunc or helper call, fills @cs and returns true,
16604  * otherwise returns false.
16605  */
16606 bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
16607 			  struct bpf_call_summary *cs)
16608 {
16609 	struct bpf_kfunc_call_arg_meta meta;
16610 	const struct bpf_func_proto *fn;
16611 	int i;
16612 
16613 	if (bpf_helper_call(call)) {
16614 
16615 		if (bpf_get_helper_proto(env, call->imm, &fn) < 0)
16616 			/* error would be reported later */
16617 			return false;
16618 		cs->fastcall = fn->allow_fastcall &&
16619 			       (bpf_verifier_inlines_helper_call(env, call->imm) ||
16620 				bpf_jit_inlines_helper_call(call->imm));
16621 		cs->is_void = fn->ret_type == RET_VOID;
16622 		cs->num_params = 0;
16623 		for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
16624 			if (fn->arg_type[i] == ARG_DONTCARE)
16625 				break;
16626 			cs->num_params++;
16627 		}
16628 		return true;
16629 	}
16630 
16631 	if (bpf_pseudo_kfunc_call(call)) {
16632 		int err;
16633 
16634 		err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
16635 		if (err < 0)
16636 			/* error would be reported later */
16637 			return false;
16638 		cs->num_params = btf_type_vlen(meta.func_proto);
16639 		cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
16640 		cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
16641 		return true;
16642 	}
16643 
16644 	return false;
16645 }
16646 
/* LLVM defines a bpf_fastcall function attribute.
16648  * This attribute means that function scratches only some of
16649  * the caller saved registers defined by ABI.
16650  * For BPF the set of such registers could be defined as follows:
16651  * - R0 is scratched only if function is non-void;
16652  * - R1-R5 are scratched only if corresponding parameter type is defined
16653  *   in the function prototype.
16654  *
16655  * The contract between kernel and clang allows to simultaneously use
16656  * such functions and maintain backwards compatibility with old
16657  * kernels that don't understand bpf_fastcall calls:
16658  *
16659  * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
16660  *   registers are not scratched by the call;
16661  *
16662  * - as a post-processing step, clang visits each bpf_fastcall call and adds
16663  *   spill/fill for every live r0-r5;
16664  *
16665  * - stack offsets used for the spill/fill are allocated as lowest
16666  *   stack offsets in whole function and are not used for any other
16667  *   purposes;
16668  *
16669  * - when kernel loads a program, it looks for such patterns
16670  *   (bpf_fastcall function surrounded by spills/fills) and checks if
16671  *   spill/fill stack offsets are used exclusively in fastcall patterns;
16672  *
16673  * - if so, and if verifier or current JIT inlines the call to the
16674  *   bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
16675  *   spill/fill pairs;
16676  *
16677  * - when old kernel loads a program, presence of spill/fill pairs
16678  *   keeps BPF program valid, albeit slightly less efficient.
16679  *
16680  * For example:
16681  *
16682  *   r1 = 1;
16683  *   r2 = 2;
16684  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16685  *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
16686  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16687  *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
16688  *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
16689  *   r0 = r1;                            exit;
16690  *   r0 += r2;
16691  *   exit;
16692  *
16693  * The purpose of mark_fastcall_pattern_for_call is to:
16694  * - look for such patterns;
16695  * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
 * - set env->insn_aux_data[*].fastcall_spills_num for the call instruction;
16697  * - update env->subprog_info[*]->fastcall_stack_off to find an offset
16698  *   at which bpf_fastcall spill/fill stack slots start;
16699  * - update env->subprog_info[*]->keep_fastcall_stack.
16700  *
16701  * The .fastcall_pattern and .fastcall_stack_off are used by
16702  * check_fastcall_stack_contract() to check if every stack access to
16703  * fastcall spill/fill stack slot originates from spill/fill
16704  * instructions, members of fastcall patterns.
16705  *
16706  * If such condition holds true for a subprogram, fastcall patterns could
16707  * be rewritten by remove_fastcall_spills_fills().
16708  * Otherwise bpf_fastcall patterns are not changed in the subprogram
16709  * (code, presumably, generated by an older clang version).
16710  *
16711  * For example, it is *not* safe to remove spill/fill below:
16712  *
16713  *   r1 = 1;
16714  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16715  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16716  *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
16717  *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
16718  *   r0 += r1;                           exit;
16719  *   exit;
16720  */
static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
					   struct bpf_subprog_info *subprog,
					   int insn_idx, s16 lowest_off)
{
	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
	struct bpf_insn *call = &env->prog->insnsi[insn_idx];
	u32 clobbered_regs_mask;
	struct bpf_call_summary cs;
	u32 expected_regs_mask;
	s16 off;
	int i;

	/* not a helper/kfunc call, or the callee could not be resolved */
	if (!bpf_get_call_summary(env, call, &cs))
		return;

	/* A bitmask specifying which caller saved registers are clobbered
	 * by a call to a helper/kfunc *as if* this helper/kfunc follows
	 * bpf_fastcall contract:
	 * - includes R0 if function is non-void;
	 * - includes R1-R5 if corresponding parameter is described
	 *   in the function prototype.
	 */
	clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
	/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
	expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;

	/* match pairs of form:
	 *
	 * *(u64 *)(r10 - Y) = rX   (where Y % 8 == 0)
	 * ...
	 * call %[to_be_inlined]
	 * ...
	 * rX = *(u64 *)(r10 - Y)
	 *
	 * The i-th pair sits i instructions before and after the call and
	 * must use stack offset lowest_off + (i - 1) * BPF_REG_SIZE.
	 */
	for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
		/* stay within the instruction array */
		if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
			break;
		stx = &insns[insn_idx - i];
		ldx = &insns[insn_idx + i];
		/* must be a stack spill/fill pair */
		if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
		    stx->dst_reg != BPF_REG_10 ||
		    ldx->src_reg != BPF_REG_10)
			break;
		/* must be a spill/fill for the same reg */
		if (stx->src_reg != ldx->dst_reg)
			break;
		/* must be one of the previously unseen registers */
		if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
			break;
		/* must be a spill/fill for the same expected offset,
		 * no need to check offset alignment, BPF_DW stack access
		 * is always 8-byte aligned.
		 */
		if (stx->off != off || ldx->off != off)
			break;
		expected_regs_mask &= ~BIT(stx->src_reg);
		env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
		env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
	}
	/* no spill/fill pair matched around this call */
	if (i == 1)
		return;

	/* Conditionally set 'fastcall_spills_num' to allow forward
	 * compatibility when more helper functions are marked as
	 * bpf_fastcall at compile time than current kernel supports, e.g:
	 *
	 *   1: *(u64 *)(r10 - 8) = r1
	 *   2: call A                  ;; assume A is bpf_fastcall for current kernel
	 *   3: r1 = *(u64 *)(r10 - 8)
	 *   4: *(u64 *)(r10 - 8) = r1
	 *   5: call B                  ;; assume B is not bpf_fastcall for current kernel
	 *   6: r1 = *(u64 *)(r10 - 8)
	 *
	 * There is no need to block bpf_fastcall rewrite for such program.
	 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
	 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
	 * does not remove spill/fill pair {4,6}.
	 */
	if (cs.fastcall)
		env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
	else
		subprog->keep_fastcall_stack = 1;
	/* 'off' was advanced once per matched pair, so here it equals
	 * lowest_off + (i - 1) * BPF_REG_SIZE.
	 */
	subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
}
16807 
16808 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
16809 {
16810 	struct bpf_subprog_info *subprog = env->subprog_info;
16811 	struct bpf_insn *insn;
16812 	s16 lowest_off;
16813 	int s, i;
16814 
16815 	for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
16816 		/* find lowest stack spill offset used in this subprog */
16817 		lowest_off = 0;
16818 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
16819 			insn = env->prog->insnsi + i;
16820 			if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
16821 			    insn->dst_reg != BPF_REG_10)
16822 				continue;
16823 			lowest_off = min(lowest_off, insn->off);
16824 		}
16825 		/* use this offset to find fastcall patterns */
16826 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
16827 			insn = env->prog->insnsi + i;
16828 			if (insn->code != (BPF_JMP | BPF_CALL))
16829 				continue;
16830 			mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
16831 		}
16832 	}
16833 	return 0;
16834 }
16835 
16836 static void adjust_btf_func(struct bpf_verifier_env *env)
16837 {
16838 	struct bpf_prog_aux *aux = env->prog->aux;
16839 	int i;
16840 
16841 	if (!aux->func_info)
16842 		return;
16843 
16844 	/* func_info is not available for hidden subprogs */
16845 	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
16846 		aux->func_info[i].insn_off = env->subprog_info[i].start;
16847 }
16848 
16849 /* Find id in idset and increment its count, or add new entry */
16850 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
16851 {
16852 	u32 i;
16853 
16854 	for (i = 0; i < idset->num_ids; i++) {
16855 		if (idset->entries[i].id == id) {
16856 			idset->entries[i].cnt++;
16857 			return;
16858 		}
16859 	}
16860 	/* New id */
16861 	if (idset->num_ids < BPF_ID_MAP_SIZE) {
16862 		idset->entries[idset->num_ids].id = id;
16863 		idset->entries[idset->num_ids].cnt = 1;
16864 		idset->num_ids++;
16865 	}
16866 }
16867 
16868 /* Find id in idset and return its count, or 0 if not found */
16869 static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
16870 {
16871 	u32 i;
16872 
16873 	for (i = 0; i < idset->num_ids; i++) {
16874 		if (idset->entries[i].id == id)
16875 			return idset->entries[i].cnt;
16876 	}
16877 	return 0;
16878 }
16879 
16880 /*
16881  * Clear singular scalar ids in a state.
16882  * A register with a non-zero id is called singular if no other register shares
16883  * the same base id. Such registers can be treated as independent (id=0).
16884  */
16885 void bpf_clear_singular_ids(struct bpf_verifier_env *env,
16886 			    struct bpf_verifier_state *st)
16887 {
16888 	struct bpf_idset *idset = &env->idset_scratch;
16889 	struct bpf_func_state *func;
16890 	struct bpf_reg_state *reg;
16891 
16892 	idset->num_ids = 0;
16893 
16894 	bpf_for_each_reg_in_vstate(st, func, reg, ({
16895 		if (reg->type != SCALAR_VALUE)
16896 			continue;
16897 		if (!reg->id)
16898 			continue;
16899 		idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST);
16900 	}));
16901 
16902 	bpf_for_each_reg_in_vstate(st, func, reg, ({
16903 		if (reg->type != SCALAR_VALUE)
16904 			continue;
16905 		if (!reg->id)
16906 			continue;
16907 		if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1)
16908 			clear_scalar_id(reg);
16909 	}));
16910 }
16911 
16912 /* Return true if it's OK to have the same insn return a different type. */
16913 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
16914 {
16915 	switch (base_type(type)) {
16916 	case PTR_TO_CTX:
16917 	case PTR_TO_SOCKET:
16918 	case PTR_TO_SOCK_COMMON:
16919 	case PTR_TO_TCP_SOCK:
16920 	case PTR_TO_XDP_SOCK:
16921 	case PTR_TO_BTF_ID:
16922 	case PTR_TO_ARENA:
16923 		return false;
16924 	default:
16925 		return true;
16926 	}
16927 }
16928 
16929 /* If an instruction was previously used with particular pointer types, then we
16930  * need to be careful to avoid cases such as the below, where it may be ok
16931  * for one branch accessing the pointer, but not ok for the other branch:
16932  *
16933  * R1 = sock_ptr
16934  * goto X;
16935  * ...
16936  * R1 = some_other_valid_ptr;
16937  * goto X;
16938  * ...
16939  * R2 = *(u32 *)(R1 + 0);
16940  */
16941 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
16942 {
16943 	return src != prev && (!reg_type_mismatch_ok(src) ||
16944 			       !reg_type_mismatch_ok(prev));
16945 }
16946 
16947 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
16948 {
16949 	switch (base_type(type)) {
16950 	case PTR_TO_MEM:
16951 	case PTR_TO_BTF_ID:
16952 		return true;
16953 	default:
16954 		return false;
16955 	}
16956 }
16957 
16958 static bool is_ptr_to_mem(enum bpf_reg_type type)
16959 {
16960 	return base_type(type) == PTR_TO_MEM;
16961 }
16962 
16963 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
16964 			     bool allow_trust_mismatch)
16965 {
16966 	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
16967 	enum bpf_reg_type merged_type;
16968 
16969 	if (*prev_type == NOT_INIT) {
16970 		/* Saw a valid insn
16971 		 * dst_reg = *(u32 *)(src_reg + off)
16972 		 * save type to validate intersecting paths
16973 		 */
16974 		*prev_type = type;
16975 	} else if (reg_type_mismatch(type, *prev_type)) {
16976 		/* Abuser program is trying to use the same insn
16977 		 * dst_reg = *(u32*) (src_reg + off)
16978 		 * with different pointer types:
16979 		 * src_reg == ctx in one branch and
16980 		 * src_reg == stack|map in some other branch.
16981 		 * Reject it.
16982 		 */
16983 		if (allow_trust_mismatch &&
16984 		    is_ptr_to_mem_or_btf_id(type) &&
16985 		    is_ptr_to_mem_or_btf_id(*prev_type)) {
16986 			/*
16987 			 * Have to support a use case when one path through
16988 			 * the program yields TRUSTED pointer while another
16989 			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
16990 			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
16991 			 * Same behavior of MEM_RDONLY flag.
16992 			 */
16993 			if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
16994 				merged_type = PTR_TO_MEM;
16995 			else
16996 				merged_type = PTR_TO_BTF_ID;
16997 			if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
16998 				merged_type |= PTR_UNTRUSTED;
16999 			if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
17000 				merged_type |= MEM_RDONLY;
17001 			*prev_type = merged_type;
17002 		} else {
17003 			verbose(env, "same insn cannot be used with different pointers\n");
17004 			return -EINVAL;
17005 		}
17006 	}
17007 
17008 	return 0;
17009 }
17010 
/* Positive status codes returned by the per-instruction checks below, in
 * addition to 0 and negative errno values.
 */
enum {
	/* an exit from the outermost frame, or an exceptional exit, passed its checks */
	PROCESS_BPF_EXIT = 1,
	/* the handler has already set env->insn_idx to the next instruction */
	INSN_IDX_UPDATED = 2,
};
17015 
17016 static int process_bpf_exit_full(struct bpf_verifier_env *env,
17017 				 bool *do_print_state,
17018 				 bool exception_exit)
17019 {
17020 	struct bpf_func_state *cur_frame = cur_func(env);
17021 
17022 	/* We must do check_reference_leak here before
17023 	 * prepare_func_exit to handle the case when
17024 	 * state->curframe > 0, it may be a callback function,
17025 	 * for which reference_state must match caller reference
17026 	 * state when it exits.
17027 	 */
17028 	int err = check_resource_leak(env, exception_exit,
17029 				      exception_exit || !env->cur_state->curframe,
17030 				      exception_exit ? "bpf_throw" :
17031 				      "BPF_EXIT instruction in main prog");
17032 	if (err)
17033 		return err;
17034 
17035 	/* The side effect of the prepare_func_exit which is
17036 	 * being skipped is that it frees bpf_func_state.
17037 	 * Typically, process_bpf_exit will only be hit with
17038 	 * outermost exit. copy_verifier_state in pop_stack will
17039 	 * handle freeing of any extra bpf_func_state left over
17040 	 * from not processing all nested function exits. We
17041 	 * also skip return code checks as they are not needed
17042 	 * for exceptional exits.
17043 	 */
17044 	if (exception_exit)
17045 		return PROCESS_BPF_EXIT;
17046 
17047 	if (env->cur_state->curframe) {
17048 		/* exit from nested function */
17049 		err = prepare_func_exit(env, &env->insn_idx);
17050 		if (err)
17051 			return err;
17052 		*do_print_state = true;
17053 		return INSN_IDX_UPDATED;
17054 	}
17055 
17056 	/*
17057 	 * Return from a regular global subprogram differs from return
17058 	 * from the main program or async/exception callback.
17059 	 * Main program exit implies return code restrictions
17060 	 * that depend on program type.
17061 	 * Exit from exception callback is equivalent to main program exit.
17062 	 * Exit from async callback implies return code restrictions
17063 	 * that depend on async scheduling mechanism.
17064 	 */
17065 	if (cur_frame->subprogno &&
17066 	    !cur_frame->in_async_callback_fn &&
17067 	    !cur_frame->in_exception_callback_fn)
17068 		err = check_global_subprog_return_code(env);
17069 	else
17070 		err = check_return_code(env, BPF_REG_0, "R0");
17071 	if (err)
17072 		return err;
17073 	return PROCESS_BPF_EXIT;
17074 }
17075 
17076 static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
17077 				       int regno,
17078 				       struct bpf_map *map,
17079 				       u32 *pmin_index, u32 *pmax_index)
17080 {
17081 	struct bpf_reg_state *reg = reg_state(env, regno);
17082 	u64 min_index = reg_umin(reg);
17083 	u64 max_index = reg_umax(reg);
17084 	const u32 size = 8;
17085 
17086 	if (min_index > (u64) U32_MAX * size) {
17087 		verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg_umin(reg));
17088 		return -ERANGE;
17089 	}
17090 	if (max_index > (u64) U32_MAX * size) {
17091 		verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg_umax(reg));
17092 		return -ERANGE;
17093 	}
17094 
17095 	min_index /= size;
17096 	max_index /= size;
17097 
17098 	if (max_index >= map->max_entries) {
17099 		verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n",
17100 			     regno, min_index, max_index, map->max_entries);
17101 		return -EINVAL;
17102 	}
17103 
17104 	*pmin_index = min_index;
17105 	*pmax_index = max_index;
17106 	return 0;
17107 }
17108 
17109 /* gotox *dst_reg */
17110 static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn)
17111 {
17112 	struct bpf_verifier_state *other_branch;
17113 	struct bpf_reg_state *dst_reg;
17114 	struct bpf_map *map;
17115 	u32 min_index, max_index;
17116 	int err = 0;
17117 	int n;
17118 	int i;
17119 
17120 	dst_reg = reg_state(env, insn->dst_reg);
17121 	if (dst_reg->type != PTR_TO_INSN) {
17122 		verbose(env, "R%d has type %s, expected PTR_TO_INSN\n",
17123 			     insn->dst_reg, reg_type_str(env, dst_reg->type));
17124 		return -EINVAL;
17125 	}
17126 
17127 	map = dst_reg->map_ptr;
17128 	if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg))
17129 		return -EFAULT;
17130 
17131 	if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env,
17132 			    "R%d has incorrect map type %d", insn->dst_reg, map->map_type))
17133 		return -EFAULT;
17134 
17135 	err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index);
17136 	if (err)
17137 		return err;
17138 
17139 	/* Ensure that the buffer is large enough */
17140 	if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
17141 		env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf,
17142 						        max_index - min_index + 1);
17143 		if (!env->gotox_tmp_buf)
17144 			return -ENOMEM;
17145 	}
17146 
17147 	n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
17148 	if (n < 0)
17149 		return n;
17150 	if (n == 0) {
17151 		verbose(env, "register R%d doesn't point to any offset in map id=%d\n",
17152 			     insn->dst_reg, map->id);
17153 		return -EINVAL;
17154 	}
17155 
17156 	for (i = 0; i < n - 1; i++) {
17157 		mark_indirect_target(env, env->gotox_tmp_buf->items[i]);
17158 		other_branch = push_stack(env, env->gotox_tmp_buf->items[i],
17159 					  env->insn_idx, env->cur_state->speculative);
17160 		if (IS_ERR(other_branch))
17161 			return PTR_ERR(other_branch);
17162 	}
17163 	env->insn_idx = env->gotox_tmp_buf->items[n-1];
17164 	mark_indirect_target(env, env->insn_idx);
17165 	return INSN_IDX_UPDATED;
17166 }
17167 
/*
 * Verify the single instruction at env->insn_idx by dispatching on its
 * instruction class.
 *
 * @env:            verifier environment
 * @do_print_state: forwarded to process_bpf_exit_full() for BPF_EXIT
 *
 * Return: the value of the class specific check, passed through
 * unchanged: a negative errno on failure, otherwise 0 or one of the
 * positive codes PROCESS_BPF_EXIT / INSN_IDX_UPDATED.
 */
static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
{
	int err;
	struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
	u8 class = BPF_CLASS(insn->code);

	switch (class) {
	case BPF_ALU:
	case BPF_ALU64:
		return check_alu_op(env, insn);

	case BPF_LDX:
		return check_load_mem(env, insn, false,
				      BPF_MODE(insn->code) == BPF_MEMSX,
				      true, "ldx");

	case BPF_STX:
		if (BPF_MODE(insn->code) == BPF_ATOMIC)
			return check_atomic(env, insn);
		return check_store_reg(env, insn, false);

	case BPF_ST: {
		/* Handle stack arg write (store immediate) */
		if (is_stack_arg_st(insn)) {
			struct bpf_verifier_state *vstate = env->cur_state;
			struct bpf_func_state *state = vstate->frame[vstate->curframe];

			return check_stack_arg_write(env, state, insn->off, NULL);
		}

		enum bpf_reg_type dst_reg_type;

		/* the destination register is only read: it holds the address */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		/* sample the type before the memory access check runs */
		dst_reg_type = cur_regs(env)[insn->dst_reg].type;

		err = check_mem_access(env, env->insn_idx, cur_regs(env) + insn->dst_reg, argno_from_reg(insn->dst_reg),
				       insn->off, BPF_SIZE(insn->code),
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;

		return save_aux_ptr_type(env, dst_reg_type, false);
	}
	case BPF_JMP:
	case BPF_JMP32: {
		u8 opcode = BPF_OP(insn->code);

		env->jmps_processed++;
		if (opcode == BPF_CALL) {
			/* While a lock is held only the spin_unlock and
			 * kptr_xchg helpers, and kfuncs accepted by
			 * kfunc_spin_allowed() with off == 0, may be called.
			 */
			if (env->cur_state->active_locks) {
				if ((insn->src_reg == BPF_REG_0 &&
				     insn->imm != BPF_FUNC_spin_unlock &&
				     insn->imm != BPF_FUNC_kptr_xchg) ||
				    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
				     (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
					verbose(env,
						"function calls are not allowed while holding a lock\n");
					return -EINVAL;
				}
			}
			mark_reg_scratched(env, BPF_REG_0);
			if (bpf_in_stack_arg_cnt(&env->subprog_info[cur_func(env)->subprogno]))
				cur_func(env)->no_stack_arg_load = true;
			/* src_reg selects the kind of call */
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return check_func_call(env, insn, &env->insn_idx);
			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
				return check_kfunc_call(env, insn, &env->insn_idx);
			return check_helper_call(env, insn, &env->insn_idx);
		} else if (opcode == BPF_JA) {
			/* BPF_X source: indirect jump through a register */
			if (BPF_SRC(insn->code) == BPF_X)
				return check_indirect_jump(env, insn);

			/* BPF_JMP keeps the jump distance in 'off', BPF_JMP32 in 'imm' */
			if (class == BPF_JMP)
				env->insn_idx += insn->off + 1;
			else
				env->insn_idx += insn->imm + 1;
			return INSN_IDX_UPDATED;
		} else if (opcode == BPF_EXIT) {
			return process_bpf_exit_full(env, do_print_state, false);
		}
		return check_cond_jmp_op(env, insn, &env->insn_idx);
	}
	case BPF_LD: {
		u8 mode = BPF_MODE(insn->code);

		if (mode == BPF_ABS || mode == BPF_IND)
			return check_ld_abs(env, insn);

		if (mode == BPF_IMM) {
			err = check_ld_imm(env, insn);
			if (err)
				return err;

			/* NOTE(review): presumably steps over the second slot
			 * of a two-slot load-immediate insn - confirm.
			 */
			env->insn_idx++;
			sanitize_mark_insn_seen(env);
		}
		return 0;
	}
	}
	/* all class values are handled above. silence compiler warning */
	return -EFAULT;
}
17273 
/*
 * Main verification loop.
 *
 * Starting at env->insn_idx, processes one instruction per iteration via
 * do_check_insn(). When the current path ends (pruned, PROCESS_BPF_EXIT, or a
 * speculative path that is cut short), the next state to continue from is
 * obtained with pop_stack().
 *
 * Returns:
 *   0        pop_stack() returned -ENOENT, i.e. it had nothing more to hand
 *            out and the loop was left normally
 *   -EFAULT  env->insn_idx ran past the program, or one of the
 *            verifier_bug_if() consistency checks fired
 *   -E2BIG   more than BPF_COMPLEXITY_LIMIT_INSNS instructions were processed
 *   -EAGAIN  a signal is pending for the current task
 *   other    negative error propagated from a callee
 */
static int do_check(struct bpf_verifier_env *env)
{
	/* forwarded to pop_stack(); true unless BPF_LOG_LEVEL2 logging is on */
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	/*
	 * NOTE(review): sampled once, while the loop below also dereferences
	 * env->cur_state directly. This presumably relies on pop_stack() not
	 * replacing the env->cur_state pointer - confirm.
	 */
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_insn *insns = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	/* set after pop_stack() so the "from X to Y" line is printed once */
	bool do_print_state = false;
	/* -1: there is no previous instruction on the very first iteration */
	int prev_insn_idx = -1;

	for (;;) {
		struct bpf_insn *insn;
		struct bpf_insn_aux_data *insn_aux;
		int err;

		/* reset current history entry on each new instruction */
		env->cur_hist_ent = NULL;

		env->prev_insn_idx = prev_insn_idx;
		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		insn_aux = &env->insn_aux_data[env->insn_idx];

		/* counts every visit of an instruction, across all paths */
		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				env->insn_processed);
			return -E2BIG;
		}

		/* record in the state where it came from and where it is now */
		state->last_insn_idx = env->prev_insn_idx;
		state->insn_idx = env->insn_idx;

		if (bpf_is_prune_point(env, env->insn_idx)) {
			err = bpf_is_state_visited(env, env->insn_idx);
			if (err < 0)
				return err;
			if (err == 1) {
				/* found equivalent state, can prune the search */
				if (env->log.level & BPF_LOG_LEVEL) {
					if (do_print_state)
						verbose(env, "\nfrom %d to %d%s: safe\n",
							env->prev_insn_idx, env->insn_idx,
							env->cur_state->speculative ?
							" (speculative execution)" : "");
					else
						verbose(env, "%d: safe\n", env->insn_idx);
				}
				goto process_bpf_exit;
			}
		}

		if (bpf_is_jmp_point(env, env->insn_idx)) {
			err = bpf_push_jmp_history(env, state, 0, 0, 0, 0);
			if (err)
				return err;
		}

		/* let the caller bail out of a long verification run */
		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		/* first insn after pop_stack(): dump the state we resumed with */
		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
			verbose(env, "\nfrom %d to %d%s:",
				env->prev_insn_idx, env->insn_idx,
				env->cur_state->speculative ?
				" (speculative execution)" : "");
			print_verifier_state(env, state, state->curframe, true);
			do_print_state = false;
		}

		if (env->log.level & BPF_LOG_LEVEL) {
			if (verifier_state_scratched(env))
				print_insn_state(env, state, state->curframe);

			verbose_linfo(env, env->insn_idx, "; ");
			env->prev_log_pos = env->log.end_pos;
			verbose(env, "%d: ", env->insn_idx);
			bpf_verbose_insn(env, insn);
			/* length of the "<idx>: <insn>" text that was just logged */
			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
			env->prev_log_pos = env->log.end_pos;
		}

		if (bpf_prog_is_offloaded(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
			if (err)
				return err;
		}

		sanitize_mark_insn_seen(env);
		/* remembered before do_check_insn() may change env->insn_idx */
		prev_insn_idx = env->insn_idx;

		/* Sanity check: precomputed constants must match verifier state */
		if (!state->speculative && insn_aux->const_reg_mask) {
			struct bpf_reg_state *regs = cur_regs(env);
			u16 mask = insn_aux->const_reg_mask;

			for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) {
				u32 cval = insn_aux->const_reg_vals[r];

				/*
				 * Only registers flagged in the mask that the
				 * verifier currently tracks as a constant
				 * scalar are compared; only the low 32 bits
				 * of the tracked value take part.
				 */
				if (!(mask & BIT(r)))
					continue;
				if (regs[r].type != SCALAR_VALUE)
					continue;
				if (!tnum_is_const(regs[r].var_off))
					continue;
				if (verifier_bug_if((u32)regs[r].var_off.value != cval,
						    env, "const R%d: %u != %llu",
						    r, cval, regs[r].var_off.value))
					return -EFAULT;
			}
		}

		/* Reduce verification complexity by stopping speculative path
		 * verification when a nospec is encountered.
		 */
		if (state->speculative && insn_aux->nospec)
			goto process_bpf_exit;

		err = do_check_insn(env, &do_print_state);
		if (error_recoverable_with_nospec(err) && state->speculative) {
			/* Prevent this speculative path from ever reaching the
			 * insn that would have been unsafe to execute.
			 */
			insn_aux->nospec = true;
			/* If it was an ADD/SUB insn, potentially remove any
			 * markings for alu sanitization.
			 */
			insn_aux->alu_state = 0;
			goto process_bpf_exit;
		} else if (err < 0) {
			return err;
		} else if (err == PROCESS_BPF_EXIT) {
			goto process_bpf_exit;
		} else if (err == INSN_IDX_UPDATED) {
			/* do_check_insn() already moved env->insn_idx */
		} else if (err == 0) {
			env->insn_idx++;
		}

		if (state->speculative && insn_aux->nospec_result) {
			/* If we are on a path that performed a jump-op, this
			 * may skip a nospec patched-in after the jump. This can
			 * currently never happen because nospec_result is only
			 * used for the write-ops
			 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
			 * calls. These must never skip the following insn
			 * (i.e., bpf_insn_successors()'s opcode_info.can_jump
			 * is false). Still, add a warning to document this in
			 * case nospec_result is used elsewhere in the future.
			 *
			 * All non-branch instructions have a single
			 * fall-through edge. For these, nospec_result should
			 * already work.
			 */
			if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
					     BPF_CLASS(insn->code) == BPF_JMP32) &&
					    BPF_OP(insn->code) != BPF_CALL, env,
					    "speculation barrier after jump instruction may not have the desired effect"))
				return -EFAULT;
			/*
			 * The label deliberately lives inside this branch: it
			 * is reached both by the gotos above and by falling
			 * through for a speculative state whose insn has
			 * nospec_result set. In either case the current path
			 * ends here and the next one is taken from
			 * pop_stack().
			 */
process_bpf_exit:
			mark_verifier_state_scratched(env);
			err = bpf_update_branch_counts(env, env->cur_state);
			if (err)
				return err;
			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
					pop_log);
			if (err < 0) {
				/* -ENOENT ends the loop; anything else is a real error */
				if (err != -ENOENT)
					return err;
				break;
			} else {
				do_print_state = true;
				continue;
			}
		}
	}

	return 0;
}
17460 
17461 static int find_btf_percpu_datasec(struct btf *btf)
17462 {
17463 	const struct btf_type *t;
17464 	const char *tname;
17465 	int i, n;
17466 
17467 	/*
17468 	 * Both vmlinux and module each have their own ".data..percpu"
17469 	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17470 	 * types to look at only module's own BTF types.
17471 	 */
17472 	n = btf_nr_types(btf);
17473 	for (i = btf_named_start_id(btf, true); i < n; i++) {
17474 		t = btf_type_by_id(btf, i);
17475 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17476 			continue;
17477 
17478 		tname = btf_name_by_offset(btf, t->name_off);
17479 		if (!strcmp(tname, ".data..percpu"))
17480 			return i;
17481 	}
17482 
17483 	return -ENOENT;
17484 }
17485 
17486 /*
17487  * Add btf to the env->used_btfs array. If needed, refcount the
17488  * corresponding kernel module. To simplify caller's logic
17489  * in case of error or if btf was added before the function
17490  * decreases the btf refcount.
17491  */
17492 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
17493 {
17494 	struct btf_mod_pair *btf_mod;
17495 	int ret = 0;
17496 	int i;
17497 
17498 	/* check whether we recorded this BTF (and maybe module) already */
17499 	for (i = 0; i < env->used_btf_cnt; i++)
17500 		if (env->used_btfs[i].btf == btf)
17501 			goto ret_put;
17502 
17503 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
17504 		verbose(env, "The total number of btfs per program has reached the limit of %u\n",
17505 			MAX_USED_BTFS);
17506 		ret = -E2BIG;
17507 		goto ret_put;
17508 	}
17509 
17510 	btf_mod = &env->used_btfs[env->used_btf_cnt];
17511 	btf_mod->btf = btf;
17512 	btf_mod->module = NULL;
17513 
17514 	/* if we reference variables from kernel module, bump its refcount */
17515 	if (btf_is_module(btf)) {
17516 		btf_mod->module = btf_try_get_module(btf);
17517 		if (!btf_mod->module) {
17518 			ret = -ENXIO;
17519 			goto ret_put;
17520 		}
17521 	}
17522 
17523 	env->used_btf_cnt++;
17524 	return 0;
17525 
17526 ret_put:
17527 	/* Either error or this BTF was already added */
17528 	btf_put(btf);
17529 	return ret;
17530 }
17531 
17532 /* replace pseudo btf_id with kernel symbol address */
17533 static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
17534 				 struct bpf_insn *insn,
17535 				 struct bpf_insn_aux_data *aux,
17536 				 struct btf *btf)
17537 {
17538 	const struct btf_var_secinfo *vsi;
17539 	const struct btf_type *datasec;
17540 	const struct btf_type *t;
17541 	const char *sym_name;
17542 	bool percpu = false;
17543 	u32 type, id = insn->imm;
17544 	s32 datasec_id;
17545 	u64 addr;
17546 	int i;
17547 
17548 	t = btf_type_by_id(btf, id);
17549 	if (!t) {
17550 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
17551 		return -ENOENT;
17552 	}
17553 
17554 	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
17555 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
17556 		return -EINVAL;
17557 	}
17558 
17559 	sym_name = btf_name_by_offset(btf, t->name_off);
17560 	addr = kallsyms_lookup_name(sym_name);
17561 	if (!addr) {
17562 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
17563 			sym_name);
17564 		return -ENOENT;
17565 	}
17566 	insn[0].imm = (u32)addr;
17567 	insn[1].imm = addr >> 32;
17568 
17569 	if (btf_type_is_func(t)) {
17570 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17571 		aux->btf_var.mem_size = 0;
17572 		return 0;
17573 	}
17574 
17575 	datasec_id = find_btf_percpu_datasec(btf);
17576 	if (datasec_id > 0) {
17577 		datasec = btf_type_by_id(btf, datasec_id);
17578 		for_each_vsi(i, datasec, vsi) {
17579 			if (vsi->type == id) {
17580 				percpu = true;
17581 				break;
17582 			}
17583 		}
17584 	}
17585 
17586 	type = t->type;
17587 	t = btf_type_skip_modifiers(btf, type, NULL);
17588 	if (percpu) {
17589 		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
17590 		aux->btf_var.btf = btf;
17591 		aux->btf_var.btf_id = type;
17592 	} else if (!btf_type_is_struct(t)) {
17593 		const struct btf_type *ret;
17594 		const char *tname;
17595 		u32 tsize;
17596 
17597 		/* resolve the type size of ksym. */
17598 		ret = btf_resolve_size(btf, t, &tsize);
17599 		if (IS_ERR(ret)) {
17600 			tname = btf_name_by_offset(btf, t->name_off);
17601 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
17602 				tname, PTR_ERR(ret));
17603 			return -EINVAL;
17604 		}
17605 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17606 		aux->btf_var.mem_size = tsize;
17607 	} else {
17608 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
17609 		aux->btf_var.btf = btf;
17610 		aux->btf_var.btf_id = type;
17611 	}
17612 
17613 	return 0;
17614 }
17615 
17616 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
17617 			       struct bpf_insn *insn,
17618 			       struct bpf_insn_aux_data *aux)
17619 {
17620 	struct btf *btf;
17621 	int btf_fd;
17622 	int err;
17623 
17624 	btf_fd = insn[1].imm;
17625 	if (btf_fd) {
17626 		btf = btf_get_by_fd(btf_fd);
17627 		if (IS_ERR(btf)) {
17628 			verbose(env, "invalid module BTF object FD specified.\n");
17629 			return -EINVAL;
17630 		}
17631 	} else {
17632 		if (!btf_vmlinux) {
17633 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
17634 			return -EINVAL;
17635 		}
17636 		btf_get(btf_vmlinux);
17637 		btf = btf_vmlinux;
17638 	}
17639 
17640 	err = __check_pseudo_btf_id(env, insn, aux, btf);
17641 	if (err) {
17642 		btf_put(btf);
17643 		return err;
17644 	}
17645 
17646 	return __add_used_btf(env, btf);
17647 }
17648 
17649 static bool is_tracing_prog_type(enum bpf_prog_type type)
17650 {
17651 	switch (type) {
17652 	case BPF_PROG_TYPE_KPROBE:
17653 	case BPF_PROG_TYPE_TRACEPOINT:
17654 	case BPF_PROG_TYPE_PERF_EVENT:
17655 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
17656 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
17657 		return true;
17658 	default:
17659 		return false;
17660 	}
17661 }
17662 
17663 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
17664 {
17665 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
17666 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
17667 }
17668 
17669 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
17670 					struct bpf_map *map,
17671 					struct bpf_prog *prog)
17672 
17673 {
17674 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
17675 
17676 	if (map->excl_prog_sha &&
17677 	    memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
17678 		verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
17679 		return -EACCES;
17680 	}
17681 
17682 	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
17683 	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
17684 		if (is_tracing_prog_type(prog_type)) {
17685 			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
17686 			return -EINVAL;
17687 		}
17688 	}
17689 
17690 	if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
17691 		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
17692 			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
17693 			return -EINVAL;
17694 		}
17695 
17696 		if (is_tracing_prog_type(prog_type)) {
17697 			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
17698 			return -EINVAL;
17699 		}
17700 	}
17701 
17702 	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
17703 	    !bpf_offload_prog_map_match(prog, map)) {
17704 		verbose(env, "offload device mismatch between prog and map\n");
17705 		return -EINVAL;
17706 	}
17707 
17708 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
17709 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
17710 		return -EINVAL;
17711 	}
17712 
17713 	if (prog->sleepable)
17714 		switch (map->map_type) {
17715 		case BPF_MAP_TYPE_HASH:
17716 		case BPF_MAP_TYPE_RHASH:
17717 		case BPF_MAP_TYPE_LRU_HASH:
17718 		case BPF_MAP_TYPE_ARRAY:
17719 		case BPF_MAP_TYPE_PERCPU_HASH:
17720 		case BPF_MAP_TYPE_PERCPU_ARRAY:
17721 		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
17722 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
17723 		case BPF_MAP_TYPE_HASH_OF_MAPS:
17724 		case BPF_MAP_TYPE_RINGBUF:
17725 		case BPF_MAP_TYPE_USER_RINGBUF:
17726 		case BPF_MAP_TYPE_INODE_STORAGE:
17727 		case BPF_MAP_TYPE_SK_STORAGE:
17728 		case BPF_MAP_TYPE_TASK_STORAGE:
17729 		case BPF_MAP_TYPE_CGRP_STORAGE:
17730 		case BPF_MAP_TYPE_QUEUE:
17731 		case BPF_MAP_TYPE_STACK:
17732 		case BPF_MAP_TYPE_ARENA:
17733 		case BPF_MAP_TYPE_INSN_ARRAY:
17734 		case BPF_MAP_TYPE_PROG_ARRAY:
17735 			break;
17736 		default:
17737 			verbose(env,
17738 				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
17739 			return -EINVAL;
17740 		}
17741 
17742 	if (bpf_map_is_cgroup_storage(map) &&
17743 	    bpf_cgroup_storage_assign(env->prog->aux, map)) {
17744 		verbose(env, "only one cgroup storage of each type is allowed\n");
17745 		return -EBUSY;
17746 	}
17747 
17748 	if (map->map_type == BPF_MAP_TYPE_ARENA) {
17749 		if (env->prog->aux->arena) {
17750 			verbose(env, "Only one arena per program\n");
17751 			return -EBUSY;
17752 		}
17753 		if (!env->allow_ptr_leaks || !env->bpf_capable) {
17754 			verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
17755 			return -EPERM;
17756 		}
17757 		if (!env->prog->jit_requested) {
17758 			verbose(env, "JIT is required to use arena\n");
17759 			return -EOPNOTSUPP;
17760 		}
17761 		if (!bpf_jit_supports_arena()) {
17762 			verbose(env, "JIT doesn't support arena\n");
17763 			return -EOPNOTSUPP;
17764 		}
17765 		env->prog->aux->arena = (void *)map;
17766 		if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
17767 			verbose(env, "arena's user address must be set via map_extra or mmap()\n");
17768 			return -EINVAL;
17769 		}
17770 	}
17771 
17772 	return 0;
17773 }
17774 
17775 static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
17776 {
17777 	int i, err;
17778 
17779 	/* check whether we recorded this map already */
17780 	for (i = 0; i < env->used_map_cnt; i++)
17781 		if (env->used_maps[i] == map)
17782 			return i;
17783 
17784 	if (env->used_map_cnt >= MAX_USED_MAPS) {
17785 		verbose(env, "The total number of maps per program has reached the limit of %u\n",
17786 			MAX_USED_MAPS);
17787 		return -E2BIG;
17788 	}
17789 
17790 	err = check_map_prog_compatibility(env, map, env->prog);
17791 	if (err)
17792 		return err;
17793 
17794 	if (env->prog->sleepable)
17795 		atomic64_inc(&map->sleepable_refcnt);
17796 
17797 	/* hold the map. If the program is rejected by verifier,
17798 	 * the map will be released by release_maps() or it
17799 	 * will be used by the valid program until it's unloaded
17800 	 * and all maps are released in bpf_free_used_maps()
17801 	 */
17802 	bpf_map_inc(map);
17803 
17804 	env->used_maps[env->used_map_cnt++] = map;
17805 
17806 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
17807 		err = bpf_insn_array_init(map, env->prog);
17808 		if (err) {
17809 			verbose(env, "Failed to properly initialize insn array\n");
17810 			return err;
17811 		}
17812 		env->insn_array_maps[env->insn_array_map_cnt++] = map;
17813 	}
17814 
17815 	return env->used_map_cnt - 1;
17816 }
17817 
17818 /* Add map behind fd to used maps list, if it's not already there, and return
17819  * its index.
17820  * Returns <0 on error, or >= 0 index, on success.
17821  */
17822 static int add_used_map(struct bpf_verifier_env *env, int fd)
17823 {
17824 	struct bpf_map *map;
17825 	CLASS(fd, f)(fd);
17826 
17827 	map = __bpf_map_get(f);
17828 	if (IS_ERR(map)) {
17829 		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
17830 		return PTR_ERR(map);
17831 	}
17832 
17833 	return __add_used_map(env, map);
17834 }
17835 
17836 static int check_alu_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17837 {
17838 	u8 class = BPF_CLASS(insn->code);
17839 	u8 opcode = BPF_OP(insn->code);
17840 
17841 	switch (opcode) {
17842 	case BPF_NEG:
17843 		if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 ||
17844 		    insn->off != 0 || insn->imm != 0) {
17845 			verbose(env, "BPF_NEG uses reserved fields\n");
17846 			return -EINVAL;
17847 		}
17848 		return 0;
17849 	case BPF_END:
17850 		if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
17851 		    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
17852 		    (class == BPF_ALU64 && BPF_SRC(insn->code) != BPF_TO_LE)) {
17853 			verbose(env, "BPF_END uses reserved fields\n");
17854 			return -EINVAL;
17855 		}
17856 		return 0;
17857 	case BPF_MOV:
17858 		if (BPF_SRC(insn->code) == BPF_X) {
17859 			if (class == BPF_ALU) {
17860 				if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
17861 				    insn->imm) {
17862 					verbose(env, "BPF_MOV uses reserved fields\n");
17863 					return -EINVAL;
17864 				}
17865 			} else if (insn->off == BPF_ADDR_SPACE_CAST) {
17866 				if (insn->imm != 1 && insn->imm != 1u << 16) {
17867 					verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
17868 					return -EINVAL;
17869 				}
17870 			} else if ((insn->off != 0 && insn->off != 8 &&
17871 				    insn->off != 16 && insn->off != 32) || insn->imm) {
17872 				verbose(env, "BPF_MOV uses reserved fields\n");
17873 				return -EINVAL;
17874 			}
17875 		} else if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
17876 			verbose(env, "BPF_MOV uses reserved fields\n");
17877 			return -EINVAL;
17878 		}
17879 		return 0;
17880 	case BPF_ADD:
17881 	case BPF_SUB:
17882 	case BPF_AND:
17883 	case BPF_OR:
17884 	case BPF_XOR:
17885 	case BPF_LSH:
17886 	case BPF_RSH:
17887 	case BPF_ARSH:
17888 	case BPF_MUL:
17889 	case BPF_DIV:
17890 	case BPF_MOD:
17891 		if (BPF_SRC(insn->code) == BPF_X) {
17892 			if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
17893 			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
17894 				verbose(env, "BPF_ALU uses reserved fields\n");
17895 				return -EINVAL;
17896 			}
17897 		} else if (insn->src_reg != BPF_REG_0 ||
17898 			   (insn->off != 0 && insn->off != 1) ||
17899 			   (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
17900 			verbose(env, "BPF_ALU uses reserved fields\n");
17901 			return -EINVAL;
17902 		}
17903 		return 0;
17904 	default:
17905 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17906 		return -EINVAL;
17907 	}
17908 }
17909 
17910 static int check_jmp_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17911 {
17912 	u8 class = BPF_CLASS(insn->code);
17913 	u8 opcode = BPF_OP(insn->code);
17914 
17915 	switch (opcode) {
17916 	case BPF_CALL:
17917 		if (BPF_SRC(insn->code) != BPF_K ||
17918 		    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && insn->off != 0) ||
17919 		    (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL &&
17920 		     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
17921 		    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
17922 			verbose(env, "BPF_CALL uses reserved fields\n");
17923 			return -EINVAL;
17924 		}
17925 		return 0;
17926 	case BPF_JA:
17927 		if (BPF_SRC(insn->code) == BPF_X) {
17928 			if (insn->src_reg != BPF_REG_0 || insn->imm != 0 || insn->off != 0) {
17929 				verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
17930 				return -EINVAL;
17931 			}
17932 		} else if (insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
17933 			   (class == BPF_JMP && insn->imm != 0) ||
17934 			   (class == BPF_JMP32 && insn->off != 0)) {
17935 			verbose(env, "BPF_JA uses reserved fields\n");
17936 			return -EINVAL;
17937 		}
17938 		return 0;
17939 	case BPF_EXIT:
17940 		if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 ||
17941 		    insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
17942 		    class == BPF_JMP32) {
17943 			verbose(env, "BPF_EXIT uses reserved fields\n");
17944 			return -EINVAL;
17945 		}
17946 		return 0;
17947 	case BPF_JCOND:
17948 		if (insn->code != (BPF_JMP | BPF_JCOND) || insn->src_reg != BPF_MAY_GOTO ||
17949 		    insn->dst_reg || insn->imm) {
17950 			verbose(env, "invalid may_goto imm %d\n", insn->imm);
17951 			return -EINVAL;
17952 		}
17953 		return 0;
17954 	default:
17955 		if (BPF_SRC(insn->code) == BPF_X) {
17956 			if (insn->imm != 0) {
17957 				verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17958 				return -EINVAL;
17959 			}
17960 		} else if (insn->src_reg != BPF_REG_0) {
17961 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17962 			return -EINVAL;
17963 		}
17964 		return 0;
17965 	}
17966 }
17967 
17968 static int check_insn_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17969 {
17970 	switch (BPF_CLASS(insn->code)) {
17971 	case BPF_ALU:
17972 	case BPF_ALU64:
17973 		return check_alu_fields(env, insn);
17974 	case BPF_LDX:
17975 		if ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
17976 		    insn->imm != 0) {
17977 			verbose(env, "BPF_LDX uses reserved fields\n");
17978 			return -EINVAL;
17979 		}
17980 		return 0;
17981 	case BPF_STX:
17982 		if (BPF_MODE(insn->code) == BPF_ATOMIC)
17983 			return 0;
17984 		if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
17985 			verbose(env, "BPF_STX uses reserved fields\n");
17986 			return -EINVAL;
17987 		}
17988 		return 0;
17989 	case BPF_ST:
17990 		if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
17991 			verbose(env, "BPF_ST uses reserved fields\n");
17992 			return -EINVAL;
17993 		}
17994 		return 0;
17995 	case BPF_JMP:
17996 	case BPF_JMP32:
17997 		return check_jmp_fields(env, insn);
17998 	case BPF_LD: {
17999 		u8 mode = BPF_MODE(insn->code);
18000 
18001 		if (mode == BPF_ABS || mode == BPF_IND) {
18002 			if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
18003 			    BPF_SIZE(insn->code) == BPF_DW ||
18004 			    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
18005 				verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
18006 				return -EINVAL;
18007 			}
18008 		} else if (mode != BPF_IMM) {
18009 			verbose(env, "invalid BPF_LD mode\n");
18010 			return -EINVAL;
18011 		}
18012 		return 0;
18013 	}
18014 	default:
18015 		verbose(env, "unknown insn class %d\n", BPF_CLASS(insn->code));
18016 		return -EINVAL;
18017 	}
18018 }
18019 
/*
 * Check that insns are sane and rewrite pseudo imm in ld_imm64 instructions:
 *
 * 1. if it accesses map FD, replace it with actual map pointer.
 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
 *
 * Every instruction must name valid registers (stack-argument load/store
 * forms are exempt from the register-number check). Two-slot ld_imm64
 * instructions are validated and resolved here; all other instructions must
 * be present in the opcode table and pass check_insn_fields().
 *
 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
 *
 * Returns 0 on success or a negative error (-EINVAL for malformed
 * instructions, -EPROTO / -EFAULT for fd_array problems, or an error
 * propagated from a callee).
 */
static int check_and_resolve_insns(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i, err;

	err = bpf_prog_calc_tag(env->prog);
	if (err)
		return err;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (insn->dst_reg >= MAX_BPF_REG &&
		    !is_stack_arg_st(insn) && !is_stack_arg_stx(insn)) {
			verbose(env, "R%d is invalid\n", insn->dst_reg);
			return -EINVAL;
		}
		if (insn->src_reg >= MAX_BPF_REG && !is_stack_arg_ldx(insn)) {
			verbose(env, "R%d is invalid\n", insn->src_reg);
			return -EINVAL;
		}
		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
			struct bpf_insn_aux_data *aux;
			struct bpf_map *map;
			int map_idx;
			u64 addr;
			u32 fd;

			/*
			 * ld_imm64 occupies two slots: a second insn must
			 * exist and everything in it except imm must be zero.
			 */
			if (i == insn_cnt - 1 || insn[1].code != 0 ||
			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
			    insn[1].off != 0) {
				verbose(env, "invalid bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			if (insn[0].off != 0) {
				verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
				return -EINVAL;
			}

			if (insn[0].src_reg == 0)
				/* valid generic load 64-bit imm */
				goto next_insn;

			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
				aux = &env->insn_aux_data[i];
				err = check_pseudo_btf_id(env, insn, aux);
				if (err)
					return err;
				goto next_insn;
			}

			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
				aux = &env->insn_aux_data[i];
				aux->ptr_type = PTR_TO_FUNC;
				goto next_insn;
			}

			/* In final convert_pseudo_ld_imm64() step, this is
			 * converted into regular 64-bit imm load insn.
			 */
			switch (insn[0].src_reg) {
			case BPF_PSEUDO_MAP_VALUE:
			case BPF_PSEUDO_MAP_IDX_VALUE:
				break;
			case BPF_PSEUDO_MAP_FD:
			case BPF_PSEUDO_MAP_IDX:
				/* plain map references must leave insn[1].imm zero */
				if (insn[1].imm == 0)
					break;
				fallthrough;
			default:
				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			/*
			 * Obtain the map fd: the *_IDX forms use insn[0].imm
			 * as an index into env->fd_array, the other forms
			 * carry the fd directly in insn[0].imm.
			 */
			switch (insn[0].src_reg) {
			case BPF_PSEUDO_MAP_IDX_VALUE:
			case BPF_PSEUDO_MAP_IDX:
				if (bpfptr_is_null(env->fd_array)) {
					verbose(env, "fd_idx without fd_array is invalid\n");
					return -EPROTO;
				}
				if (copy_from_bpfptr_offset(&fd, env->fd_array,
							    insn[0].imm * sizeof(fd),
							    sizeof(fd)))
					return -EFAULT;
				break;
			default:
				fd = insn[0].imm;
				break;
			}

			map_idx = add_used_map(env, fd);
			if (map_idx < 0)
				return map_idx;
			map = env->used_maps[map_idx];

			aux = &env->insn_aux_data[i];
			aux->map_index = map_idx;

			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
				/* the loaded value is the map pointer itself */
				addr = (unsigned long)map;
			} else {
				/* *_VALUE forms: insn[1].imm is an offset into the map value */
				u32 off = insn[1].imm;

				if (!map->ops->map_direct_value_addr) {
					verbose(env, "no direct value access support for this map type\n");
					return -EINVAL;
				}

				err = map->ops->map_direct_value_addr(map, &addr, off);
				if (err) {
					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
						map->value_size, off);
					return err;
				}

				aux->map_off = off;
				addr += off;
			}

			/* store the 64-bit address as low/high halves of the two slots */
			insn[0].imm = (u32)addr;
			insn[1].imm = addr >> 32;

next_insn:
			/* skip the second slot; the for loop then advances past the first */
			insn++;
			i++;
			continue;
		}

		/* Basic sanity check before we invest more work here. */
		if (!bpf_opcode_in_insntable(insn->code)) {
			verbose(env, "unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}

		err = check_insn_fields(env, insn);
		if (err)
			return err;
	}

	/* now all pseudo BPF_LD_IMM64 instructions load valid
	 * 'struct bpf_map *' into a register instead of user map_fd.
	 * These pointers will be used later by verifier to validate map access.
	 */
	return 0;
}
18175 
/*
 * Drop refcnt of maps used by the rejected program: hands the
 * env->used_map_cnt entries of env->used_maps to __bpf_free_used_maps().
 */
static void release_maps(struct bpf_verifier_env *env)
{
	__bpf_free_used_maps(env->prog->aux, env->used_maps,
			     env->used_map_cnt);
}
18182 
/*
 * Release the BTFs recorded for the program: hands the env->used_btf_cnt
 * entries of env->used_btfs to __bpf_free_used_btfs().
 */
static void release_btfs(struct bpf_verifier_env *env)
{
	__bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
}
18188 
18189 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
18190 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18191 {
18192 	struct bpf_insn *insn = env->prog->insnsi;
18193 	int insn_cnt = env->prog->len;
18194 	int i;
18195 
18196 	for (i = 0; i < insn_cnt; i++, insn++) {
18197 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
18198 			continue;
18199 		if (insn->src_reg == BPF_PSEUDO_FUNC)
18200 			continue;
18201 		insn->src_reg = 0;
18202 	}
18203 }
18204 
18205 static void release_insn_arrays(struct bpf_verifier_env *env)
18206 {
18207 	int i;
18208 
18209 	for (i = 0; i < env->insn_array_map_cnt; i++)
18210 		bpf_insn_array_release(env->insn_array_maps[i]);
18211 }
18212 
18213 
18214 
18215 /* The verifier does more data flow analysis than llvm and will not
18216  * explore branches that are dead at run time. Malicious programs can
18217  * have dead code too. Therefore replace all dead at-run-time code
18218  * with 'ja -1'.
18219  *
18220  * Just nops are not optimal, e.g. if they would sit at the end of the
18221  * program and through another bug we would manage to jump there, then
18222  * we'd execute beyond program memory otherwise. Returning exception
18223  * code also wouldn't work since we can have subprogs where the dead
18224  * code could be located.
18225  */
18226 static void sanitize_dead_code(struct bpf_verifier_env *env)
18227 {
18228 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18229 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
18230 	struct bpf_insn *insn = env->prog->insnsi;
18231 	const int insn_cnt = env->prog->len;
18232 	int i;
18233 
18234 	for (i = 0; i < insn_cnt; i++) {
18235 		if (aux_data[i].seen)
18236 			continue;
18237 		memcpy(insn + i, &trap, sizeof(trap));
18238 		aux_data[i].zext_dst = false;
18239 	}
18240 }
18241 
18242 
18243 
18244 static void free_states(struct bpf_verifier_env *env)
18245 {
18246 	struct bpf_verifier_state_list *sl;
18247 	struct list_head *head, *pos, *tmp;
18248 	struct bpf_scc_info *info;
18249 	int i, j;
18250 
18251 	bpf_free_verifier_state(env->cur_state, true);
18252 	env->cur_state = NULL;
18253 	while (!pop_stack(env, NULL, NULL, false));
18254 
18255 	list_for_each_safe(pos, tmp, &env->free_list) {
18256 		sl = container_of(pos, struct bpf_verifier_state_list, node);
18257 		bpf_free_verifier_state(&sl->state, false);
18258 		kfree(sl);
18259 	}
18260 	INIT_LIST_HEAD(&env->free_list);
18261 
18262 	for (i = 0; i < env->scc_cnt; ++i) {
18263 		info = env->scc_info[i];
18264 		if (!info)
18265 			continue;
18266 		for (j = 0; j < info->num_visits; j++)
18267 			bpf_free_backedges(&info->visits[j]);
18268 		kvfree(info);
18269 		env->scc_info[i] = NULL;
18270 	}
18271 
18272 	if (!env->explored_states)
18273 		return;
18274 
18275 	for (i = 0; i < state_htab_size(env); i++) {
18276 		head = &env->explored_states[i];
18277 
18278 		list_for_each_safe(pos, tmp, head) {
18279 			sl = container_of(pos, struct bpf_verifier_state_list, node);
18280 			bpf_free_verifier_state(&sl->state, false);
18281 			kfree(sl);
18282 		}
18283 		INIT_LIST_HEAD(&env->explored_states[i]);
18284 	}
18285 }
18286 
18287 static int do_check_common(struct bpf_verifier_env *env, int subprog)
18288 {
18289 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
18290 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
18291 	struct bpf_prog_aux *aux = env->prog->aux;
18292 	struct bpf_verifier_state *state;
18293 	struct bpf_reg_state *regs;
18294 	int ret, i;
18295 
18296 	env->prev_linfo = NULL;
18297 	env->pass_cnt++;
18298 
18299 	state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT);
18300 	if (!state)
18301 		return -ENOMEM;
18302 	state->curframe = 0;
18303 	state->speculative = false;
18304 	state->branches = 1;
18305 	state->in_sleepable = env->prog->sleepable;
18306 	state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT);
18307 	if (!state->frame[0]) {
18308 		kfree(state);
18309 		return -ENOMEM;
18310 	}
18311 	env->cur_state = state;
18312 	init_func_state(env, state->frame[0],
18313 			BPF_MAIN_FUNC /* callsite */,
18314 			0 /* frameno */,
18315 			subprog);
18316 	state->first_insn_idx = env->subprog_info[subprog].start;
18317 	state->last_insn_idx = -1;
18318 
18319 	regs = state->frame[state->curframe]->regs;
18320 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
18321 		const char *sub_name = subprog_name(env, subprog);
18322 		struct bpf_subprog_arg_info *arg;
18323 		struct bpf_reg_state *reg;
18324 
18325 		if (env->log.level & BPF_LOG_LEVEL)
18326 			verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
18327 		ret = btf_prepare_func_args(env, subprog);
18328 		if (ret)
18329 			goto out;
18330 
18331 		if (subprog_is_exc_cb(env, subprog)) {
18332 			state->frame[0]->in_exception_callback_fn = true;
18333 
18334 			/*
18335 			 * Global functions are scalar or void, make sure
18336 			 * we return a scalar.
18337 			 */
18338 			if (subprog_returns_void(env, subprog)) {
18339 				verbose(env, "exception cb cannot return void\n");
18340 				ret = -EINVAL;
18341 				goto out;
18342 			}
18343 
18344 			/* Also ensure the callback only has a single scalar argument. */
18345 			if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
18346 				verbose(env, "exception cb only supports single integer argument\n");
18347 				ret = -EINVAL;
18348 				goto out;
18349 			}
18350 		}
18351 		for (i = BPF_REG_1; i <= min_t(u32, sub->arg_cnt, MAX_BPF_FUNC_REG_ARGS); i++) {
18352 			arg = &sub->args[i - BPF_REG_1];
18353 			reg = &regs[i];
18354 
18355 			if (arg->arg_type == ARG_PTR_TO_CTX) {
18356 				reg->type = PTR_TO_CTX;
18357 				mark_reg_known_zero(env, regs, i);
18358 			} else if (arg->arg_type == ARG_ANYTHING) {
18359 				reg->type = SCALAR_VALUE;
18360 				mark_reg_unknown(env, regs, i);
18361 			} else if (arg->arg_type == ARG_PTR_TO_DYNPTR) {
18362 				/* assume unspecial LOCAL dynptr type */
18363 				__mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen, 0);
18364 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
18365 				reg->type = PTR_TO_MEM;
18366 				reg->type |= arg->arg_type &
18367 					     (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY);
18368 				mark_reg_known_zero(env, regs, i);
18369 				reg->mem_size = arg->mem_size;
18370 				if (arg->arg_type & PTR_MAYBE_NULL)
18371 					reg->id = ++env->id_gen;
18372 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
18373 				reg->type = PTR_TO_BTF_ID;
18374 				if (arg->arg_type & PTR_MAYBE_NULL)
18375 					reg->type |= PTR_MAYBE_NULL;
18376 				if (arg->arg_type & PTR_UNTRUSTED)
18377 					reg->type |= PTR_UNTRUSTED;
18378 				if (arg->arg_type & PTR_TRUSTED)
18379 					reg->type |= PTR_TRUSTED;
18380 				mark_reg_known_zero(env, regs, i);
18381 				reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
18382 				reg->btf_id = arg->btf_id;
18383 				reg->id = ++env->id_gen;
18384 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
18385 				/* caller can pass either PTR_TO_ARENA or SCALAR */
18386 				mark_reg_unknown(env, regs, i);
18387 			} else {
18388 				verifier_bug(env, "unhandled arg#%d type %d",
18389 					     i - BPF_REG_1 + 1, arg->arg_type);
18390 				ret = -EFAULT;
18391 				goto out;
18392 			}
18393 		}
18394 		if (env->prog->type == BPF_PROG_TYPE_EXT && sub->arg_cnt > MAX_BPF_FUNC_REG_ARGS) {
18395 			verbose(env, "freplace programs with >%d args not supported yet\n",
18396 				MAX_BPF_FUNC_REG_ARGS);
18397 			ret = -EINVAL;
18398 			goto out;
18399 		}
18400 	} else {
18401 		/* if main BPF program has associated BTF info, validate that
18402 		 * it's matching expected signature, and otherwise mark BTF
18403 		 * info for main program as unreliable
18404 		 */
18405 		if (env->prog->aux->func_info_aux) {
18406 			ret = btf_prepare_func_args(env, 0);
18407 			if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) {
18408 				env->prog->aux->func_info_aux[0].unreliable = true;
18409 				sub->arg_cnt = 1;
18410 				sub->stack_arg_cnt = 0;
18411 			}
18412 		}
18413 
18414 		/* 1st arg to a function */
18415 		regs[BPF_REG_1].type = PTR_TO_CTX;
18416 		mark_reg_known_zero(env, regs, BPF_REG_1);
18417 	}
18418 
18419 	/* Acquire references for struct_ops program arguments tagged with "__ref" */
18420 	if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
18421 		for (i = 0; i < aux->ctx_arg_info_size; i++) {
18422 			ret = aux->ctx_arg_info[i].refcounted ? acquire_reference(env, 0, 0) : 0;
18423 			if (ret < 0)
18424 				goto out;
18425 
18426 			aux->ctx_arg_info[i].ref_id = ret;
18427 		}
18428 	}
18429 
18430 	ret = do_check(env);
18431 out:
18432 	if (!ret && pop_log)
18433 		bpf_vlog_reset(&env->log, 0);
18434 	free_states(env);
18435 	return ret;
18436 }
18437 
18438 /* Lazily verify all global functions based on their BTF, if they are called
18439  * from main BPF program or any of subprograms transitively.
18440  * BPF global subprogs called from dead code are not validated.
18441  * All callable global functions must pass verification.
18442  * Otherwise the whole program is rejected.
18443  * Consider:
18444  * int bar(int);
18445  * int foo(int f)
18446  * {
18447  *    return bar(f);
18448  * }
18449  * int bar(int b)
18450  * {
18451  *    ...
18452  * }
18453  * foo() will be verified first for R1=any_scalar_value. During verification it
18454  * will be assumed that bar() already verified successfully and call to bar()
18455  * from foo() will be checked for type match only. Later bar() will be verified
18456  * independently to check that it's safe for R1=any_scalar_value.
18457  */
18458 static int do_check_subprogs(struct bpf_verifier_env *env)
18459 {
18460 	struct bpf_prog_aux *aux = env->prog->aux;
18461 	struct bpf_func_info_aux *sub_aux;
18462 	int i, ret, new_cnt;
18463 	u32 insn_processed;
18464 
18465 	if (!aux->func_info)
18466 		return 0;
18467 
18468 	/* exception callback is presumed to be always called */
18469 	if (env->exception_callback_subprog)
18470 		subprog_aux(env, env->exception_callback_subprog)->called = true;
18471 
18472 again:
18473 	new_cnt = 0;
18474 	for (i = 1; i < env->subprog_cnt; i++) {
18475 		if (!bpf_subprog_is_global(env, i))
18476 			continue;
18477 
18478 		insn_processed = env->insn_processed;
18479 
18480 		sub_aux = subprog_aux(env, i);
18481 		if (!sub_aux->called || sub_aux->verified)
18482 			continue;
18483 
18484 		env->insn_idx = env->subprog_info[i].start;
18485 		WARN_ON_ONCE(env->insn_idx == 0);
18486 		ret = do_check_common(env, i);
18487 		env->subprog_info[i].insn_processed = env->insn_processed - insn_processed;
18488 		if (ret) {
18489 			return ret;
18490 		} else if (env->log.level & BPF_LOG_LEVEL) {
18491 			verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
18492 				i, subprog_name(env, i));
18493 		}
18494 
18495 		/* We verified new global subprog, it might have called some
18496 		 * more global subprogs that we haven't verified yet, so we
18497 		 * need to do another pass over subprogs to verify those.
18498 		 */
18499 		sub_aux->verified = true;
18500 		new_cnt++;
18501 	}
18502 
18503 	/* We can't loop forever as we verify at least one global subprog on
18504 	 * each pass.
18505 	 */
18506 	if (new_cnt)
18507 		goto again;
18508 
18509 	return 0;
18510 }
18511 
18512 static int do_check_main(struct bpf_verifier_env *env)
18513 {
18514 	u32 insn_processed = env->insn_processed;
18515 	int ret;
18516 
18517 	env->insn_idx = 0;
18518 	ret = do_check_common(env, 0);
18519 	env->subprog_info[0].insn_processed = env->insn_processed - insn_processed;
18520 	if (!ret)
18521 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18522 	return ret;
18523 }
18524 
18525 
18526 static void print_verification_stats(struct bpf_verifier_env *env)
18527 {
18528 	/* Skip over hidden subprogs which are not verified. */
18529 	int i, subprog_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
18530 
18531 	if (env->log.level & BPF_LOG_STATS) {
18532 		verbose(env, "verification time %lld usec\n",
18533 			div_u64(env->verification_time, 1000));
18534 		verbose(env, "stack depth %d", env->subprog_info[0].stack_depth);
18535 		for (i = 1; i < subprog_cnt; i++)
18536 			verbose(env, "+%d", env->subprog_info[i].stack_depth);
18537 		verbose(env, " max %d\n", env->max_stack_depth);
18538 		verbose(env, "insns processed %d", env->subprog_info[0].insn_processed);
18539 		for (i = 1; i < subprog_cnt; i++)
18540 			if (bpf_subprog_is_global(env, i))
18541 				verbose(env, "+%d", env->subprog_info[i].insn_processed);
18542 		verbose(env, "\n");
18543 	}
18544 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
18545 		"total_states %d peak_states %d mark_read %d\n",
18546 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
18547 		env->max_states_per_insn, env->total_states,
18548 		env->peak_states, env->longest_mark_read_walk);
18549 }
18550 
18551 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
18552 			       const struct bpf_ctx_arg_aux *info, u32 cnt)
18553 {
18554 	prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
18555 	prog->aux->ctx_arg_info_size = cnt;
18556 
18557 	return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
18558 }
18559 
18560 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
18561 {
18562 	const struct btf_type *t, *func_proto;
18563 	const struct bpf_struct_ops_desc *st_ops_desc;
18564 	const struct bpf_struct_ops *st_ops;
18565 	const struct btf_member *member;
18566 	struct bpf_prog *prog = env->prog;
18567 	bool has_refcounted_arg = false;
18568 	u32 btf_id, member_idx, member_off;
18569 	struct btf *btf;
18570 	const char *mname;
18571 	int i, err;
18572 
18573 	if (!prog->gpl_compatible) {
18574 		verbose(env, "struct ops programs must have a GPL compatible license\n");
18575 		return -EINVAL;
18576 	}
18577 
18578 	if (!prog->aux->attach_btf_id)
18579 		return -ENOTSUPP;
18580 
18581 	btf = prog->aux->attach_btf;
18582 	if (btf_is_module(btf)) {
18583 		/* Make sure st_ops is valid through the lifetime of env */
18584 		env->attach_btf_mod = btf_try_get_module(btf);
18585 		if (!env->attach_btf_mod) {
18586 			verbose(env, "struct_ops module %s is not found\n",
18587 				btf_get_name(btf));
18588 			return -ENOTSUPP;
18589 		}
18590 	}
18591 
18592 	btf_id = prog->aux->attach_btf_id;
18593 	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
18594 	if (!st_ops_desc) {
18595 		verbose(env, "attach_btf_id %u is not a supported struct\n",
18596 			btf_id);
18597 		return -ENOTSUPP;
18598 	}
18599 	st_ops = st_ops_desc->st_ops;
18600 
18601 	t = st_ops_desc->type;
18602 	member_idx = prog->expected_attach_type;
18603 	if (member_idx >= btf_type_vlen(t)) {
18604 		verbose(env, "attach to invalid member idx %u of struct %s\n",
18605 			member_idx, st_ops->name);
18606 		return -EINVAL;
18607 	}
18608 
18609 	member = &btf_type_member(t)[member_idx];
18610 	mname = btf_name_by_offset(btf, member->name_off);
18611 	func_proto = btf_type_resolve_func_ptr(btf, member->type,
18612 					       NULL);
18613 	if (!func_proto) {
18614 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
18615 			mname, member_idx, st_ops->name);
18616 		return -EINVAL;
18617 	}
18618 
18619 	member_off = __btf_member_bit_offset(t, member) / 8;
18620 	err = bpf_struct_ops_supported(st_ops, member_off);
18621 	if (err) {
18622 		verbose(env, "attach to unsupported member %s of struct %s\n",
18623 			mname, st_ops->name);
18624 		return err;
18625 	}
18626 
18627 	if (st_ops->check_member) {
18628 		err = st_ops->check_member(t, member, prog);
18629 
18630 		if (err) {
18631 			verbose(env, "attach to unsupported member %s of struct %s\n",
18632 				mname, st_ops->name);
18633 			return err;
18634 		}
18635 	}
18636 
18637 	if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
18638 		verbose(env, "Private stack not supported by jit\n");
18639 		return -EACCES;
18640 	}
18641 
18642 	for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
18643 		if (st_ops_desc->arg_info[member_idx].info[i].refcounted) {
18644 			has_refcounted_arg = true;
18645 			break;
18646 		}
18647 	}
18648 
18649 	/* Tail call is not allowed for programs with refcounted arguments since we
18650 	 * cannot guarantee that valid refcounted kptrs will be passed to the callee.
18651 	 */
18652 	for (i = 0; i < env->subprog_cnt; i++) {
18653 		if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
18654 			verbose(env, "program with __ref argument cannot tail call\n");
18655 			return -EINVAL;
18656 		}
18657 	}
18658 
18659 	prog->aux->st_ops = st_ops;
18660 	prog->aux->attach_st_ops_member_off = member_off;
18661 
18662 	prog->aux->attach_func_proto = func_proto;
18663 	prog->aux->attach_func_name = mname;
18664 	env->ops = st_ops->verifier_ops;
18665 
18666 	return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
18667 					  st_ops_desc->arg_info[member_idx].cnt);
18668 }
18669 #define SECURITY_PREFIX "security_"
18670 
18671 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
18672 
18673 /* list of non-sleepable functions that are otherwise on
18674  * ALLOW_ERROR_INJECTION list
18675  */
18676 BTF_SET_START(btf_non_sleepable_error_inject)
18677 /* Three functions below can be called from sleepable and non-sleepable context.
18678  * Assume non-sleepable from bpf safety point of view.
18679  */
18680 BTF_ID(func, __filemap_add_folio)
18681 #ifdef CONFIG_FAIL_PAGE_ALLOC
18682 BTF_ID(func, should_fail_alloc_page)
18683 #endif
18684 #ifdef CONFIG_FAILSLAB
18685 BTF_ID(func, should_failslab)
18686 #endif
18687 BTF_SET_END(btf_non_sleepable_error_inject)
18688 
18689 static int check_non_sleepable_error_inject(u32 btf_id)
18690 {
18691 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
18692 }
18693 
18694 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
18695 {
18696 	/* fentry/fexit/fmod_ret progs can be sleepable if they are
18697 	 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
18698 	 */
18699 	if (!check_non_sleepable_error_inject(btf_id) &&
18700 	    within_error_injection_list(addr))
18701 		return 0;
18702 
18703 	return -EINVAL;
18704 }
18705 
18706 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18707 {
18708 	if (within_error_injection_list(addr) ||
18709 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18710 		return 0;
18711 
18712 	return -EINVAL;
18713 }
18714 
18715 #else
18716 
/* The arch-specific syscall symbol prefixes are hard-coded in the arch
 * syscall code, so they have to be hard-coded here as well. Ftrace offers
 * arch_syscall_match_sym_name(), but that only compares two concrete
 * function names.
 *
 * Returns true when @func_name starts with the syscall prefix of the
 * architecture being compiled for; always false on architectures without a
 * known prefix.
 */
static bool has_arch_syscall_prefix(const char *func_name)
{
	const char *prefix;

#if defined(__x86_64__)
	prefix = "__x64_";
#elif defined(__i386__)
	prefix = "__ia32_";
#elif defined(__s390x__)
	prefix = "__s390x_";
#elif defined(__aarch64__)
	prefix = "__arm64_";
#elif defined(__riscv)
	prefix = "__riscv_";
#elif defined(__powerpc__) || defined(__powerpc64__) || defined(__loongarch__)
	prefix = "sys_";
#else
	prefix = NULL;
#endif

	if (!prefix)
		return false;

	return strncmp(func_name, prefix, strlen(prefix)) == 0;
}
18741 
18742 /* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
18743 
18744 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
18745 {
18746 	if (has_arch_syscall_prefix(func_name))
18747 		return 0;
18748 
18749 	return -EINVAL;
18750 }
18751 
18752 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18753 {
18754 	if (has_arch_syscall_prefix(func_name) ||
18755 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18756 		return 0;
18757 
18758 	return -EINVAL;
18759 }
18760 
18761 #endif /* CONFIG_FUNCTION_ERROR_INJECTION */
18762 
18763 int bpf_check_attach_target(struct bpf_verifier_log *log,
18764 			    const struct bpf_prog *prog,
18765 			    const struct bpf_prog *tgt_prog,
18766 			    u32 btf_id,
18767 			    struct bpf_attach_target_info *tgt_info)
18768 {
18769 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
18770 	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
18771 	char trace_symbol[KSYM_SYMBOL_LEN];
18772 	const char prefix[] = "btf_trace_";
18773 	struct bpf_raw_event_map *btp;
18774 	int ret = 0, subprog = -1, i;
18775 	const struct btf_type *t;
18776 	bool conservative = true;
18777 	const char *tname, *fname;
18778 	struct btf *btf;
18779 	long addr = 0;
18780 	struct module *mod = NULL;
18781 
18782 	if (!btf_id) {
18783 		bpf_log(log, "Tracing programs must provide btf_id\n");
18784 		return -EINVAL;
18785 	}
18786 	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
18787 	if (!btf) {
18788 		bpf_log(log,
18789 			"Tracing program can only be attached to another program annotated with BTF\n");
18790 		return -EINVAL;
18791 	}
18792 	t = btf_type_by_id(btf, btf_id);
18793 	if (!t) {
18794 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
18795 		return -EINVAL;
18796 	}
18797 	tname = btf_name_by_offset(btf, t->name_off);
18798 	if (!tname) {
18799 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
18800 		return -EINVAL;
18801 	}
18802 	if (tgt_prog) {
18803 		struct bpf_prog_aux *aux = tgt_prog->aux;
18804 		bool tgt_changes_pkt_data;
18805 		bool tgt_might_sleep;
18806 
18807 		if (bpf_prog_is_dev_bound(prog->aux) &&
18808 		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
18809 			bpf_log(log, "Target program bound device mismatch");
18810 			return -EINVAL;
18811 		}
18812 
18813 		for (i = 0; i < aux->func_info_cnt; i++)
18814 			if (aux->func_info[i].type_id == btf_id) {
18815 				subprog = i;
18816 				break;
18817 			}
18818 		if (subprog == -1) {
18819 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
18820 			return -EINVAL;
18821 		}
18822 		if (aux->func && aux->func[subprog]->aux->exception_cb) {
18823 			bpf_log(log,
18824 				"%s programs cannot attach to exception callback\n",
18825 				prog_extension ? "Extension" : "Tracing");
18826 			return -EINVAL;
18827 		}
18828 		conservative = aux->func_info_aux[subprog].unreliable;
18829 		if (prog_extension) {
18830 			if (conservative) {
18831 				bpf_log(log,
18832 					"Cannot replace static functions\n");
18833 				return -EINVAL;
18834 			}
18835 			if (!prog->jit_requested) {
18836 				bpf_log(log,
18837 					"Extension programs should be JITed\n");
18838 				return -EINVAL;
18839 			}
18840 			tgt_changes_pkt_data = aux->func
18841 					       ? aux->func[subprog]->aux->changes_pkt_data
18842 					       : aux->changes_pkt_data;
18843 			if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
18844 				bpf_log(log,
18845 					"Extension program changes packet data, while original does not\n");
18846 				return -EINVAL;
18847 			}
18848 
18849 			tgt_might_sleep = aux->func
18850 					  ? aux->func[subprog]->aux->might_sleep
18851 					  : aux->might_sleep;
18852 			if (prog->aux->might_sleep && !tgt_might_sleep) {
18853 				bpf_log(log,
18854 					"Extension program may sleep, while original does not\n");
18855 				return -EINVAL;
18856 			}
18857 		}
18858 		if (!tgt_prog->jited) {
18859 			bpf_log(log, "Can attach to only JITed progs\n");
18860 			return -EINVAL;
18861 		}
18862 		if (prog_tracing) {
18863 			if (aux->attach_tracing_prog) {
18864 				/*
18865 				 * Target program is an fentry/fexit which is already attached
18866 				 * to another tracing program. More levels of nesting
18867 				 * attachment are not allowed.
18868 				 */
18869 				bpf_log(log, "Cannot nest tracing program attach more than once\n");
18870 				return -EINVAL;
18871 			}
18872 		} else if (tgt_prog->type == prog->type) {
18873 			/*
18874 			 * To avoid potential call chain cycles, prevent attaching of a
18875 			 * program extension to another extension. It's ok to attach
18876 			 * fentry/fexit to extension program.
18877 			 */
18878 			bpf_log(log, "Cannot recursively attach\n");
18879 			return -EINVAL;
18880 		}
18881 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
18882 		    prog_extension &&
18883 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
18884 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
18885 		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) {
18886 			/* Program extensions can extend all program types
18887 			 * except fentry/fexit. The reason is the following.
18888 			 * The fentry/fexit programs are used for performance
18889 			 * analysis, stats and can be attached to any program
18890 			 * type. When extension program is replacing XDP function
18891 			 * it is necessary to allow performance analysis of all
18892 			 * functions. Both original XDP program and its program
18893 			 * extension. Hence attaching fentry/fexit to
18894 			 * BPF_PROG_TYPE_EXT is allowed. If extending of
18895 			 * fentry/fexit was allowed it would be possible to create
18896 			 * long call chain fentry->extension->fentry->extension
18897 			 * beyond reasonable stack size. Hence extending fentry
18898 			 * is not allowed.
18899 			 */
18900 			bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
18901 			return -EINVAL;
18902 		}
18903 	} else {
18904 		if (prog_extension) {
18905 			bpf_log(log, "Cannot replace kernel functions\n");
18906 			return -EINVAL;
18907 		}
18908 	}
18909 
18910 	switch (prog->expected_attach_type) {
18911 	case BPF_TRACE_RAW_TP:
18912 		if (tgt_prog) {
18913 			bpf_log(log,
18914 				"Only FENTRY/FEXIT/FSESSION progs are attachable to another BPF prog\n");
18915 			return -EINVAL;
18916 		}
18917 		if (!btf_type_is_typedef(t)) {
18918 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
18919 				btf_id);
18920 			return -EINVAL;
18921 		}
18922 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
18923 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
18924 				btf_id, tname);
18925 			return -EINVAL;
18926 		}
18927 		tname += sizeof(prefix) - 1;
18928 
18929 		/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
18930 		 * names. Thus using bpf_raw_event_map to get argument names.
18931 		 */
18932 		btp = bpf_get_raw_tracepoint(tname);
18933 		if (!btp)
18934 			return -EINVAL;
18935 		if (prog->sleepable && !tracepoint_is_faultable(btp->tp)) {
18936 			bpf_log(log, "Sleepable program cannot attach to non-faultable tracepoint %s\n",
18937 				tname);
18938 			bpf_put_raw_tracepoint(btp);
18939 			return -EINVAL;
18940 		}
18941 		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
18942 					trace_symbol);
18943 		bpf_put_raw_tracepoint(btp);
18944 
18945 		if (fname)
18946 			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
18947 
18948 		if (!fname || ret < 0) {
18949 			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
18950 				prefix, tname);
18951 			t = btf_type_by_id(btf, t->type);
18952 			if (!btf_type_is_ptr(t))
18953 				/* should never happen in valid vmlinux build */
18954 				return -EINVAL;
18955 		} else {
18956 			t = btf_type_by_id(btf, ret);
18957 			if (!btf_type_is_func(t))
18958 				/* should never happen in valid vmlinux build */
18959 				return -EINVAL;
18960 		}
18961 
18962 		t = btf_type_by_id(btf, t->type);
18963 		if (!btf_type_is_func_proto(t))
18964 			/* should never happen in valid vmlinux build */
18965 			return -EINVAL;
18966 
18967 		break;
18968 	case BPF_TRACE_ITER:
18969 		if (!btf_type_is_func(t)) {
18970 			bpf_log(log, "attach_btf_id %u is not a function\n",
18971 				btf_id);
18972 			return -EINVAL;
18973 		}
18974 		t = btf_type_by_id(btf, t->type);
18975 		if (!btf_type_is_func_proto(t))
18976 			return -EINVAL;
18977 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
18978 		if (ret)
18979 			return ret;
18980 		break;
18981 	default:
18982 		if (!prog_extension)
18983 			return -EINVAL;
18984 		fallthrough;
18985 	case BPF_MODIFY_RETURN:
18986 	case BPF_LSM_MAC:
18987 	case BPF_LSM_CGROUP:
18988 	case BPF_TRACE_FENTRY:
18989 	case BPF_TRACE_FEXIT:
18990 	case BPF_TRACE_FSESSION:
18991 		if (prog->expected_attach_type == BPF_TRACE_FSESSION &&
18992 		    !bpf_jit_supports_fsession()) {
18993 			bpf_log(log, "JIT does not support fsession\n");
18994 			return -EOPNOTSUPP;
18995 		}
18996 		if (!btf_type_is_func(t)) {
18997 			bpf_log(log, "attach_btf_id %u is not a function\n",
18998 				btf_id);
18999 			return -EINVAL;
19000 		}
19001 		if (prog_extension &&
19002 		    btf_check_type_match(log, prog, btf, t))
19003 			return -EINVAL;
19004 		t = btf_type_by_id(btf, t->type);
19005 		if (!btf_type_is_func_proto(t))
19006 			return -EINVAL;
19007 
19008 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
19009 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
19010 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
19011 			return -EINVAL;
19012 
19013 		if (tgt_prog && conservative)
19014 			t = NULL;
19015 
19016 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19017 		if (ret < 0)
19018 			return ret;
19019 
19020 		if (tgt_prog) {
19021 			if (subprog == 0)
19022 				addr = (long) tgt_prog->bpf_func;
19023 			else
19024 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
19025 		} else {
19026 			if (btf_is_module(btf)) {
19027 				mod = btf_try_get_module(btf);
19028 				if (mod)
19029 					addr = find_kallsyms_symbol_value(mod, tname);
19030 				else
19031 					addr = 0;
19032 			} else {
19033 				addr = kallsyms_lookup_name(tname);
19034 			}
19035 			if (!addr) {
19036 				module_put(mod);
19037 				bpf_log(log,
19038 					"The address of function %s cannot be found\n",
19039 					tname);
19040 				return -ENOENT;
19041 			}
19042 		}
19043 
19044 		if (prog->sleepable) {
19045 			ret = -EINVAL;
19046 			switch (prog->type) {
19047 			case BPF_PROG_TYPE_TRACING:
19048 				if (!check_attach_sleepable(btf_id, addr, tname))
19049 					ret = 0;
19050 				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
19051 				 * in the fmodret id set with the KF_SLEEPABLE flag.
19052 				 */
19053 				else {
19054 					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
19055 										prog);
19056 
19057 					if (flags && (*flags & KF_SLEEPABLE))
19058 						ret = 0;
19059 				}
19060 				break;
19061 			case BPF_PROG_TYPE_LSM:
19062 				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
19063 				 * Only some of them are sleepable.
19064 				 */
19065 				if (bpf_lsm_is_sleepable_hook(btf_id))
19066 					ret = 0;
19067 				break;
19068 			default:
19069 				break;
19070 			}
19071 			if (ret) {
19072 				module_put(mod);
19073 				bpf_log(log, "%s is not sleepable\n", tname);
19074 				return ret;
19075 			}
19076 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
19077 			if (tgt_prog) {
19078 				module_put(mod);
19079 				bpf_log(log, "can't modify return codes of BPF programs\n");
19080 				return -EINVAL;
19081 			}
19082 			ret = -EINVAL;
19083 			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
19084 			    !check_attach_modify_return(addr, tname))
19085 				ret = 0;
19086 			if (ret) {
19087 				module_put(mod);
19088 				bpf_log(log, "%s() is not modifiable\n", tname);
19089 				return ret;
19090 			}
19091 		}
19092 
19093 		break;
19094 	}
19095 	tgt_info->tgt_addr = addr;
19096 	tgt_info->tgt_name = tname;
19097 	tgt_info->tgt_type = t;
19098 	tgt_info->tgt_mod = mod;
19099 	return 0;
19100 }
19101 
19102 BTF_SET_START(btf_id_deny)
19103 BTF_ID_UNUSED
19104 #ifdef CONFIG_SMP
19105 BTF_ID(func, ___migrate_enable)
19106 BTF_ID(func, migrate_disable)
19107 BTF_ID(func, migrate_enable)
19108 #endif
19109 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
19110 BTF_ID(func, rcu_read_unlock_strict)
19111 #endif
19112 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
19113 BTF_ID(func, preempt_count_add)
19114 BTF_ID(func, preempt_count_sub)
19115 #endif
19116 #ifdef CONFIG_PREEMPT_RCU
19117 BTF_ID(func, __rcu_read_lock)
19118 BTF_ID(func, __rcu_read_unlock)
19119 #endif
19120 BTF_SET_END(btf_id_deny)
19121 
19122 /* fexit and fmod_ret can't be used to attach to __noreturn functions.
19123  * Currently, we must manually list all __noreturn functions here. Once a more
19124  * robust solution is implemented, this workaround can be removed.
19125  */
19126 BTF_SET_START(noreturn_deny)
19127 #ifdef CONFIG_IA32_EMULATION
19128 BTF_ID(func, __ia32_sys_exit)
19129 BTF_ID(func, __ia32_sys_exit_group)
19130 #endif
19131 #ifdef CONFIG_KUNIT
19132 BTF_ID(func, __kunit_abort)
19133 BTF_ID(func, kunit_try_catch_throw)
19134 #endif
19135 #ifdef CONFIG_MODULES
19136 BTF_ID(func, __module_put_and_kthread_exit)
19137 #endif
19138 #ifdef CONFIG_X86_64
19139 BTF_ID(func, __x64_sys_exit)
19140 BTF_ID(func, __x64_sys_exit_group)
19141 #endif
19142 BTF_ID(func, do_exit)
19143 BTF_ID(func, do_group_exit)
19144 BTF_ID(func, kthread_complete_and_exit)
19145 BTF_ID(func, make_task_dead)
19146 BTF_SET_END(noreturn_deny)
19147 
19148 static bool can_be_sleepable(struct bpf_prog *prog)
19149 {
19150 	if (prog->type == BPF_PROG_TYPE_TRACING) {
19151 		switch (prog->expected_attach_type) {
19152 		case BPF_TRACE_FENTRY:
19153 		case BPF_TRACE_FEXIT:
19154 		case BPF_MODIFY_RETURN:
19155 		case BPF_TRACE_ITER:
19156 		case BPF_TRACE_FSESSION:
19157 		case BPF_TRACE_RAW_TP:
19158 			return true;
19159 		default:
19160 			return false;
19161 		}
19162 	}
19163 	return prog->type == BPF_PROG_TYPE_LSM ||
19164 	       prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
19165 	       prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
19166 	       prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT ||
19167 	       prog->type == BPF_PROG_TYPE_TRACEPOINT;
19168 }
19169 
19170 static int check_attach_btf_id(struct bpf_verifier_env *env)
19171 {
19172 	struct bpf_prog *prog = env->prog;
19173 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
19174 	struct bpf_attach_target_info tgt_info = {};
19175 	u32 btf_id = prog->aux->attach_btf_id;
19176 	struct bpf_trampoline *tr;
19177 	int ret;
19178 	u64 key;
19179 
19180 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
19181 		if (prog->sleepable)
19182 			/* attach_btf_id checked to be zero already */
19183 			return 0;
19184 		verbose(env, "Syscall programs can only be sleepable\n");
19185 		return -EINVAL;
19186 	}
19187 
19188 	if (prog->sleepable && !can_be_sleepable(prog)) {
19189 		verbose(env, "Program of this type cannot be sleepable\n");
19190 		return -EINVAL;
19191 	}
19192 
19193 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
19194 		return check_struct_ops_btf_id(env);
19195 
19196 	if (prog->type != BPF_PROG_TYPE_TRACING &&
19197 	    prog->type != BPF_PROG_TYPE_LSM &&
19198 	    prog->type != BPF_PROG_TYPE_EXT)
19199 		return 0;
19200 
19201 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
19202 	if (ret)
19203 		return ret;
19204 
19205 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
19206 		/* to make freplace equivalent to their targets, they need to
19207 		 * inherit env->ops and expected_attach_type for the rest of the
19208 		 * verification
19209 		 */
19210 		env->ops = bpf_verifier_ops[tgt_prog->type];
19211 		prog->expected_attach_type = tgt_prog->expected_attach_type;
19212 	}
19213 
19214 	/* store info about the attachment target that will be used later */
19215 	prog->aux->attach_func_proto = tgt_info.tgt_type;
19216 	prog->aux->attach_func_name = tgt_info.tgt_name;
19217 	prog->aux->mod = tgt_info.tgt_mod;
19218 
19219 	if (tgt_prog) {
19220 		prog->aux->saved_dst_prog_type = tgt_prog->type;
19221 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
19222 	}
19223 
19224 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
19225 		prog->aux->attach_btf_trace = true;
19226 		return 0;
19227 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
19228 		return bpf_iter_prog_supported(prog);
19229 	}
19230 
19231 	if (prog->type == BPF_PROG_TYPE_LSM) {
19232 		ret = bpf_lsm_verify_prog(&env->log, prog);
19233 		if (ret < 0)
19234 			return ret;
19235 	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
19236 		   btf_id_set_contains(&btf_id_deny, btf_id)) {
19237 		verbose(env, "Attaching tracing programs to function '%s' is rejected.\n",
19238 			tgt_info.tgt_name);
19239 		return -EINVAL;
19240 	} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
19241 		   prog->expected_attach_type == BPF_TRACE_FSESSION ||
19242 		   prog->expected_attach_type == BPF_MODIFY_RETURN) &&
19243 		   btf_id_set_contains(&noreturn_deny, btf_id)) {
19244 		verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
19245 			tgt_info.tgt_name);
19246 		return -EINVAL;
19247 	}
19248 
19249 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
19250 	tr = bpf_trampoline_get(key, &tgt_info);
19251 	if (!tr)
19252 		return -ENOMEM;
19253 
19254 	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
19255 		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
19256 
19257 	prog->aux->dst_trampoline = tr;
19258 	return 0;
19259 }
19260 
19261 struct btf *bpf_get_btf_vmlinux(void)
19262 {
19263 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
19264 		mutex_lock(&bpf_verifier_lock);
19265 		if (!btf_vmlinux)
19266 			btf_vmlinux = btf_parse_vmlinux();
19267 		mutex_unlock(&bpf_verifier_lock);
19268 	}
19269 	return btf_vmlinux;
19270 }
19271 
19272 /*
19273  * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
19274  * this case expect that every file descriptor in the array is either a map or
19275  * a BTF. Everything else is considered to be trash.
19276  */
19277 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
19278 {
19279 	struct bpf_map *map;
19280 	struct btf *btf;
19281 	CLASS(fd, f)(fd);
19282 	int err;
19283 
19284 	map = __bpf_map_get(f);
19285 	if (!IS_ERR(map)) {
19286 		err = __add_used_map(env, map);
19287 		if (err < 0)
19288 			return err;
19289 		return 0;
19290 	}
19291 
19292 	btf = __btf_get_by_fd(f);
19293 	if (!IS_ERR(btf)) {
19294 		btf_get(btf);
19295 		return __add_used_btf(env, btf);
19296 	}
19297 
19298 	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
19299 	return PTR_ERR(map);
19300 }
19301 
19302 static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
19303 {
19304 	size_t size = sizeof(int);
19305 	int ret;
19306 	int fd;
19307 	u32 i;
19308 
19309 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
19310 
19311 	/*
19312 	 * The only difference between old (no fd_array_cnt is given) and new
19313 	 * APIs is that in the latter case the fd_array is expected to be
19314 	 * continuous and is scanned for map fds right away
19315 	 */
19316 	if (!attr->fd_array_cnt)
19317 		return 0;
19318 
19319 	/* Check for integer overflow */
19320 	if (attr->fd_array_cnt >= (U32_MAX / size)) {
19321 		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
19322 		return -EINVAL;
19323 	}
19324 
19325 	for (i = 0; i < attr->fd_array_cnt; i++) {
19326 		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
19327 			return -EFAULT;
19328 
19329 		ret = add_fd_from_fd_array(env, fd);
19330 		if (ret)
19331 			return ret;
19332 	}
19333 
19334 	return 0;
19335 }
19336 
19337 /* replace a generic kfunc with a specialized version if necessary */
19338 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
19339 {
19340 	struct bpf_prog *prog = env->prog;
19341 	bool seen_direct_write;
19342 	void *xdp_kfunc;
19343 	bool is_rdonly;
19344 	u32 func_id = desc->func_id;
19345 	u16 offset = desc->offset;
19346 	unsigned long addr = desc->addr;
19347 
19348 	if (offset) /* return if module BTF is used */
19349 		return 0;
19350 
19351 	if (bpf_dev_bound_kfunc_id(func_id)) {
19352 		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
19353 		if (xdp_kfunc)
19354 			addr = (unsigned long)xdp_kfunc;
19355 		/* fallback to default kfunc when not supported by netdev */
19356 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
19357 		seen_direct_write = env->seen_direct_write;
19358 		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
19359 
19360 		if (is_rdonly)
19361 			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
19362 
19363 		/* restore env->seen_direct_write to its original value, since
19364 		 * may_access_direct_pkt_data mutates it
19365 		 */
19366 		env->seen_direct_write = seen_direct_write;
19367 	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
19368 		if (bpf_lsm_has_d_inode_locked(prog))
19369 			addr = (unsigned long)bpf_set_dentry_xattr_locked;
19370 	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
19371 		if (bpf_lsm_has_d_inode_locked(prog))
19372 			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
19373 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
19374 		if (!env->insn_aux_data[insn_idx].non_sleepable)
19375 			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
19376 	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
19377 		if (env->insn_aux_data[insn_idx].non_sleepable)
19378 			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
19379 	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
19380 		if (env->insn_aux_data[insn_idx].non_sleepable)
19381 			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
19382 	}
19383 	desc->addr = addr;
19384 	return 0;
19385 }
19386 
19387 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
19388 					    u16 struct_meta_reg,
19389 					    u16 node_offset_reg,
19390 					    struct bpf_insn *insn,
19391 					    struct bpf_insn *insn_buf,
19392 					    int *cnt)
19393 {
19394 	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
19395 	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
19396 
19397 	insn_buf[0] = addr[0];
19398 	insn_buf[1] = addr[1];
19399 	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
19400 	insn_buf[3] = *insn;
19401 	*cnt = 4;
19402 }
19403 
19404 int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
19405 		     struct bpf_insn *insn_buf, int insn_idx, int *cnt)
19406 {
19407 	struct bpf_kfunc_desc *desc;
19408 	int err;
19409 
19410 	if (!insn->imm) {
19411 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
19412 		return -EINVAL;
19413 	}
19414 
19415 	*cnt = 0;
19416 
19417 	/* insn->imm has the btf func_id. Replace it with an offset relative to
19418 	 * __bpf_call_base, unless the JIT needs to call functions that are
19419 	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
19420 	 */
19421 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
19422 	if (!desc) {
19423 		verifier_bug(env, "kernel function descriptor not found for func_id %u",
19424 			     insn->imm);
19425 		return -EFAULT;
19426 	}
19427 
19428 	err = specialize_kfunc(env, desc, insn_idx);
19429 	if (err)
19430 		return err;
19431 
19432 	if (!bpf_jit_supports_far_kfunc_call())
19433 		insn->imm = BPF_CALL_IMM(desc->addr);
19434 
19435 	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
19436 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19437 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19438 		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
19439 
19440 		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
19441 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
19442 				     insn_idx);
19443 			return -EFAULT;
19444 		}
19445 
19446 		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
19447 		insn_buf[1] = addr[0];
19448 		insn_buf[2] = addr[1];
19449 		insn_buf[3] = *insn;
19450 		*cnt = 4;
19451 	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
19452 		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
19453 		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
19454 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19455 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19456 
19457 		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
19458 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
19459 				     insn_idx);
19460 			return -EFAULT;
19461 		}
19462 
19463 		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
19464 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
19465 				     insn_idx);
19466 			return -EFAULT;
19467 		}
19468 
19469 		insn_buf[0] = addr[0];
19470 		insn_buf[1] = addr[1];
19471 		insn_buf[2] = *insn;
19472 		*cnt = 3;
19473 	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
19474 		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
19475 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19476 		int struct_meta_reg = BPF_REG_3;
19477 		int node_offset_reg = BPF_REG_4;
19478 
19479 		/* list_add/rbtree_add have an extra arg (prev/less),
19480 		 * so args-to-fixup are in diff regs.
19481 		 */
19482 		if (desc->func_id == special_kfunc_list[KF_bpf_list_add] ||
19483 		    is_bpf_rbtree_add_kfunc(desc->func_id)) {
19484 			struct_meta_reg = BPF_REG_4;
19485 			node_offset_reg = BPF_REG_5;
19486 		}
19487 
19488 		if (!kptr_struct_meta) {
19489 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
19490 				     insn_idx);
19491 			return -EFAULT;
19492 		}
19493 
19494 		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
19495 						node_offset_reg, insn, insn_buf, cnt);
19496 	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
19497 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
19498 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
19499 		*cnt = 1;
19500 	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
19501 		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
19502 		/*
19503 		 * inline the bpf_session_is_return() for fsession:
19504 		 *   bool bpf_session_is_return(void *ctx)
19505 		 *   {
19506 		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
19507 		 *   }
19508 		 */
19509 		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19510 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
19511 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
19512 		*cnt = 3;
19513 	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
19514 		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
19515 		/*
19516 		 * inline bpf_session_cookie() for fsession:
19517 		 *   __u64 *bpf_session_cookie(void *ctx)
19518 		 *   {
19519 		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
19520 		 *       return &((u64 *)ctx)[-off];
19521 		 *   }
19522 		 */
19523 		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19524 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
19525 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
19526 		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
19527 		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
19528 		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
19529 		*cnt = 6;
19530 	}
19531 
19532 	if (env->insn_aux_data[insn_idx].arg_prog) {
19533 		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
19534 		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
19535 		int idx = *cnt;
19536 
19537 		insn_buf[idx++] = ld_addrs[0];
19538 		insn_buf[idx++] = ld_addrs[1];
19539 		insn_buf[idx++] = *insn;
19540 		*cnt = idx;
19541 	}
19542 	return 0;
19543 }
19544 
19545 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr,
19546 	      struct bpf_log_attr *attr_log)
19547 {
19548 	u64 start_time = ktime_get_ns();
19549 	struct bpf_verifier_env *env;
19550 	int i, len, ret = -EINVAL, err;
19551 	bool is_priv;
19552 
19553 	BTF_TYPE_EMIT(enum bpf_features);
19554 
19555 	/* no program is valid */
19556 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
19557 		return -EINVAL;
19558 
19559 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
19560 	 * allocate/free it every time bpf_check() is called
19561 	 */
19562 	env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT);
19563 	if (!env)
19564 		return -ENOMEM;
19565 
19566 	env->bt.env = env;
19567 
19568 	len = (*prog)->len;
19569 	env->insn_aux_data =
19570 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
19571 	ret = -ENOMEM;
19572 	if (!env->insn_aux_data)
19573 		goto err_free_env;
19574 	for (i = 0; i < len; i++)
19575 		env->insn_aux_data[i].orig_idx = i;
19576 	env->succ = bpf_iarray_realloc(NULL, 2);
19577 	if (!env->succ)
19578 		goto err_free_env;
19579 	env->prog = *prog;
19580 	env->ops = bpf_verifier_ops[env->prog->type];
19581 
19582 	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
19583 	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
19584 	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
19585 	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
19586 	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
19587 
19588 	bpf_get_btf_vmlinux();
19589 
19590 	/* grab the mutex to protect few globals used by verifier */
19591 	if (!is_priv)
19592 		mutex_lock(&bpf_verifier_lock);
19593 
19594 	/* user could have requested verbose verifier output
19595 	 * and supplied buffer to store the verification trace
19596 	 */
19597 	ret = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size);
19598 	if (ret)
19599 		goto err_unlock;
19600 
19601 	ret = process_fd_array(env, attr, uattr);
19602 	if (ret)
19603 		goto skip_full_check;
19604 
19605 	mark_verifier_state_clean(env);
19606 
19607 	if (IS_ERR(btf_vmlinux)) {
19608 		/* Either gcc or pahole or kernel are broken. */
19609 		verbose(env, "in-kernel BTF is malformed\n");
19610 		ret = PTR_ERR(btf_vmlinux);
19611 		goto skip_full_check;
19612 	}
19613 
19614 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
19615 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
19616 		env->strict_alignment = true;
19617 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
19618 		env->strict_alignment = false;
19619 
19620 	if (is_priv)
19621 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
19622 	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
19623 
19624 	env->explored_states = kvzalloc_objs(struct list_head,
19625 					     state_htab_size(env),
19626 					     GFP_KERNEL_ACCOUNT);
19627 	ret = -ENOMEM;
19628 	if (!env->explored_states)
19629 		goto skip_full_check;
19630 
19631 	for (i = 0; i < state_htab_size(env); i++)
19632 		INIT_LIST_HEAD(&env->explored_states[i]);
19633 	INIT_LIST_HEAD(&env->free_list);
19634 
19635 	ret = bpf_check_btf_info_early(env, attr, uattr);
19636 	if (ret < 0)
19637 		goto skip_full_check;
19638 
19639 	ret = add_subprog_and_kfunc(env);
19640 	if (ret < 0)
19641 		goto skip_full_check;
19642 
19643 	ret = check_subprogs(env);
19644 	if (ret < 0)
19645 		goto skip_full_check;
19646 
19647 	ret = bpf_check_btf_info(env, attr, uattr);
19648 	if (ret < 0)
19649 		goto skip_full_check;
19650 
19651 	ret = check_and_resolve_insns(env);
19652 	if (ret < 0)
19653 		goto skip_full_check;
19654 
19655 	if (bpf_prog_is_offloaded(env->prog->aux)) {
19656 		ret = bpf_prog_offload_verifier_prep(env->prog);
19657 		if (ret)
19658 			goto skip_full_check;
19659 	}
19660 
19661 	ret = bpf_check_cfg(env);
19662 	if (ret < 0)
19663 		goto skip_full_check;
19664 
19665 	ret = bpf_compute_postorder(env);
19666 	if (ret < 0)
19667 		goto skip_full_check;
19668 
19669 	ret = bpf_stack_liveness_init(env);
19670 	if (ret)
19671 		goto skip_full_check;
19672 
19673 	ret = check_attach_btf_id(env);
19674 	if (ret)
19675 		goto skip_full_check;
19676 
19677 	ret = bpf_compute_const_regs(env);
19678 	if (ret < 0)
19679 		goto skip_full_check;
19680 
19681 	ret = bpf_prune_dead_branches(env);
19682 	if (ret < 0)
19683 		goto skip_full_check;
19684 
19685 	ret = sort_subprogs_topo(env);
19686 	if (ret < 0)
19687 		goto skip_full_check;
19688 
19689 	ret = bpf_compute_scc(env);
19690 	if (ret < 0)
19691 		goto skip_full_check;
19692 
19693 	ret = bpf_compute_live_registers(env);
19694 	if (ret < 0)
19695 		goto skip_full_check;
19696 
19697 	ret = mark_fastcall_patterns(env);
19698 	if (ret < 0)
19699 		goto skip_full_check;
19700 
19701 	ret = do_check_main(env);
19702 	ret = ret ?: do_check_subprogs(env);
19703 
19704 	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
19705 		ret = bpf_prog_offload_finalize(env);
19706 
19707 skip_full_check:
19708 	kvfree(env->explored_states);
19709 
19710 	/* might decrease stack depth, keep it before passes that
19711 	 * allocate additional slots.
19712 	 */
19713 	if (ret == 0)
19714 		ret = bpf_remove_fastcall_spills_fills(env);
19715 
19716 	if (ret == 0)
19717 		ret = check_max_stack_depth(env);
19718 
19719 	/* instruction rewrites happen after this point */
19720 	if (ret == 0)
19721 		ret = bpf_optimize_bpf_loop(env);
19722 
19723 	if (is_priv) {
19724 		if (ret == 0)
19725 			bpf_opt_hard_wire_dead_code_branches(env);
19726 		if (ret == 0)
19727 			ret = bpf_opt_remove_dead_code(env);
19728 		if (ret == 0)
19729 			ret = bpf_opt_remove_nops(env);
19730 	} else {
19731 		if (ret == 0)
19732 			sanitize_dead_code(env);
19733 	}
19734 
19735 	if (ret == 0)
19736 		/* program is valid, convert *(u32*)(ctx + off) accesses */
19737 		ret = bpf_convert_ctx_accesses(env);
19738 
19739 	if (ret == 0)
19740 		ret = bpf_do_misc_fixups(env);
19741 
19742 	/* do 32-bit optimization after insn patching has done so those patched
19743 	 * insns could be handled correctly.
19744 	 */
19745 	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
19746 		ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr);
19747 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
19748 								     : false;
19749 	}
19750 
19751 	if (ret == 0)
19752 		ret = bpf_fixup_call_args(env);
19753 
19754 	env->verification_time = ktime_get_ns() - start_time;
19755 	print_verification_stats(env);
19756 	env->prog->aux->verified_insns = env->insn_processed;
19757 
19758 	/* preserve original error even if log finalization is successful */
19759 	err = bpf_log_attr_finalize(attr_log, &env->log);
19760 	if (err)
19761 		ret = err;
19762 
19763 	if (ret)
19764 		goto err_release_maps;
19765 
19766 	if (env->used_map_cnt) {
19767 		/* if program passed verifier, update used_maps in bpf_prog_info */
19768 		env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0],
19769 							 env->used_map_cnt,
19770 							 GFP_KERNEL_ACCOUNT);
19771 
19772 		if (!env->prog->aux->used_maps) {
19773 			ret = -ENOMEM;
19774 			goto err_release_maps;
19775 		}
19776 
19777 		memcpy(env->prog->aux->used_maps, env->used_maps,
19778 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
19779 		env->prog->aux->used_map_cnt = env->used_map_cnt;
19780 	}
19781 	if (env->used_btf_cnt) {
19782 		/* if program passed verifier, update used_btfs in bpf_prog_aux */
19783 		env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0],
19784 							 env->used_btf_cnt,
19785 							 GFP_KERNEL_ACCOUNT);
19786 		if (!env->prog->aux->used_btfs) {
19787 			ret = -ENOMEM;
19788 			goto err_release_maps;
19789 		}
19790 
19791 		memcpy(env->prog->aux->used_btfs, env->used_btfs,
19792 		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
19793 		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
19794 	}
19795 	if (env->used_map_cnt || env->used_btf_cnt) {
19796 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
19797 		 * bpf_ld_imm64 instructions
19798 		 */
19799 		convert_pseudo_ld_imm64(env);
19800 	}
19801 
19802 	adjust_btf_func(env);
19803 
19804 	/* extension progs temporarily inherit the attach_type of their targets
19805 	   for verification purposes, so set it back to zero before returning
19806 	 */
19807 	if (env->prog->type == BPF_PROG_TYPE_EXT)
19808 		env->prog->expected_attach_type = 0;
19809 
19810 	env->prog = __bpf_prog_select_runtime(env, env->prog, &ret);
19811 
19812 err_release_maps:
19813 	if (ret)
19814 		release_insn_arrays(env);
19815 	if (!env->prog->aux->used_maps)
19816 		/* if we didn't copy map pointers into bpf_prog_info, release
19817 		 * them now. Otherwise free_used_maps() will release them.
19818 		 */
19819 		release_maps(env);
19820 	if (!env->prog->aux->used_btfs)
19821 		release_btfs(env);
19822 
19823 	*prog = env->prog;
19824 
19825 	module_put(env->attach_btf_mod);
19826 err_unlock:
19827 	if (!is_priv)
19828 		mutex_unlock(&bpf_verifier_lock);
19829 	bpf_clear_insn_aux_data(env, 0, env->prog->len);
19830 	vfree(env->insn_aux_data);
19831 err_free_env:
19832 	bpf_stack_liveness_free(env);
19833 	kvfree(env->cfg.insn_postorder);
19834 	kvfree(env->scc_info);
19835 	kvfree(env->succ);
19836 	kvfree(env->gotox_tmp_buf);
19837 	kvfree(env);
19838 	return ret;
19839 }
19840