xref: /linux/kernel/bpf/verifier.c (revision 68f4e480b089abae26fbab0c38c3df3cbac3d79d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/cnum.h>
30 #include <linux/bpf_mem_alloc.h>
31 #include <net/xdp.h>
32 #include <linux/trace_events.h>
33 #include <linux/kallsyms.h>
34 
35 #include "disasm.h"
36 
/*
 * Table of verifier ops pointers, one slot per BPF_PROG_TYPE() entry listed
 * in <linux/bpf_types.h>.  Each entry expands to a designated initializer
 * "[_id] = &<_name>_verifier_ops", so the table is indexed by the entry's _id.
 * BPF_MAP_TYPE() and BPF_LINK_TYPE() are defined empty here so that map and
 * link entries in the same header contribute nothing to this table.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
47 
/*
 * Feature identifiers.  __MAX_BPF_FEAT is not a feature itself; it is one
 * past the last explicitly numbered entry.
 * NOTE(review): where these values are exposed/consumed is not shown here.
 */
enum bpf_features {
	BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
	BPF_FEAT_STREAMS	     = 1,
	__MAX_BPF_FEAT,
};
53 
/* Global (non-static) bpf_mem_alloc instance; visible to other translation units. */
struct bpf_mem_alloc bpf_global_percpu_ma;
/* NOTE(review): presumably records that bpf_global_percpu_ma was initialized — confirm at the use site. */
static bool bpf_global_percpu_ma_set;
56 
57 /* bpf_check() is a static code analyzer that walks eBPF program
58  * instruction by instruction and updates register/stack state.
59  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
60  *
61  * The first pass is depth-first-search to check that the program is a DAG.
62  * It rejects the following programs:
63  * - larger than BPF_MAXINSNS insns
64  * - if loop is present (detected via back-edge)
65  * - unreachable insns exist (shouldn't be a forest. program = one function)
66  * - out of bounds or malformed jumps
67  * The second pass is all possible path descent from the 1st insn.
68  * Since it's analyzing all paths through the program, the length of the
69  * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
71  * Number of 'branches to be analyzed' is limited to 1k
72  *
73  * On entry to each instruction, each register has a type, and the instruction
74  * changes the types of the registers depending on instruction semantics.
75  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
76  * copied to R1.
77  *
78  * All registers are 64-bit.
79  * R0 - return register
80  * R1-R5 argument passing registers
81  * R6-R9 callee saved registers
82  * R10 - frame pointer read-only
83  *
84  * At the start of BPF program the register R1 contains a pointer to bpf_context
85  * and has type PTR_TO_CTX.
86  *
87  * Verifier tracks arithmetic operations on pointers in case:
88  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
89  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
90  * 1st insn copies R10 (which has FRAME_PTR) type into R1
91  * and 2nd arithmetic instruction is pattern matched to recognize
92  * that it wants to construct a pointer to some element within stack.
93  * So after 2nd insn, the register R1 has type PTR_TO_STACK
94  * (and -20 constant is saved for further stack bounds checking).
95  * Meaning that this reg is a pointer to stack plus known immediate constant.
96  *
97  * Most of the time the registers have SCALAR_VALUE type, which
98  * means the register has some value, but it's not a valid pointer.
99  * (like pointer plus pointer becomes SCALAR_VALUE type)
100  *
101  * When verifier sees load or store instructions the type of base register
102  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
103  * four pointer types recognized by check_mem_access() function.
104  *
105  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
106  * and the range of [ptr, ptr + map's value_size) is accessible.
107  *
108  * registers used to pass values to function calls are checked against
109  * function argument constraints.
110  *
111  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
112  * It means that the register type passed to this function must be
113  * PTR_TO_STACK and it will be used inside the function as
114  * 'pointer to map element key'
115  *
116  * For example the argument constraints for bpf_map_lookup_elem():
117  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
118  *   .arg1_type = ARG_CONST_MAP_PTR,
119  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
120  *
121  * ret_type says that this function returns 'pointer to map elem value or null'
122  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
123  * 2nd argument should be a pointer to stack, which will be used inside
124  * the helper function as a pointer to map element key.
125  *
126  * On the kernel side the helper function looks like:
127  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
128  * {
129  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
130  *    void *key = (void *) (unsigned long) r2;
131  *    void *value;
132  *
133  *    here kernel can access 'key' and 'map' pointers safely, knowing that
134  *    [key, key + map->key_size) bytes are valid and were initialized on
135  *    the stack of eBPF program.
136  * }
137  *
138  * Corresponding eBPF program may look like:
139  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
140  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
141  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
142  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
143  * here verifier looks at prototype of map_lookup_elem() and sees:
144  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
145  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
146  *
147  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
148  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
149  * and were initialized prior to this call.
150  * If it's ok, then verifier allows this BPF_CALL insn and looks at
151  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
152  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
153  * returns either pointer to map value or NULL.
154  *
155  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
156  * insn, the register holding that pointer in the true branch changes state to
157  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
158  * branch. See check_cond_jmp_op().
159  *
160  * After the call R0 is set to return type of the function and registers R1-R5
161  * are set to NOT_INIT to indicate that they are no longer readable.
162  *
163  * The following reference types represent a potential reference to a kernel
164  * resource which, after first being allocated, must be checked and freed by
165  * the BPF program:
166  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
167  *
168  * When the verifier sees a helper call return a reference type, it allocates a
169  * pointer id for the reference and stores it in the current function state.
170  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
171  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
172  * passes through a NULL-check conditional. For the branch wherein the state is
173  * changed to CONST_IMM, the verifier releases the reference.
174  *
175  * For each helper function that allocates a reference, such as
176  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
177  * bpf_sk_release(). When a reference type passes into the release function,
178  * the verifier also releases the reference. If any unchecked or unreleased
179  * reference remains at the end of the program, the verifier rejects it.
180  */
181 
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* link to another stack element
	 * (NOTE(review): presumably the element pushed before this one — confirm)
	 */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
195 
/*
 * Verifier limits.
 * NOTE(review): the exact quantities these bound (pending branch states,
 * states per instruction, ...) are enforced at use sites — confirm there.
 */
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

/* NOTE(review): presumably the largest object size served by bpf_global_percpu_ma — confirm. */
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512

/* NOTE(review): presumably the smallest stack depth for which a private stack is used — confirm. */
#define BPF_PRIV_STACK_MIN_SIZE		64
202 
203 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id);
204 static int release_reference_nomark(struct bpf_verifier_state *state, int id);
205 static int release_reference(struct bpf_verifier_env *env, int id);
206 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
207 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
208 static int ref_set_non_owning(struct bpf_verifier_env *env,
209 			      struct bpf_reg_state *reg);
210 static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg);
211 static inline bool in_sleepable_context(struct bpf_verifier_env *env);
212 static const char *non_sleepable_context_description(struct bpf_verifier_env *env);
213 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
214 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
215 
216 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
217 			      struct bpf_map *map,
218 			      bool unpriv, bool poison)
219 {
220 	unpriv |= bpf_map_ptr_unpriv(aux);
221 	aux->map_ptr_state.unpriv = unpriv;
222 	aux->map_ptr_state.poison = poison;
223 	aux->map_ptr_state.map_ptr = map;
224 }
225 
226 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
227 {
228 	bool poisoned = bpf_map_key_poisoned(aux);
229 
230 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
231 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
232 }
233 
234 static void update_ref_obj(struct ref_obj_desc *ref_obj, struct bpf_reg_state *reg)
235 {
236 	ref_obj->id = reg->id;
237 	ref_obj->parent_id = reg->parent_id;
238 	ref_obj->cnt++;
239 }
240 
241 static int validate_ref_obj(struct bpf_verifier_env *env, struct ref_obj_desc *ref_obj)
242 {
243 	if (ref_obj->cnt > 1) {
244 		verifier_bug(env, "function expects only one referenced object but got %d\n",
245 			     ref_obj->cnt);
246 		return -EFAULT;
247 	}
248 
249 	return 0;
250 }
251 
/*
 * Metadata gathered about a call's arguments.
 * NOTE(review): field semantics are established where the fields are filled
 * in; only the embedded ref_obj and dynptr descriptors are consumed by code
 * near this definition (see mark_stack_slots_dynptr()) — confirm the rest
 * against the argument-checking code.
 */
struct bpf_call_arg_meta {
	struct bpf_map_desc map;
	struct bpf_dynptr_desc dynptr;
	struct ref_obj_desc ref_obj;
	bool raw_mode;
	bool pkt_access;
	u8 release_regno;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
	struct btf_field *kptr_field;
	s64 const_map_key;
};
272 
/*
 * Description of a kfunc.
 * NOTE(review): presumably @btf is the BTF object the kfunc lives in, @proto
 * its function prototype type, @flags its kfunc flags and @id its BTF id —
 * confirm against the code that fills this structure.
 */
struct bpf_kfunc_meta {
	struct btf *btf;
	const struct btf_type *proto;
	const char *name;
	const u32 *flags;
	s32 id;
};
280 
281 struct btf *btf_vmlinux;
282 
/*
 * Argument/register number wrapped in a struct so it cannot be confused with
 * a plain int.  As encoded by argno_from_reg() and argno_from_arg(): a
 * non-negative value is a register number, a negative value is the negated
 * argument number.
 */
typedef struct argno {
	int argno;
} argno_t;
286 
287 static argno_t argno_from_reg(u32 regno)
288 {
289 	return (argno_t){ .argno = regno };
290 }
291 
292 static argno_t argno_from_arg(u32 arg)
293 {
294 	return (argno_t){ .argno = -arg };
295 }
296 
297 static int reg_from_argno(argno_t a)
298 {
299 	if (a.argno >= 0)
300 		return a.argno;
301 	if (a.argno >= -MAX_BPF_FUNC_REG_ARGS)
302 		return -a.argno;
303 	return -1;
304 }
305 
306 static int arg_from_argno(argno_t a)
307 {
308 	if (a.argno < 0)
309 		return -a.argno;
310 	return -1;
311 }
312 
313 static int arg_idx_from_argno(argno_t a)
314 {
315 	return arg_from_argno(a) - 1;
316 }
317 
318 static const char *btf_type_name(const struct btf *btf, u32 id)
319 {
320 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
321 }
322 
323 static DEFINE_MUTEX(bpf_verifier_lock);
324 static DEFINE_MUTEX(bpf_percpu_ma_lock);
325 
326 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
327 {
328 	struct bpf_verifier_env *env = private_data;
329 	va_list args;
330 
331 	if (!bpf_verifier_log_needed(&env->log))
332 		return;
333 
334 	va_start(args, fmt);
335 	bpf_verifier_vlog(&env->log, fmt, args);
336 	va_end(args);
337 }
338 
339 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
340 				   struct bpf_reg_state *reg,
341 				   struct bpf_retval_range range, const char *ctx,
342 				   const char *reg_name)
343 {
344 	bool unknown = true;
345 
346 	verbose(env, "%s the register %s has", ctx, reg_name);
347 	if (reg_smin(reg) > S64_MIN) {
348 		verbose(env, " smin=%lld", reg_smin(reg));
349 		unknown = false;
350 	}
351 	if (reg_smax(reg) < S64_MAX) {
352 		verbose(env, " smax=%lld", reg_smax(reg));
353 		unknown = false;
354 	}
355 	if (unknown)
356 		verbose(env, " unknown scalar value");
357 	verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
358 }
359 
360 static bool reg_not_null(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
361 {
362 	enum bpf_reg_type type;
363 
364 	type = reg->type;
365 	if (type_may_be_null(type))
366 		return false;
367 
368 	type = base_type(type);
369 	return type == PTR_TO_SOCKET ||
370 		type == PTR_TO_TCP_SOCK ||
371 		type == PTR_TO_MAP_VALUE ||
372 		type == PTR_TO_MAP_KEY ||
373 		type == PTR_TO_SOCK_COMMON ||
374 		(type == PTR_TO_BTF_ID && is_trusted_reg(env, reg)) ||
375 		(type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) ||
376 		type == CONST_PTR_TO_MAP;
377 }
378 
379 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
380 {
381 	struct btf_record *rec = NULL;
382 	struct btf_struct_meta *meta;
383 
384 	if (reg->type == PTR_TO_MAP_VALUE) {
385 		rec = reg->map_ptr->record;
386 	} else if (type_is_ptr_alloc_obj(reg->type)) {
387 		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
388 		if (meta)
389 			rec = meta->record;
390 	}
391 	return rec;
392 }
393 
394 bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog)
395 {
396 	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
397 
398 	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
399 }
400 
401 static bool subprog_returns_void(struct bpf_verifier_env *env, int subprog)
402 {
403 	const struct btf_type *type, *func, *func_proto;
404 	const struct btf *btf = env->prog->aux->btf;
405 	u32 btf_id;
406 
407 	btf_id = env->prog->aux->func_info[subprog].type_id;
408 
409 	func = btf_type_by_id(btf, btf_id);
410 	if (verifier_bug_if(!func, env, "btf_id %u not found", btf_id))
411 		return false;
412 
413 	func_proto = btf_type_by_id(btf, func->type);
414 	if (!func_proto)
415 		return false;
416 
417 	type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
418 	if (!type)
419 		return false;
420 
421 	return btf_type_is_void(type);
422 }
423 
424 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
425 {
426 	struct bpf_func_info *info;
427 
428 	if (!env->prog->aux->func_info)
429 		return "";
430 
431 	info = &env->prog->aux->func_info[subprog];
432 	return btf_type_name(env->prog->aux->btf, info->type_id);
433 }
434 
435 void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
436 {
437 	struct bpf_subprog_info *info = subprog_info(env, subprog);
438 
439 	info->is_cb = true;
440 	info->is_async_cb = true;
441 	info->is_exception_cb = true;
442 }
443 
444 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
445 {
446 	return subprog_info(env, subprog)->is_exception_cb;
447 }
448 
449 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
450 {
451 	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK);
452 }
453 
454 static bool type_is_rdonly_mem(u32 type)
455 {
456 	return type & MEM_RDONLY;
457 }
458 
459 static bool is_acquire_function(enum bpf_func_id func_id,
460 				const struct bpf_map *map)
461 {
462 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
463 
464 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
465 	    func_id == BPF_FUNC_sk_lookup_udp ||
466 	    func_id == BPF_FUNC_skc_lookup_tcp ||
467 	    func_id == BPF_FUNC_ringbuf_reserve ||
468 	    func_id == BPF_FUNC_kptr_xchg)
469 		return true;
470 
471 	if (func_id == BPF_FUNC_map_lookup_elem &&
472 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
473 	     map_type == BPF_MAP_TYPE_SOCKHASH))
474 		return true;
475 
476 	return false;
477 }
478 
479 static bool is_ptr_cast_function(enum bpf_func_id func_id)
480 {
481 	return func_id == BPF_FUNC_tcp_sock ||
482 		func_id == BPF_FUNC_sk_fullsock ||
483 		func_id == BPF_FUNC_skc_to_tcp_sock ||
484 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
485 		func_id == BPF_FUNC_skc_to_udp6_sock ||
486 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
487 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
488 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
489 }
490 
491 static bool is_sync_callback_calling_kfunc(u32 btf_id);
492 static bool is_async_callback_calling_kfunc(u32 btf_id);
493 static bool is_callback_calling_kfunc(u32 btf_id);
494 
495 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id);
496 static bool is_task_work_add_kfunc(u32 func_id);
497 
498 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
499 {
500 	return func_id == BPF_FUNC_for_each_map_elem ||
501 	       func_id == BPF_FUNC_find_vma ||
502 	       func_id == BPF_FUNC_loop ||
503 	       func_id == BPF_FUNC_user_ringbuf_drain;
504 }
505 
506 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
507 {
508 	return func_id == BPF_FUNC_timer_set_callback;
509 }
510 
511 static bool is_callback_calling_function(enum bpf_func_id func_id)
512 {
513 	return is_sync_callback_calling_function(func_id) ||
514 	       is_async_callback_calling_function(func_id);
515 }
516 
517 bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn)
518 {
519 	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
520 	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
521 }
522 
523 bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn)
524 {
525 	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
526 	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
527 }
528 
529 static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn)
530 {
531 	/* bpf_timer callbacks are never sleepable. */
532 	if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback)
533 		return false;
534 
535 	/* bpf_wq and bpf_task_work callbacks are always sleepable. */
536 	if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
537 	    (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
538 		return true;
539 
540 	verifier_bug(env, "unhandled async callback in is_async_cb_sleepable");
541 	return false;
542 }
543 
544 bool bpf_is_may_goto_insn(struct bpf_insn *insn)
545 {
546 	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
547 }
548 
549 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
550 {
551        int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
552 
553        /* We need to check that slots between [spi - nr_slots + 1, spi] are
554 	* within [0, allocated_stack).
555 	*
556 	* Please note that the spi grows downwards. For example, a dynptr
557 	* takes the size of two stack slots; the first slot will be at
558 	* spi and the second slot will be at spi - 1.
559 	*/
560        return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
561 }
562 
563 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
564 			          const char *obj_kind, int nr_slots)
565 {
566 	int off, spi;
567 
568 	if (!tnum_is_const(reg->var_off)) {
569 		verbose(env, "%s has to be at a constant offset\n", obj_kind);
570 		return -EINVAL;
571 	}
572 
573 	off = reg->var_off.value;
574 	if (off % BPF_REG_SIZE) {
575 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
576 		return -EINVAL;
577 	}
578 
579 	spi = bpf_get_spi(off);
580 	if (spi + 1 < nr_slots) {
581 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
582 		return -EINVAL;
583 	}
584 
585 	if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots))
586 		return -ERANGE;
587 	return spi;
588 }
589 
590 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
591 {
592 	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
593 }
594 
/* stack_slot_obj_get_spi() for an iterator spanning @nr_slots slots. */
static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	int spi = stack_slot_obj_get_spi(env, reg, "iter", nr_slots);

	return spi;
}
599 
/* stack_slot_obj_get_spi() for an irq_flag, which occupies a single slot. */
static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi = stack_slot_obj_get_spi(env, reg, "irq_flag", 1);

	return spi;
}
604 
605 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
606 {
607 	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
608 	case DYNPTR_TYPE_LOCAL:
609 		return BPF_DYNPTR_TYPE_LOCAL;
610 	case DYNPTR_TYPE_RINGBUF:
611 		return BPF_DYNPTR_TYPE_RINGBUF;
612 	case DYNPTR_TYPE_SKB:
613 		return BPF_DYNPTR_TYPE_SKB;
614 	case DYNPTR_TYPE_XDP:
615 		return BPF_DYNPTR_TYPE_XDP;
616 	case DYNPTR_TYPE_SKB_META:
617 		return BPF_DYNPTR_TYPE_SKB_META;
618 	case DYNPTR_TYPE_FILE:
619 		return BPF_DYNPTR_TYPE_FILE;
620 	default:
621 		return BPF_DYNPTR_TYPE_INVALID;
622 	}
623 }
624 
625 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
626 {
627 	switch (type) {
628 	case BPF_DYNPTR_TYPE_LOCAL:
629 		return DYNPTR_TYPE_LOCAL;
630 	case BPF_DYNPTR_TYPE_RINGBUF:
631 		return DYNPTR_TYPE_RINGBUF;
632 	case BPF_DYNPTR_TYPE_SKB:
633 		return DYNPTR_TYPE_SKB;
634 	case BPF_DYNPTR_TYPE_XDP:
635 		return DYNPTR_TYPE_XDP;
636 	case BPF_DYNPTR_TYPE_SKB_META:
637 		return DYNPTR_TYPE_SKB_META;
638 	case BPF_DYNPTR_TYPE_FILE:
639 		return DYNPTR_TYPE_FILE;
640 	default:
641 		return 0;
642 	}
643 }
644 
645 static bool dynptr_type_referenced(enum bpf_dynptr_type type)
646 {
647 	return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE;
648 }
649 
650 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
651 			      enum bpf_dynptr_type type,
652 			      bool first_slot, int id, int parent_id);
653 
654 
655 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
656 				   struct bpf_reg_state *sreg1,
657 				   struct bpf_reg_state *sreg2,
658 				   enum bpf_dynptr_type type, int parent_id)
659 {
660 	int id = ++env->id_gen;
661 
662 	__mark_dynptr_reg(sreg1, type, true, id, parent_id);
663 	__mark_dynptr_reg(sreg2, type, false, id, parent_id);
664 }
665 
666 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
667 			       struct bpf_reg_state *reg,
668 			       enum bpf_dynptr_type type)
669 {
670 	__mark_dynptr_reg(reg, type, true, ++env->id_gen, 0);
671 }
672 
673 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
674 				        struct bpf_func_state *state, int spi);
675 
/*
 * Turn the two stack slots addressed by @reg into a dynptr of the type
 * encoded in @arg_type.
 *
 * Any dynptr already occupying either slot is destroyed first.  For a
 * constructor call (@dynptr->type is BPF_DYNPTR_TYPE_INVALID) the parent id
 * comes from @ref_obj; if the new dynptr type is a referenced one, a new
 * reference with that parent is acquired at @insn_idx and becomes the
 * dynptr's parent instead.  Otherwise (clone) the parent id is copied from
 * @dynptr.
 *
 * Return: 0 on success, or a negative error from dynptr_get_spi(),
 * destroy_if_dynptr_stack_slot(), validate_ref_obj() or
 * acquire_reference(); -EINVAL if @arg_type carries no valid dynptr type.
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx,
				   struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, err, parent_id = 0;
	enum bpf_dynptr_type type;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
	 * to ensure that for the following example:
	 *	[d1][d1][d2][d2]
	 * spi    3   2   1   0
	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
	 * case they do belong to same dynptr, second call won't see slot_type
	 * as STACK_DYNPTR and will simply skip destruction.
	 */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
	if (err)
		return err;

	/* Note: the slot types are set before the dynptr type is validated below. */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	if (dynptr->type == BPF_DYNPTR_TYPE_INVALID) { /* dynptr constructors */
		err = validate_ref_obj(env, ref_obj);
		if (err)
			return err;

		/* Track parent's id if the parent is a referenced object */
		parent_id = ref_obj->id;

		if (dynptr_type_referenced(type)) {
			int id;

			/*
			 * Create an intermediate reference that tracks the referenced
			 * object for the referenced dynptr. Freeing a referenced dynptr
			 * through helpers/kfuncs will invalidate all clones.
			 */
			id = acquire_reference(env, insn_idx, parent_id);
			if (id < 0)
				return id;

			parent_id = id;
		}
	} else { /* bpf_dynptr_clone() */
		parent_id = dynptr->parent_id;
	}

	/* First slot lives at spi, second slot at spi - 1. */
	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
			       &state->stack[spi - 1].spilled_ptr, type, parent_id);

	return 0;
}
744 
745 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_stack_state *stack)
746 {
747 	int i;
748 
749 	for (i = 0; i < BPF_REG_SIZE; i++) {
750 		stack[0].slot_type[i] = STACK_INVALID;
751 		stack[1].slot_type[i] = STACK_INVALID;
752 	}
753 
754 	bpf_mark_reg_not_init(env, &stack[0].spilled_ptr);
755 	bpf_mark_reg_not_init(env, &stack[1].spilled_ptr);
756 }
757 
758 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
759 {
760 	struct bpf_func_state *state = bpf_func(env, reg);
761 	int spi;
762 
763 	spi = dynptr_get_spi(env, reg);
764 	if (spi < 0)
765 		return spi;
766 
767 	/*
768 	 * For referenced dynptr, release the parent ref which cascades to
769 	 * all clones and derived slices. For non-referenced dynptr, only
770 	 * the dynptr and slices derived from it will be invalidated.
771 	 */
772 	reg = &state->stack[spi].spilled_ptr;
773 	return release_reference(env, dynptr_type_referenced(reg->dynptr.type)
774 				      ? reg->parent_id
775 				      : reg->id);
776 }
777 
778 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
779 			       struct bpf_reg_state *reg);
780 
781 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
782 {
783 	if (!env->allow_ptr_leaks)
784 		bpf_mark_reg_not_init(env, reg);
785 	else
786 		__mark_reg_unknown(env, reg);
787 }
788 
/*
 * Count the stack dynptrs in the current verifier state whose parent id is
 * @v_parent_id.
 *
 * Only stack entries whose slot_type[0] is STACK_DYNPTR and whose spilled
 * register is the dynptr's first slot are counted, so each dynptr
 * contributes at most once.
 */
static int dynptr_ref_cnt(struct bpf_verifier_env *env, int v_parent_id)
{
	struct bpf_stack_state *stack;
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;
	int ref_cnt = 0;

	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, 1 << STACK_DYNPTR, ({
		/* Skip entries that are not dynptr stack slots. */
		if (!stack || stack->slot_type[0] != STACK_DYNPTR)
			continue;
		/* Skip the second slot of a dynptr. */
		if (!stack->spilled_ptr.dynptr.first_slot)
			continue;
		if (stack->spilled_ptr.parent_id == v_parent_id)
			ref_cnt++;
	}));

	return ref_cnt;
}
807 
/*
 * If stack slot @spi of @state belongs to a dynptr, destroy that dynptr.
 *
 * Return: 0 if the slot holds no dynptr or the dynptr was released; -EINVAL
 * if it is a referenced dynptr that may not be overwritten; otherwise the
 * error returned by release_reference().
 */
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
				        struct bpf_func_state *state, int spi)
{
	int err = 0;

	/* We always ensure that STACK_DYNPTR is never set partially,
	 * hence just checking for slot_type[0] is enough. This is
	 * different for STACK_SPILL, where it may be only set for
	 * 1 byte, so code has to use is_spilled_reg.
	 */
	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
		return 0;

	/* Reposition spi to first slot */
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		spi = spi + 1;

	/*
	 * A referenced dynptr can be overwritten only if there is at
	 * least one other dynptr sharing the same virtual ref parent,
	 * ensuring the reference can still be properly released.
	 */
	if (dynptr_type_referenced(state->stack[spi].spilled_ptr.dynptr.type) &&
	    dynptr_ref_cnt(env, state->stack[spi].spilled_ptr.parent_id) <= 1) {
		verbose(env, "cannot overwrite referenced dynptr\n");
		return -EINVAL;
	}

	/* Invalidate the dynptr and any derived slices */
	err = release_reference(env, state->stack[spi].spilled_ptr.id);
	if (!err) {
		/* Both slots of the dynptr (spi and spi - 1) are marked scratched. */
		mark_stack_slot_scratched(env, spi);
		mark_stack_slot_scratched(env, spi - 1);
	}

	return err;
}
845 
846 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
847 {
848 	int spi;
849 
850 	if (reg->type == CONST_PTR_TO_DYNPTR)
851 		return false;
852 
853 	spi = dynptr_get_spi(env, reg);
854 
855 	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
856 	 * error because this just means the stack state hasn't been updated yet.
857 	 * We will do check_mem_access to check and update stack bounds later.
858 	 */
859 	if (spi < 0 && spi != -ERANGE)
860 		return false;
861 
862 	/* We don't need to check if the stack slots are marked by previous
863 	 * dynptr initializations because we allow overwriting existing unreferenced
864 	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
865 	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
866 	 * touching are completely destructed before we reinitialize them for a new
867 	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
868 	 * instead of delaying it until the end where the user will get "Unreleased
869 	 * reference" error.
870 	 */
871 	return true;
872 }
873 
874 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
875 {
876 	struct bpf_func_state *state = bpf_func(env, reg);
877 	int i, spi;
878 
879 	/* This already represents first slot of initialized bpf_dynptr.
880 	 *
881 	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
882 	 * check_func_arg_reg_off's logic, so we don't need to check its
883 	 * offset and alignment.
884 	 */
885 	if (reg->type == CONST_PTR_TO_DYNPTR)
886 		return true;
887 
888 	spi = dynptr_get_spi(env, reg);
889 	if (spi < 0)
890 		return false;
891 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
892 		return false;
893 
894 	for (i = 0; i < BPF_REG_SIZE; i++) {
895 		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
896 		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
897 			return false;
898 	}
899 
900 	return true;
901 }
902 
903 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
904 				    enum bpf_arg_type arg_type)
905 {
906 	struct bpf_func_state *state = bpf_func(env, reg);
907 	enum bpf_dynptr_type dynptr_type;
908 	int spi;
909 
910 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
911 	if (arg_type == ARG_PTR_TO_DYNPTR)
912 		return true;
913 
914 	dynptr_type = arg_to_dynptr_type(arg_type);
915 	if (reg->type == CONST_PTR_TO_DYNPTR) {
916 		return reg->dynptr.type == dynptr_type;
917 	} else {
918 		spi = dynptr_get_spi(env, reg);
919 		if (spi < 0)
920 			return false;
921 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
922 	}
923 }
924 
925 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
926 
927 static bool in_rcu_cs(struct bpf_verifier_env *env);
928 
929 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
930 
931 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
932 				 struct bpf_kfunc_call_arg_meta *meta,
933 				 struct bpf_reg_state *reg, int insn_idx,
934 				 struct btf *btf, u32 btf_id, int nr_slots)
935 {
936 	struct bpf_func_state *state = bpf_func(env, reg);
937 	int spi, i, j, id;
938 
939 	spi = iter_get_spi(env, reg, nr_slots);
940 	if (spi < 0)
941 		return spi;
942 
943 	id = acquire_reference(env, insn_idx, 0);
944 	if (id < 0)
945 		return id;
946 
947 	for (i = 0; i < nr_slots; i++) {
948 		struct bpf_stack_state *slot = &state->stack[spi - i];
949 		struct bpf_reg_state *st = &slot->spilled_ptr;
950 
951 		__mark_reg_known_zero(st);
952 		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
953 		if (is_kfunc_rcu_protected(meta)) {
954 			if (in_rcu_cs(env))
955 				st->type |= MEM_RCU;
956 			else
957 				st->type |= PTR_UNTRUSTED;
958 		}
959 		st->id = i == 0 ? id : 0;
960 		st->iter.btf = btf;
961 		st->iter.btf_id = btf_id;
962 		st->iter.state = BPF_ITER_STATE_ACTIVE;
963 		st->iter.depth = 0;
964 
965 		for (j = 0; j < BPF_REG_SIZE; j++)
966 			slot->slot_type[j] = STACK_ITER;
967 
968 		mark_stack_slot_scratched(env, spi - i);
969 	}
970 
971 	return 0;
972 }
973 
974 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
975 				   struct bpf_reg_state *reg, int nr_slots)
976 {
977 	struct bpf_func_state *state = bpf_func(env, reg);
978 	int spi, i, j;
979 
980 	spi = iter_get_spi(env, reg, nr_slots);
981 	if (spi < 0)
982 		return spi;
983 
984 	for (i = 0; i < nr_slots; i++) {
985 		struct bpf_stack_state *slot = &state->stack[spi - i];
986 		struct bpf_reg_state *st = &slot->spilled_ptr;
987 
988 		if (i == 0)
989 			WARN_ON_ONCE(release_reference(env, st->id));
990 
991 		bpf_mark_reg_not_init(env, st);
992 
993 		for (j = 0; j < BPF_REG_SIZE; j++)
994 			slot->slot_type[j] = STACK_INVALID;
995 
996 		mark_stack_slot_scratched(env, spi - i);
997 	}
998 
999 	return 0;
1000 }
1001 
1002 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1003 				     struct bpf_reg_state *reg, int nr_slots)
1004 {
1005 	struct bpf_func_state *state = bpf_func(env, reg);
1006 	int spi, i, j;
1007 
1008 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1009 	 * will do check_mem_access to check and update stack bounds later, so
1010 	 * return true for that case.
1011 	 */
1012 	spi = iter_get_spi(env, reg, nr_slots);
1013 	if (spi == -ERANGE)
1014 		return true;
1015 	if (spi < 0)
1016 		return false;
1017 
1018 	for (i = 0; i < nr_slots; i++) {
1019 		struct bpf_stack_state *slot = &state->stack[spi - i];
1020 
1021 		for (j = 0; j < BPF_REG_SIZE; j++)
1022 			if (slot->slot_type[j] == STACK_ITER)
1023 				return false;
1024 	}
1025 
1026 	return true;
1027 }
1028 
1029 static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1030 				   struct btf *btf, u32 btf_id, int nr_slots)
1031 {
1032 	struct bpf_func_state *state = bpf_func(env, reg);
1033 	int spi, i, j;
1034 
1035 	spi = iter_get_spi(env, reg, nr_slots);
1036 	if (spi < 0)
1037 		return -EINVAL;
1038 
1039 	for (i = 0; i < nr_slots; i++) {
1040 		struct bpf_stack_state *slot = &state->stack[spi - i];
1041 		struct bpf_reg_state *st = &slot->spilled_ptr;
1042 
1043 		if (st->type & PTR_UNTRUSTED)
1044 			return -EPROTO;
1045 		/* only main (first) slot has id set */
1046 		if (i == 0 && !st->id)
1047 			return -EINVAL;
1048 		if (i != 0 && st->id)
1049 			return -EINVAL;
1050 		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1051 			return -EINVAL;
1052 
1053 		for (j = 0; j < BPF_REG_SIZE; j++)
1054 			if (slot->slot_type[j] != STACK_ITER)
1055 				return -EINVAL;
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1062 static int release_irq_state(struct bpf_verifier_state *state, int id);
1063 
1064 static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
1065 				     struct bpf_kfunc_call_arg_meta *meta,
1066 				     struct bpf_reg_state *reg, int insn_idx,
1067 				     int kfunc_class)
1068 {
1069 	struct bpf_func_state *state = bpf_func(env, reg);
1070 	struct bpf_stack_state *slot;
1071 	struct bpf_reg_state *st;
1072 	int spi, i, id;
1073 
1074 	spi = irq_flag_get_spi(env, reg);
1075 	if (spi < 0)
1076 		return spi;
1077 
1078 	id = acquire_irq_state(env, insn_idx);
1079 	if (id < 0)
1080 		return id;
1081 
1082 	slot = &state->stack[spi];
1083 	st = &slot->spilled_ptr;
1084 
1085 	__mark_reg_known_zero(st);
1086 	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1087 	st->id = id;
1088 	st->irq.kfunc_class = kfunc_class;
1089 
1090 	for (i = 0; i < BPF_REG_SIZE; i++)
1091 		slot->slot_type[i] = STACK_IRQ_FLAG;
1092 
1093 	mark_stack_slot_scratched(env, spi);
1094 	return 0;
1095 }
1096 
1097 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1098 				      int kfunc_class)
1099 {
1100 	struct bpf_func_state *state = bpf_func(env, reg);
1101 	struct bpf_stack_state *slot;
1102 	struct bpf_reg_state *st;
1103 	int spi, i, err;
1104 
1105 	spi = irq_flag_get_spi(env, reg);
1106 	if (spi < 0)
1107 		return spi;
1108 
1109 	slot = &state->stack[spi];
1110 	st = &slot->spilled_ptr;
1111 
1112 	if (st->irq.kfunc_class != kfunc_class) {
1113 		const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1114 		const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1115 
1116 		verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
1117 			flag_kfunc, used_kfunc);
1118 		return -EINVAL;
1119 	}
1120 
1121 	err = release_irq_state(env->cur_state, st->id);
1122 	WARN_ON_ONCE(err && err != -EACCES);
1123 	if (err) {
1124 		int insn_idx = 0;
1125 
1126 		for (int i = 0; i < env->cur_state->acquired_refs; i++) {
1127 			if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
1128 				insn_idx = env->cur_state->refs[i].insn_idx;
1129 				break;
1130 			}
1131 		}
1132 
1133 		verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
1134 			env->cur_state->active_irq_id, insn_idx);
1135 		return err;
1136 	}
1137 
1138 	bpf_mark_reg_not_init(env, st);
1139 
1140 	for (i = 0; i < BPF_REG_SIZE; i++)
1141 		slot->slot_type[i] = STACK_INVALID;
1142 
1143 	mark_stack_slot_scratched(env, spi);
1144 	return 0;
1145 }
1146 
1147 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1148 {
1149 	struct bpf_func_state *state = bpf_func(env, reg);
1150 	struct bpf_stack_state *slot;
1151 	int spi, i;
1152 
1153 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1154 	 * will do check_mem_access to check and update stack bounds later, so
1155 	 * return true for that case.
1156 	 */
1157 	spi = irq_flag_get_spi(env, reg);
1158 	if (spi == -ERANGE)
1159 		return true;
1160 	if (spi < 0)
1161 		return false;
1162 
1163 	slot = &state->stack[spi];
1164 
1165 	for (i = 0; i < BPF_REG_SIZE; i++)
1166 		if (slot->slot_type[i] == STACK_IRQ_FLAG)
1167 			return false;
1168 	return true;
1169 }
1170 
1171 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1172 {
1173 	struct bpf_func_state *state = bpf_func(env, reg);
1174 	struct bpf_stack_state *slot;
1175 	struct bpf_reg_state *st;
1176 	int spi, i;
1177 
1178 	spi = irq_flag_get_spi(env, reg);
1179 	if (spi < 0)
1180 		return -EINVAL;
1181 
1182 	slot = &state->stack[spi];
1183 	st = &slot->spilled_ptr;
1184 
1185 	if (!st->id)
1186 		return -EINVAL;
1187 
1188 	for (i = 0; i < BPF_REG_SIZE; i++)
1189 		if (slot->slot_type[i] != STACK_IRQ_FLAG)
1190 			return -EINVAL;
1191 	return 0;
1192 }
1193 
1194 /* Check if given stack slot is "special":
1195  *   - spilled register state (STACK_SPILL);
1196  *   - dynptr state (STACK_DYNPTR);
1197  *   - iter state (STACK_ITER).
1198  *   - irq flag state (STACK_IRQ_FLAG)
1199  */
1200 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1201 {
1202 	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1203 
1204 	switch (type) {
1205 	case STACK_SPILL:
1206 	case STACK_DYNPTR:
1207 	case STACK_ITER:
1208 	case STACK_IRQ_FLAG:
1209 		return true;
1210 	case STACK_INVALID:
1211 	case STACK_POISON:
1212 	case STACK_MISC:
1213 	case STACK_ZERO:
1214 		return false;
1215 	default:
1216 		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1217 		return true;
1218 	}
1219 }
1220 
1221 /* The reg state of a pointer or a bounded scalar was saved when
1222  * it was spilled to the stack.
1223  */
1224 
1225 /*
1226  * Mark stack slot as STACK_MISC, unless it is already:
1227  * - STACK_INVALID, in which case they are equivalent.
1228  * - STACK_ZERO, in which case we preserve more precise STACK_ZERO.
1229  * - STACK_POISON, which truly forbids access to the slot.
1230  * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1231  * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
1232  * unnecessary as both are considered equivalent when loading data and pruning,
1233  * in case of unprivileged mode it will be incorrect to allow reads of invalid
1234  * slots.
1235  */
1236 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1237 {
1238 	if (*stype == STACK_ZERO)
1239 		return;
1240 	if (*stype == STACK_INVALID || *stype == STACK_POISON)
1241 		return;
1242 	*stype = STACK_MISC;
1243 }
1244 
1245 static void scrub_spilled_slot(u8 *stype)
1246 {
1247 	if (*stype != STACK_INVALID && *stype != STACK_POISON)
1248 		*stype = STACK_MISC;
1249 }
1250 
/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
 * small to hold src. This is different from krealloc since we don't want to preserve
 * the contents of dst.
 *
 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
 * not be allocated.
 *
 * NULL is also returned, without touching dst, when n * size overflows. When
 * the reallocation itself fails, the original dst buffer is freed before
 * returning NULL. On the "nothing to copy" path a NULL dst is reported as
 * ZERO_SIZE_PTR, so that callers testing the result against NULL only see
 * real failures.
 */
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
	size_t alloc_bytes;
	void *orig = dst;
	size_t bytes;

	/* nothing to copy from */
	if (ZERO_OR_NULL_PTR(src))
		goto out;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	/* Request at least as much as the current dst allocation reports via
	 * ksize(), and at least the rounded-up size needed for the copy.
	 */
	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
	dst = krealloc(orig, alloc_bytes, flags);
	if (!dst) {
		/* the old buffer is released on failure */
		kfree(orig);
		return NULL;
	}

	memcpy(dst, src, bytes);
out:
	return dst ? dst : ZERO_SIZE_PTR;
}
1281 
/* resize an array from old_n items to new_n items. the array is reallocated if it's too
 * small to hold new_n items. new items are zeroed out if the array grows.
 *
 * Contrary to krealloc_array, does not free arr if new_n is zero.
 *
 * Nothing is done when new_n is zero or equal to old_n. If the reallocation
 * fails, arr is freed and NULL is returned. A NULL arr on the no-op path is
 * reported as ZERO_SIZE_PTR, so that callers testing the result against NULL
 * only see allocation failures.
 */
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
	size_t alloc_size;
	void *new_arr;

	if (!new_n || old_n == new_n)
		goto out;

	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
	new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
	if (!new_arr) {
		/* the old array is released on failure */
		kfree(arr);
		return NULL;
	}
	arr = new_arr;

	/* zero only the newly added tail */
	if (new_n > old_n)
		memset(arr + old_n * size, 0, (new_n - old_n) * size);

out:
	return arr ? arr : ZERO_SIZE_PTR;
}
1309 
1310 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
1311 {
1312 	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1313 			       sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
1314 	if (!dst->refs)
1315 		return -ENOMEM;
1316 
1317 	dst->acquired_refs = src->acquired_refs;
1318 	dst->active_locks = src->active_locks;
1319 	dst->active_preempt_locks = src->active_preempt_locks;
1320 	dst->active_rcu_locks = src->active_rcu_locks;
1321 	dst->active_irq_id = src->active_irq_id;
1322 	dst->active_lock_id = src->active_lock_id;
1323 	dst->active_lock_ptr = src->active_lock_ptr;
1324 	return 0;
1325 }
1326 
1327 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1328 {
1329 	size_t n = src->allocated_stack / BPF_REG_SIZE;
1330 
1331 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1332 				GFP_KERNEL_ACCOUNT);
1333 	if (!dst->stack)
1334 		return -ENOMEM;
1335 
1336 	dst->allocated_stack = src->allocated_stack;
1337 
1338 	/* copy stack args state */
1339 	n = src->out_stack_arg_cnt;
1340 	if (n) {
1341 		dst->stack_arg_regs = copy_array(dst->stack_arg_regs, src->stack_arg_regs, n,
1342 						 sizeof(struct bpf_reg_state),
1343 						 GFP_KERNEL_ACCOUNT);
1344 		if (!dst->stack_arg_regs)
1345 			return -ENOMEM;
1346 	}
1347 
1348 	dst->out_stack_arg_cnt = src->out_stack_arg_cnt;
1349 	return 0;
1350 }
1351 
1352 static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
1353 {
1354 	state->refs = realloc_array(state->refs, state->acquired_refs, n,
1355 				    sizeof(struct bpf_reference_state));
1356 	if (!state->refs)
1357 		return -ENOMEM;
1358 
1359 	state->acquired_refs = n;
1360 	return 0;
1361 }
1362 
1363 /* Possibly update state->allocated_stack to be at least size bytes. Also
1364  * possibly update the function's high-water mark in its bpf_subprog_info.
1365  */
1366 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1367 {
1368 	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1369 
1370 	/* The stack size is always a multiple of BPF_REG_SIZE. */
1371 	size = round_up(size, BPF_REG_SIZE);
1372 	n = size / BPF_REG_SIZE;
1373 
1374 	if (old_n >= n)
1375 		return 0;
1376 
1377 	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1378 	if (!state->stack)
1379 		return -ENOMEM;
1380 
1381 	state->allocated_stack = size;
1382 
1383 	/* update known max for given subprogram */
1384 	if (env->subprog_info[state->subprogno].stack_depth < size)
1385 		env->subprog_info[state->subprogno].stack_depth = size;
1386 
1387 	return 0;
1388 }
1389 
1390 static int grow_stack_arg_slots(struct bpf_verifier_env *env,
1391 				struct bpf_func_state *state, int cnt)
1392 {
1393 	size_t old_n = state->out_stack_arg_cnt;
1394 
1395 	if (old_n >= cnt)
1396 		return 0;
1397 
1398 	state->stack_arg_regs = realloc_array(state->stack_arg_regs, old_n, cnt,
1399 					      sizeof(struct bpf_reg_state));
1400 	if (!state->stack_arg_regs)
1401 		return -ENOMEM;
1402 
1403 	state->out_stack_arg_cnt = cnt;
1404 	return 0;
1405 }
1406 
1407 /* Acquire a pointer id from the env and update the state->refs to include
1408  * this new pointer reference.
1409  * On success, returns a valid pointer id to associate with the register
1410  * On failure, returns a negative errno.
1411  */
1412 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1413 {
1414 	struct bpf_verifier_state *state = env->cur_state;
1415 	int new_ofs = state->acquired_refs;
1416 	int err;
1417 
1418 	err = resize_reference_state(state, state->acquired_refs + 1);
1419 	if (err)
1420 		return NULL;
1421 	state->refs[new_ofs].insn_idx = insn_idx;
1422 
1423 	return &state->refs[new_ofs];
1424 }
1425 
1426 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id)
1427 {
1428 	struct bpf_reference_state *s;
1429 
1430 	s = acquire_reference_state(env, insn_idx);
1431 	if (!s)
1432 		return -ENOMEM;
1433 	s->type = REF_TYPE_PTR;
1434 	s->id = ++env->id_gen;
1435 	s->parent_id = parent_id;
1436 	return s->id;
1437 }
1438 
1439 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
1440 			      int id, void *ptr)
1441 {
1442 	struct bpf_verifier_state *state = env->cur_state;
1443 	struct bpf_reference_state *s;
1444 
1445 	s = acquire_reference_state(env, insn_idx);
1446 	if (!s)
1447 		return -ENOMEM;
1448 	s->type = type;
1449 	s->id = id;
1450 	s->ptr = ptr;
1451 
1452 	state->active_locks++;
1453 	state->active_lock_id = id;
1454 	state->active_lock_ptr = ptr;
1455 	return 0;
1456 }
1457 
1458 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
1459 {
1460 	struct bpf_verifier_state *state = env->cur_state;
1461 	struct bpf_reference_state *s;
1462 
1463 	s = acquire_reference_state(env, insn_idx);
1464 	if (!s)
1465 		return -ENOMEM;
1466 	s->type = REF_TYPE_IRQ;
1467 	s->id = ++env->id_gen;
1468 
1469 	state->active_irq_id = s->id;
1470 	return s->id;
1471 }
1472 
1473 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1474 {
1475 	int last_idx;
1476 	size_t rem;
1477 
1478 	/* IRQ state requires the relative ordering of elements remaining the
1479 	 * same, since it relies on the refs array to behave as a stack, so that
1480 	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1481 	 * the array instead of swapping the final element into the deleted idx.
1482 	 */
1483 	last_idx = state->acquired_refs - 1;
1484 	rem = state->acquired_refs - idx - 1;
1485 	if (last_idx && idx != last_idx)
1486 		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1487 	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1488 	state->acquired_refs--;
1489 	return;
1490 }
1491 
1492 static bool find_reference_state(struct bpf_verifier_state *state, int id)
1493 {
1494 	int i;
1495 
1496 	for (i = 0; i < state->acquired_refs; i++) {
1497 		if (state->refs[i].type != REF_TYPE_PTR)
1498 			continue;
1499 		if (state->refs[i].id == id)
1500 			return true;
1501 	}
1502 
1503 	return false;
1504 }
1505 
1506 static bool reg_is_referenced(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
1507 {
1508 	return find_reference_state(env->cur_state, reg->id);
1509 }
1510 
1511 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
1512 {
1513 	void *prev_ptr = NULL;
1514 	u32 prev_id = 0;
1515 	int i;
1516 
1517 	for (i = 0; i < state->acquired_refs; i++) {
1518 		if (state->refs[i].type == type && state->refs[i].id == id &&
1519 		    state->refs[i].ptr == ptr) {
1520 			release_reference_state(state, i);
1521 			state->active_locks--;
1522 			/* Reassign active lock (id, ptr). */
1523 			state->active_lock_id = prev_id;
1524 			state->active_lock_ptr = prev_ptr;
1525 			return 0;
1526 		}
1527 		if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
1528 			prev_id = state->refs[i].id;
1529 			prev_ptr = state->refs[i].ptr;
1530 		}
1531 	}
1532 	return -EINVAL;
1533 }
1534 
1535 static int release_irq_state(struct bpf_verifier_state *state, int id)
1536 {
1537 	u32 prev_id = 0;
1538 	int i;
1539 
1540 	if (id != state->active_irq_id)
1541 		return -EACCES;
1542 
1543 	for (i = 0; i < state->acquired_refs; i++) {
1544 		if (state->refs[i].type != REF_TYPE_IRQ)
1545 			continue;
1546 		if (state->refs[i].id == id) {
1547 			release_reference_state(state, i);
1548 			state->active_irq_id = prev_id;
1549 			return 0;
1550 		} else {
1551 			prev_id = state->refs[i].id;
1552 		}
1553 	}
1554 	return -EINVAL;
1555 }
1556 
1557 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1558 						   int id, void *ptr)
1559 {
1560 	int i;
1561 
1562 	for (i = 0; i < state->acquired_refs; i++) {
1563 		struct bpf_reference_state *s = &state->refs[i];
1564 
1565 		if (!(s->type & type))
1566 			continue;
1567 
1568 		if (s->id == id && s->ptr == ptr)
1569 			return s;
1570 	}
1571 	return NULL;
1572 }
1573 
1574 static void free_func_state(struct bpf_func_state *state)
1575 {
1576 	if (!state)
1577 		return;
1578 	kfree(state->stack_arg_regs);
1579 	kfree(state->stack);
1580 	kfree(state);
1581 }
1582 
1583 void bpf_clear_jmp_history(struct bpf_verifier_state *state)
1584 {
1585 	kfree(state->jmp_history);
1586 	state->jmp_history = NULL;
1587 	state->jmp_history_cnt = 0;
1588 }
1589 
1590 void bpf_free_verifier_state(struct bpf_verifier_state *state,
1591 			    bool free_self)
1592 {
1593 	int i;
1594 
1595 	for (i = 0; i <= state->curframe; i++) {
1596 		free_func_state(state->frame[i]);
1597 		state->frame[i] = NULL;
1598 	}
1599 	kfree(state->refs);
1600 	bpf_clear_jmp_history(state);
1601 	if (free_self)
1602 		kfree(state);
1603 }
1604 
1605 /* copy verifier state from src to dst growing dst stack space
1606  * when necessary to accommodate larger src stack
1607  */
1608 static int copy_func_state(struct bpf_func_state *dst,
1609 			   const struct bpf_func_state *src)
1610 {
1611 	memcpy(dst, src, offsetof(struct bpf_func_state, stack));
1612 	return copy_stack_state(dst, src);
1613 }
1614 
/* Make @dst_state a deep copy of @src: jump history, reference state, the
 * scalar bookkeeping fields and every function frame up to src->curframe.
 * Buffers and frames already owned by @dst_state are reused where possible.
 *
 * Returns 0 on success or a negative error (-ENOMEM) if an allocation fails;
 * in that case @dst_state may have been partially updated.
 */
int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
			   const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
					  src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
					  GFP_KERNEL_ACCOUNT);
	if (!dst_state->jmp_history)
		return -ENOMEM;
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames than src, free them, this is also
	 * necessary in case of exceptional exits using bpf_throw.
	 *
	 * This has to happen before dst_state->curframe is overwritten below.
	 */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	err = copy_reference_state(dst_state, src);
	if (err)
		return err;
	dst_state->speculative = src->speculative;
	dst_state->in_sleepable = src->in_sleepable;
	dst_state->curframe = src->curframe;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	dst_state->dfs_depth = src->dfs_depth;
	dst_state->callback_unroll_depth = src->callback_unroll_depth;
	dst_state->may_goto_depth = src->may_goto_depth;
	dst_state->equal_state = src->equal_state;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			/* no frame to reuse at this depth, allocate a zeroed one */
			dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}
1663 
1664 static u32 state_htab_size(struct bpf_verifier_env *env)
1665 {
1666 	return env->prog->len;
1667 }
1668 
1669 struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx)
1670 {
1671 	struct bpf_verifier_state *cur = env->cur_state;
1672 	struct bpf_func_state *state = cur->frame[cur->curframe];
1673 
1674 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1675 }
1676 
1677 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1678 {
1679 	int fr;
1680 
1681 	if (a->curframe != b->curframe)
1682 		return false;
1683 
1684 	for (fr = a->curframe; fr >= 0; fr--)
1685 		if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1686 			return false;
1687 
1688 	return true;
1689 }
1690 
1691 
1692 void bpf_free_backedges(struct bpf_scc_visit *visit)
1693 {
1694 	struct bpf_scc_backedge *backedge, *next;
1695 
1696 	for (backedge = visit->backedges; backedge; backedge = next) {
1697 		bpf_free_verifier_state(&backedge->state, false);
1698 		next = backedge->next;
1699 		kfree(backedge);
1700 	}
1701 	visit->backedges = NULL;
1702 }
1703 
1704 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1705 		     int *insn_idx, bool pop_log)
1706 {
1707 	struct bpf_verifier_state *cur = env->cur_state;
1708 	struct bpf_verifier_stack_elem *elem, *head = env->head;
1709 	int err;
1710 
1711 	if (env->head == NULL)
1712 		return -ENOENT;
1713 
1714 	if (cur) {
1715 		err = bpf_copy_verifier_state(cur, &head->st);
1716 		if (err)
1717 			return err;
1718 	}
1719 	if (pop_log)
1720 		bpf_vlog_reset(&env->log, head->log_pos);
1721 	if (insn_idx)
1722 		*insn_idx = head->insn_idx;
1723 	if (prev_insn_idx)
1724 		*prev_insn_idx = head->prev_insn_idx;
1725 	elem = head->next;
1726 	bpf_free_verifier_state(&head->st, false);
1727 	kfree(head);
1728 	env->head = elem;
1729 	env->stack_size--;
1730 	return 0;
1731 }
1732 
/* Tell whether @err is one of the errors that may be recovered from with a
 * nospec: -EPERM, -EACCES or -EINVAL.
 */
static bool error_recoverable_with_nospec(int err)
{
	/* Should only return true for non-fatal errors that are allowed to
	 * occur during speculative verification. For these we can insert a
	 * nospec and the program might still be accepted. Do not include
	 * something like ENOMEM because it is likely to re-occur for the next
	 * architectural path once it has been recovered-from in all speculative
	 * paths.
	 */
	switch (err) {
	case -EPERM:
	case -EACCES:
	case -EINVAL:
		return true;
	default:
		return false;
	}
}
1744 
/* Push a copy of the current verifier state onto the stack of pending states,
 * to be explored later starting at @insn_idx (with @prev_insn_idx as the
 * preceding instruction). The current log position is saved with the element.
 *
 * The copy is marked speculative if the current state already is or if
 * @speculative is set. If the copied state has a parent, the parent's
 * 'branches' counter is incremented.
 *
 * Returns the pushed state on success, or an ERR_PTR(): -ENOMEM if an
 * allocation or the state copy fails, -E2BIG if the stack grows beyond
 * BPF_COMPLEXITY_LIMIT_JMP_SEQ.
 */
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
	if (!elem)
		return ERR_PTR(-ENOMEM);

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	/* The element is linked in before the state copy; on the failure
	 * paths below it stays on the stack and is not freed here.
	 * NOTE(review): presumably released by the caller's teardown - confirm.
	 */
	env->head = elem;
	env->stack_size++;
	err = bpf_copy_verifier_state(&elem->st, cur);
	if (err)
		return ERR_PTR(-ENOMEM);
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		return ERR_PTR(-E2BIG);
	}
	if (elem->st.parent) {
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but
		 * 1. speculative states will bump 'branches' for non-branch
		 * instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 * a new state for a sequence of branches and all such current
		 * and cloned states will be pointing to a single parent state
		 * which might have large 'branches' count.
		 */
	}
	return &elem->st;
}
1786 
1787 static const char *reg_arg_name(struct bpf_verifier_env *env, argno_t argno)
1788 {
1789 	char *buf = env->tmp_arg_name;
1790 	int len = sizeof(env->tmp_arg_name);
1791 	int arg, regno = reg_from_argno(argno);
1792 
1793 	if (regno >= 0) {
1794 		snprintf(buf, len, "R%d", regno);
1795 	} else {
1796 		arg = arg_from_argno(argno);
1797 		snprintf(buf, len, "*(R11-%u)", (arg - MAX_BPF_FUNC_REG_ARGS) * BPF_REG_SIZE);
1798 	}
1799 
1800 	return buf;
1801 }
1802 
/* The caller saved registers, R0 through R5, in ascending order. */
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
1806 
1807 /* This helper doesn't clear reg->id */
1808 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1809 {
1810 	reg->var_off = tnum_const(imm);
1811 	reg->r64 = cnum64_from_urange(imm, imm);
1812 	reg->r32 = cnum32_from_urange((u32)imm, (u32)imm);
1813 }
1814 
/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 *
 * Besides setting the value tracking, this zeroes every byte between the end
 * of reg->type and the start of reg->var_off and resets id and parent_id.
 * reg->type itself is left untouched.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear off and union(map_ptr, range) */
	/* NOTE(review): the offset arithmetic assumes 'type' is the first
	 * member of struct bpf_reg_state - confirm against the definition.
	 */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->id = 0;
	reg->parent_id = 0;
	___mark_reg_known(reg, imm);
}
1827 
1828 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1829 {
1830 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1831 	reg->r32 = cnum32_from_urange((u32)imm, (u32)imm);
1832 }
1833 
/* Mark the 'variable offset' part of a register as zero.  This should be
 * used only on registers holding a pointer type.
 *
 * Thin wrapper around __mark_reg_known() with a value of 0.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}
1841 
/* Turn @reg into a scalar register holding the constant 0. */
static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
	/* all scalars are assumed imprecise initially (unless unprivileged,
	 * in which case everything is forced to be precise)
	 */
	reg->precise = !env->bpf_capable;
}
1851 
1852 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1853 				struct bpf_reg_state *regs, u32 regno)
1854 {
1855 	__mark_reg_known_zero(regs + regno);
1856 }
1857 
/* Initialize @reg as dynptr state of the given @type.
 *
 * The register is first reset via __mark_reg_known_zero() and then receives
 * @id, @parent_id and the @first_slot flag.
 */
static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
			      bool first_slot, int id, int parent_id)
{
	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
	 */
	__mark_reg_known_zero(reg);
	reg->type = CONST_PTR_TO_DYNPTR;
	/* Give each dynptr a unique id to uniquely associate slices to it. */
	reg->id = id;
	reg->parent_id = parent_id;
	reg->dynptr.type = type;
	reg->dynptr.first_slot = first_slot;
}
1873 
1874 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1875 {
1876 	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1877 		const struct bpf_map *map = reg->map_ptr;
1878 
1879 		if (map->inner_map_meta) {
1880 			reg->type = CONST_PTR_TO_MAP;
1881 			reg->map_ptr = map->inner_map_meta;
1882 			/* transfer reg's id which is unique for every map_lookup_elem
1883 			 * as UID of the inner map.
1884 			 */
1885 			if (btf_record_has_field(map->inner_map_meta->record,
1886 						 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
1887 				reg->map_uid = reg->id;
1888 			}
1889 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1890 			reg->type = PTR_TO_XDP_SOCK;
1891 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1892 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1893 			reg->type = PTR_TO_SOCKET;
1894 		} else {
1895 			reg->type = PTR_TO_MAP_VALUE;
1896 		}
1897 		return;
1898 	}
1899 
1900 	reg->type &= ~PTR_MAYBE_NULL;
1901 }
1902 
1903 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1904 				struct btf_field_graph_root *ds_head)
1905 {
1906 	__mark_reg_known(&regs[regno], ds_head->node_offset);
1907 	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1908 	regs[regno].btf = ds_head->btf;
1909 	regs[regno].btf_id = ds_head->value_btf_id;
1910 }
1911 
/* Check whether the type of @reg is a packet pointer type, as decided by
 * type_is_pkt_pointer().
 */
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}
1916 
1917 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1918 {
1919 	return reg_is_pkt_pointer(reg) ||
1920 	       reg->type == PTR_TO_PACKET_END;
1921 }
1922 
1923 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1924 {
1925 	return base_type(reg->type) == PTR_TO_MEM &&
1926 	       (reg->type &
1927 		(DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
1928 }
1929 
1930 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1931 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1932 				    enum bpf_reg_type which)
1933 {
1934 	/* The register can already have a range from prior markings.
1935 	 * This is fine as long as it hasn't been advanced from its
1936 	 * origin.
1937 	 */
1938 	return reg->type == which &&
1939 	       reg->id == 0 &&
1940 	       tnum_equals_const(reg->var_off, 0);
1941 }
1942 
1943 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1944 {
1945 	reg->r32 = CNUM32_UNBOUNDED;
1946 }
1947 
1948 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1949 {
1950 	reg->r64 = CNUM64_UNBOUNDED;
1951 }
1952 
1953 /* Reset the min/max bounds of a register */
1954 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1955 {
1956 	__mark_reg64_unbounded(reg);
1957 	__mark_reg32_unbounded(reg);
1958 }
1959 
1960 static void reset_reg64_and_tnum(struct bpf_reg_state *reg)
1961 {
1962 	__mark_reg64_unbounded(reg);
1963 	reg->var_off = tnum_unknown;
1964 }
1965 
1966 static void reset_reg32_and_tnum(struct bpf_reg_state *reg)
1967 {
1968 	__mark_reg32_unbounded(reg);
1969 	reg->var_off = tnum_unknown;
1970 }
1971 
1972 static struct cnum32 cnum32_from_tnum(struct tnum tnum)
1973 {
1974 	tnum = tnum_subreg(tnum);
1975 	if ((tnum.mask & S32_MIN) || (tnum.value & S32_MIN))
1976 		/* min signed is max(sign bit) | min(other bits) */
1977 		/* max signed is min(sign bit) | max(other bits) */
1978 		return cnum32_from_srange(tnum.value | (tnum.mask & S32_MIN),
1979 					  tnum.value | (tnum.mask & S32_MAX));
1980 	else
1981 		return cnum32_from_urange(tnum.value, (tnum.value | tnum.mask));
1982 }
1983 
1984 static struct cnum64 cnum64_from_tnum(struct tnum tnum)
1985 {
1986 	if ((tnum.mask & S64_MIN) || (tnum.value & S64_MIN))
1987 		/* min signed is max(sign bit) | min(other bits) */
1988 		/* max signed is min(sign bit) | max(other bits) */
1989 		return cnum64_from_srange(tnum.value | (tnum.mask & S64_MIN),
1990 					  tnum.value | (tnum.mask & S64_MAX));
1991 	else
1992 		return cnum64_from_urange(tnum.value, (tnum.value | tnum.mask));
1993 }
1994 
1995 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1996 {
1997 	cnum32_intersect_with(&reg->r32, cnum32_from_tnum(reg->var_off));
1998 }
1999 
2000 static void __update_reg64_bounds(struct bpf_reg_state *reg)
2001 {
2002 	u64 tnum_next, tmax;
2003 	bool umin_in_tnum;
2004 
2005 	cnum64_intersect_with(&reg->r64, cnum64_from_tnum(reg->var_off));
2006 
2007 	/* Check if u64 and tnum overlap in a single value */
2008 	tnum_next = tnum_step(reg->var_off, reg_umin(reg));
2009 	umin_in_tnum = (reg_umin(reg) & ~reg->var_off.mask) == reg->var_off.value;
2010 	tmax = reg->var_off.value | reg->var_off.mask;
2011 	if (umin_in_tnum && tnum_next > reg_umax(reg)) {
2012 		/* The u64 range and the tnum only overlap in umin.
2013 		 * u64:  ---[xxxxxx]-----
2014 		 * tnum: --xx----------x-
2015 		 */
2016 		___mark_reg_known(reg, reg_umin(reg));
2017 	} else if (!umin_in_tnum && tnum_next == tmax) {
2018 		/* The u64 range and the tnum only overlap in the maximum value
2019 		 * represented by the tnum, called tmax.
2020 		 * u64:  ---[xxxxxx]-----
2021 		 * tnum: xx-----x--------
2022 		 */
2023 		___mark_reg_known(reg, tmax);
2024 	} else if (!umin_in_tnum && tnum_next <= reg_umax(reg) &&
2025 		   tnum_step(reg->var_off, tnum_next) > reg_umax(reg)) {
2026 		/* The u64 range and the tnum only overlap in between umin
2027 		 * (excluded) and umax.
2028 		 * u64:  ---[xxxxxx]-----
2029 		 * tnum: xx----x-------x-
2030 		 */
2031 		___mark_reg_known(reg, tnum_next);
2032 	}
2033 }
2034 
2035 static void __update_reg_bounds(struct bpf_reg_state *reg)
2036 {
2037 	__update_reg32_bounds(reg);
2038 	__update_reg64_bounds(reg);
2039 }
2040 
2041 static void deduce_bounds_32_from_64(struct bpf_reg_state *reg)
2042 {
2043 	cnum32_intersect_with(&reg->r32, cnum32_from_cnum64(reg->r64));
2044 }
2045 
2046 static void deduce_bounds_64_from_32(struct bpf_reg_state *reg)
2047 {
2048 	reg->r64 = cnum64_cnum32_intersect(reg->r64, reg->r32);
2049 }
2050 
2051 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2052 {
2053 	deduce_bounds_32_from_64(reg);
2054 	deduce_bounds_64_from_32(reg);
2055 }
2056 
2057 /* Attempts to improve var_off based on unsigned min/max information */
2058 static void __reg_bound_offset(struct bpf_reg_state *reg)
2059 {
2060 	struct tnum var64_off = tnum_intersect(reg->var_off,
2061 					       tnum_range(reg_umin(reg),
2062 							  reg_umax(reg)));
2063 	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2064 					       tnum_range(reg_u32_min(reg),
2065 							  reg_u32_max(reg)));
2066 
2067 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2068 }
2069 
2070 static bool range_bounds_violation(struct bpf_reg_state *reg);
2071 
/* Propagate knowledge between var_off and the 32/64-bit ranges of @reg
 * until they agree. Nothing is done when either range is already empty.
 */
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	int pass;

	/* An invalid input state cannot be improved; leave it alone. */
	if (range_bounds_violation(reg))
		return;

	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(reg);

	/* We might have learned something about the sign bit; the
	 * deduction is run twice.
	 */
	for (pass = 0; pass < 2; pass++)
		__reg_deduce_bounds(reg);

	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(reg);

	/* Intersecting with the old var_off might have improved our bounds
	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(reg);
}
2090 
2091 static bool const_tnum_range_mismatch(struct bpf_reg_state *reg)
2092 {
2093 	if (!tnum_is_const(reg->var_off))
2094 		return false;
2095 
2096 	return !cnum64_is_const(reg->r64) || reg->r64.base != reg->var_off.value;
2097 }
2098 
2099 static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg)
2100 {
2101 	if (!tnum_subreg_is_const(reg->var_off))
2102 		return false;
2103 
2104 	return !cnum32_is_const(reg->r32) || reg->r32.base != tnum_subreg(reg->var_off).value;
2105 }
2106 
2107 static bool range_bounds_violation(struct bpf_reg_state *reg)
2108 {
2109 	return cnum32_is_empty(reg->r32) || cnum64_is_empty(reg->r64);
2110 }
2111 
2112 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2113 				   struct bpf_reg_state *reg, const char *ctx)
2114 {
2115 	const char *msg;
2116 
2117 	if (range_bounds_violation(reg)) {
2118 		msg = "range bounds violation";
2119 		goto out;
2120 	}
2121 
2122 	if (const_tnum_range_mismatch(reg)) {
2123 		msg = "const tnum out of sync with range bounds";
2124 		goto out;
2125 	}
2126 
2127 	if (const_tnum_range_mismatch_32(reg)) {
2128 		msg = "const subreg tnum out of sync with range bounds";
2129 		goto out;
2130 	}
2131 
2132 	return 0;
2133 out:
2134 	verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s r64={.base=%#llx, .size=%#llx} "
2135 		     "r32={.base=%#x, .size=%#x} var_off=(%#llx, %#llx)",
2136 		     ctx, msg,
2137 		     reg->r64.base, reg->r64.size,
2138 		     reg->r32.base, reg->r32.size,
2139 		     reg->var_off.value, reg->var_off.mask);
2140 	if (env->test_reg_invariants)
2141 		return -EFAULT;
2142 	__mark_reg_unbounded(reg);
2143 	return 0;
2144 }
2145 
2146 /* Mark a register as having a completely unknown (scalar) value. */
2147 void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2148 {
2149 	s32 subreg_def = reg->subreg_def;
2150 
2151 	memset(reg, 0, sizeof(*reg));
2152 	reg->type = SCALAR_VALUE;
2153 	reg->var_off = tnum_unknown;
2154 	reg->subreg_def = subreg_def;
2155 	__mark_reg_unbounded(reg);
2156 }
2157 
2158 /* Mark a register as having a completely unknown (scalar) value,
2159  * initialize .precise as true when not bpf capable.
2160  */
2161 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2162 			       struct bpf_reg_state *reg)
2163 {
2164 	bpf_mark_reg_unknown_imprecise(reg);
2165 	reg->precise = !env->bpf_capable;
2166 }
2167 
2168 static void mark_reg_unknown(struct bpf_verifier_env *env,
2169 			     struct bpf_reg_state *regs, u32 regno)
2170 {
2171 	__mark_reg_unknown(env, regs + regno);
2172 }
2173 
2174 static int __mark_reg_s32_range(struct bpf_verifier_env *env,
2175 				struct bpf_reg_state *regs,
2176 				u32 regno,
2177 				s32 s32_min,
2178 				s32 s32_max)
2179 {
2180 	struct bpf_reg_state *reg = regs + regno;
2181 
2182 	reg_set_srange32(reg,
2183 			 max_t(s32, reg_s32_min(reg), s32_min),
2184 			 min_t(s32, reg_s32_max(reg), s32_max));
2185 	reg_set_srange64(reg,
2186 			 max_t(s64, reg_smin(reg), s32_min),
2187 			 min_t(s64, reg_smax(reg), s32_max));
2188 
2189 	reg_bounds_sync(reg);
2190 
2191 	return reg_bounds_sanity_check(env, reg, "s32_range");
2192 }
2193 
2194 void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
2195 			   struct bpf_reg_state *reg)
2196 {
2197 	__mark_reg_unknown(env, reg);
2198 	reg->type = NOT_INIT;
2199 }
2200 
2201 static int mark_btf_ld_reg(struct bpf_verifier_env *env,
2202 			   struct bpf_reg_state *regs, u32 regno,
2203 			   enum bpf_reg_type reg_type,
2204 			   struct btf *btf, u32 btf_id,
2205 			   enum bpf_type_flag flag)
2206 {
2207 	switch (reg_type) {
2208 	case SCALAR_VALUE:
2209 		mark_reg_unknown(env, regs, regno);
2210 		return 0;
2211 	case PTR_TO_BTF_ID:
2212 		mark_reg_known_zero(env, regs, regno);
2213 		regs[regno].type = PTR_TO_BTF_ID | flag;
2214 		regs[regno].btf = btf;
2215 		regs[regno].btf_id = btf_id;
2216 		if (type_may_be_null(flag))
2217 			regs[regno].id = ++env->id_gen;
2218 		return 0;
2219 	case PTR_TO_MEM:
2220 		mark_reg_known_zero(env, regs, regno);
2221 		regs[regno].type = PTR_TO_MEM | flag;
2222 		regs[regno].mem_size = 0;
2223 		return 0;
2224 	default:
2225 		verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__);
2226 		return -EFAULT;
2227 	}
2228 }
2229 
2230 #define DEF_NOT_SUBREG	(0)
2231 static void init_reg_state(struct bpf_verifier_env *env,
2232 			   struct bpf_func_state *state)
2233 {
2234 	struct bpf_reg_state *regs = state->regs;
2235 	int i;
2236 
2237 	for (i = 0; i < MAX_BPF_REG; i++) {
2238 		bpf_mark_reg_not_init(env, &regs[i]);
2239 		regs[i].subreg_def = DEF_NOT_SUBREG;
2240 	}
2241 
2242 	/* frame pointer */
2243 	regs[BPF_REG_FP].type = PTR_TO_STACK;
2244 	mark_reg_known_zero(env, regs, BPF_REG_FP);
2245 	regs[BPF_REG_FP].frameno = state->frameno;
2246 }
2247 
2248 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2249 {
2250 	/*
2251 	 * return_32bit is set to false by default and set explicitly
2252 	 * by the caller when necessary.
2253 	 */
2254 	return (struct bpf_retval_range){ minval, maxval, false };
2255 }
2256 
2257 static void init_func_state(struct bpf_verifier_env *env,
2258 			    struct bpf_func_state *state,
2259 			    int callsite, int frameno, int subprogno)
2260 {
2261 	state->callsite = callsite;
2262 	state->frameno = frameno;
2263 	state->subprogno = subprogno;
2264 	state->callback_ret_range = retval_range(0, 0);
2265 	init_reg_state(env, state);
2266 	mark_verifier_state_scratched(env);
2267 }
2268 
2269 /* Similar to push_stack(), but for async callbacks */
2270 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2271 						int insn_idx, int prev_insn_idx,
2272 						int subprog, bool is_sleepable)
2273 {
2274 	struct bpf_verifier_stack_elem *elem;
2275 	struct bpf_func_state *frame;
2276 
2277 	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
2278 	if (!elem)
2279 		return ERR_PTR(-ENOMEM);
2280 
2281 	elem->insn_idx = insn_idx;
2282 	elem->prev_insn_idx = prev_insn_idx;
2283 	elem->next = env->head;
2284 	elem->log_pos = env->log.end_pos;
2285 	env->head = elem;
2286 	env->stack_size++;
2287 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2288 		verbose(env,
2289 			"The sequence of %d jumps is too complex for async cb.\n",
2290 			env->stack_size);
2291 		return ERR_PTR(-E2BIG);
2292 	}
2293 	/* Unlike push_stack() do not bpf_copy_verifier_state().
2294 	 * The caller state doesn't matter.
2295 	 * This is async callback. It starts in a fresh stack.
2296 	 * Initialize it similar to do_check_common().
2297 	 */
2298 	elem->st.branches = 1;
2299 	elem->st.in_sleepable = is_sleepable;
2300 	frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT);
2301 	if (!frame)
2302 		return ERR_PTR(-ENOMEM);
2303 	init_func_state(env, frame,
2304 			BPF_MAIN_FUNC /* callsite */,
2305 			0 /* frameno within this callchain */,
2306 			subprog /* subprog number within this prog */);
2307 	elem->st.frame[0] = frame;
2308 	return &elem->st;
2309 }
2310 
2311 
2312 static int cmp_subprogs(const void *a, const void *b)
2313 {
2314 	return ((struct bpf_subprog_info *)a)->start -
2315 	       ((struct bpf_subprog_info *)b)->start;
2316 }
2317 
2318 /* Find subprogram that contains instruction at 'off' */
2319 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
2320 {
2321 	struct bpf_subprog_info *vals = env->subprog_info;
2322 	int l, r, m;
2323 
2324 	if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
2325 		return NULL;
2326 
2327 	l = 0;
2328 	r = env->subprog_cnt - 1;
2329 	while (l < r) {
2330 		m = l + (r - l + 1) / 2;
2331 		if (vals[m].start <= off)
2332 			l = m;
2333 		else
2334 			r = m - 1;
2335 	}
2336 	return &vals[l];
2337 }
2338 
2339 /* Find subprogram that starts exactly at 'off' */
2340 int bpf_find_subprog(struct bpf_verifier_env *env, int off)
2341 {
2342 	struct bpf_subprog_info *p;
2343 
2344 	p = bpf_find_containing_subprog(env, off);
2345 	if (!p || p->start != off)
2346 		return -ENOENT;
2347 	return p - env->subprog_info;
2348 }
2349 
2350 static int add_subprog(struct bpf_verifier_env *env, int off)
2351 {
2352 	int insn_cnt = env->prog->len;
2353 	int ret;
2354 
2355 	if (off >= insn_cnt || off < 0) {
2356 		verbose(env, "call to invalid destination\n");
2357 		return -EINVAL;
2358 	}
2359 	ret = bpf_find_subprog(env, off);
2360 	if (ret >= 0)
2361 		return ret;
2362 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2363 		verbose(env, "too many subprograms\n");
2364 		return -E2BIG;
2365 	}
2366 	/* determine subprog starts. The end is one before the next starts */
2367 	env->subprog_info[env->subprog_cnt++].start = off;
2368 	sort(env->subprog_info, env->subprog_cnt,
2369 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2370 	return env->subprog_cnt - 1;
2371 }
2372 
2373 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2374 {
2375 	struct bpf_prog_aux *aux = env->prog->aux;
2376 	struct btf *btf = aux->btf;
2377 	const struct btf_type *t;
2378 	u32 main_btf_id, id;
2379 	const char *name;
2380 	int ret, i;
2381 
2382 	/* Non-zero func_info_cnt implies valid btf */
2383 	if (!aux->func_info_cnt)
2384 		return 0;
2385 	main_btf_id = aux->func_info[0].type_id;
2386 
2387 	t = btf_type_by_id(btf, main_btf_id);
2388 	if (!t) {
2389 		verbose(env, "invalid btf id for main subprog in func_info\n");
2390 		return -EINVAL;
2391 	}
2392 
2393 	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
2394 	if (IS_ERR(name)) {
2395 		ret = PTR_ERR(name);
2396 		/* If there is no tag present, there is no exception callback */
2397 		if (ret == -ENOENT)
2398 			ret = 0;
2399 		else if (ret == -EEXIST)
2400 			verbose(env, "multiple exception callback tags for main subprog\n");
2401 		return ret;
2402 	}
2403 
2404 	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
2405 	if (ret < 0) {
2406 		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
2407 		return ret;
2408 	}
2409 	id = ret;
2410 	t = btf_type_by_id(btf, id);
2411 	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2412 		verbose(env, "exception callback '%s' must have global linkage\n", name);
2413 		return -EINVAL;
2414 	}
2415 	ret = 0;
2416 	for (i = 0; i < aux->func_info_cnt; i++) {
2417 		if (aux->func_info[i].type_id != id)
2418 			continue;
2419 		ret = aux->func_info[i].insn_off;
2420 		/* Further func_info and subprog checks will also happen
2421 		 * later, so assume this is the right insn_off for now.
2422 		 */
2423 		if (!ret) {
2424 			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
2425 			ret = -EINVAL;
2426 		}
2427 	}
2428 	if (!ret) {
2429 		verbose(env, "exception callback type id not found in func_info\n");
2430 		ret = -EINVAL;
2431 	}
2432 	return ret;
2433 }
2434 
2435 #define MAX_KFUNC_BTFS	256
2436 
2437 struct bpf_kfunc_btf {
2438 	struct btf *btf;
2439 	struct module *module;
2440 	u16 offset;
2441 };
2442 
2443 struct bpf_kfunc_btf_tab {
2444 	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2445 	u32 nr_descs;
2446 };
2447 
2448 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2449 {
2450 	const struct bpf_kfunc_desc *d0 = a;
2451 	const struct bpf_kfunc_desc *d1 = b;
2452 
2453 	/* func_id is not greater than BTF_MAX_TYPE */
2454 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2455 }
2456 
2457 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2458 {
2459 	const struct bpf_kfunc_btf *d0 = a;
2460 	const struct bpf_kfunc_btf *d1 = b;
2461 
2462 	return d0->offset - d1->offset;
2463 }
2464 
2465 static struct bpf_kfunc_desc *
2466 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2467 {
2468 	struct bpf_kfunc_desc desc = {
2469 		.func_id = func_id,
2470 		.offset = offset,
2471 	};
2472 	struct bpf_kfunc_desc_tab *tab;
2473 
2474 	tab = prog->aux->kfunc_tab;
2475 	return bsearch(&desc, tab->descs, tab->nr_descs,
2476 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2477 }
2478 
2479 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2480 		       u16 btf_fd_idx, u8 **func_addr)
2481 {
2482 	const struct bpf_kfunc_desc *desc;
2483 
2484 	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2485 	if (!desc)
2486 		return -EFAULT;
2487 
2488 	*func_addr = (u8 *)desc->addr;
2489 	return 0;
2490 }
2491 
2492 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2493 					 s16 offset)
2494 {
2495 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
2496 	struct bpf_kfunc_btf_tab *tab;
2497 	struct bpf_kfunc_btf *b;
2498 	struct module *mod;
2499 	struct btf *btf;
2500 	int btf_fd;
2501 
2502 	tab = env->prog->aux->kfunc_btf_tab;
2503 	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2504 		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2505 	if (!b) {
2506 		if (tab->nr_descs == MAX_KFUNC_BTFS) {
2507 			verbose(env, "too many different module BTFs\n");
2508 			return ERR_PTR(-E2BIG);
2509 		}
2510 
2511 		if (bpfptr_is_null(env->fd_array)) {
2512 			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2513 			return ERR_PTR(-EPROTO);
2514 		}
2515 
2516 		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2517 					    offset * sizeof(btf_fd),
2518 					    sizeof(btf_fd)))
2519 			return ERR_PTR(-EFAULT);
2520 
2521 		btf = btf_get_by_fd(btf_fd);
2522 		if (IS_ERR(btf)) {
2523 			verbose(env, "invalid module BTF fd specified\n");
2524 			return btf;
2525 		}
2526 
2527 		if (!btf_is_module(btf)) {
2528 			verbose(env, "BTF fd for kfunc is not a module BTF\n");
2529 			btf_put(btf);
2530 			return ERR_PTR(-EINVAL);
2531 		}
2532 
2533 		mod = btf_try_get_module(btf);
2534 		if (!mod) {
2535 			btf_put(btf);
2536 			return ERR_PTR(-ENXIO);
2537 		}
2538 
2539 		b = &tab->descs[tab->nr_descs++];
2540 		b->btf = btf;
2541 		b->module = mod;
2542 		b->offset = offset;
2543 
2544 		/* sort() reorders entries by value, so b may no longer point
2545 		 * to the right entry after this
2546 		 */
2547 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2548 		     kfunc_btf_cmp_by_off, NULL);
2549 	} else {
2550 		btf = b->btf;
2551 	}
2552 
2553 	return btf;
2554 }
2555 
2556 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2557 {
2558 	if (!tab)
2559 		return;
2560 
2561 	while (tab->nr_descs--) {
2562 		module_put(tab->descs[tab->nr_descs].module);
2563 		btf_put(tab->descs[tab->nr_descs].btf);
2564 	}
2565 	kfree(tab);
2566 }
2567 
2568 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2569 {
2570 	if (offset) {
2571 		if (offset < 0) {
2572 			/* In the future, this can be allowed to increase limit
2573 			 * of fd index into fd_array, interpreted as u16.
2574 			 */
2575 			verbose(env, "negative offset disallowed for kernel module function call\n");
2576 			return ERR_PTR(-EINVAL);
2577 		}
2578 
2579 		return __find_kfunc_desc_btf(env, offset);
2580 	}
2581 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
2582 }
2583 
2584 #define KF_IMPL_SUFFIX "_impl"
2585 
2586 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env,
2587 						    struct btf *btf,
2588 						    const char *func_name)
2589 {
2590 	char *buf = env->tmp_str_buf;
2591 	const struct btf_type *func;
2592 	s32 impl_id;
2593 	int len;
2594 
2595 	len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX);
2596 	if (len < 0 || len >= TMP_STR_BUF_LEN) {
2597 		verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX);
2598 		return NULL;
2599 	}
2600 
2601 	impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC);
2602 	if (impl_id <= 0) {
2603 		verbose(env, "cannot find function %s in BTF\n", buf);
2604 		return NULL;
2605 	}
2606 
2607 	func = btf_type_by_id(btf, impl_id);
2608 
2609 	return btf_type_by_id(btf, func->type);
2610 }
2611 
2612 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
2613 			    s32 func_id,
2614 			    s16 offset,
2615 			    struct bpf_kfunc_meta *kfunc)
2616 {
2617 	const struct btf_type *func, *func_proto;
2618 	const char *func_name;
2619 	u32 *kfunc_flags;
2620 	struct btf *btf;
2621 
2622 	if (func_id <= 0) {
2623 		verbose(env, "invalid kernel function btf_id %d\n", func_id);
2624 		return -EINVAL;
2625 	}
2626 
2627 	btf = find_kfunc_desc_btf(env, offset);
2628 	if (IS_ERR(btf)) {
2629 		verbose(env, "failed to find BTF for kernel function\n");
2630 		return PTR_ERR(btf);
2631 	}
2632 
2633 	/*
2634 	 * Note that kfunc_flags may be NULL at this point, which
2635 	 * means that we couldn't find func_id in any relevant
2636 	 * kfunc_id_set. This most likely indicates an invalid kfunc
2637 	 * call.  However we don't fail with an error here,
2638 	 * and let the caller decide what to do with NULL kfunc->flags.
2639 	 */
2640 	kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog);
2641 
2642 	func = btf_type_by_id(btf, func_id);
2643 	if (!func || !btf_type_is_func(func)) {
2644 		verbose(env, "kernel btf_id %d is not a function\n", func_id);
2645 		return -EINVAL;
2646 	}
2647 
2648 	func_name = btf_name_by_offset(btf, func->name_off);
2649 
2650 	/*
2651 	 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag
2652 	 * can be found through the counterpart _impl kfunc.
2653 	 */
2654 	if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS))
2655 		func_proto = find_kfunc_impl_proto(env, btf, func_name);
2656 	else
2657 		func_proto = btf_type_by_id(btf, func->type);
2658 
2659 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2660 		verbose(env, "kernel function btf_id %d does not have a valid func_proto\n",
2661 			func_id);
2662 		return -EINVAL;
2663 	}
2664 
2665 	memset(kfunc, 0, sizeof(*kfunc));
2666 	kfunc->btf = btf;
2667 	kfunc->id = func_id;
2668 	kfunc->name = func_name;
2669 	kfunc->proto = func_proto;
2670 	kfunc->flags = kfunc_flags;
2671 
2672 	return 0;
2673 }
2674 
2675 int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset)
2676 {
2677 	struct bpf_kfunc_btf_tab *btf_tab;
2678 	struct btf_func_model func_model;
2679 	struct bpf_kfunc_desc_tab *tab;
2680 	struct bpf_prog_aux *prog_aux;
2681 	struct bpf_kfunc_meta kfunc;
2682 	struct bpf_kfunc_desc *desc;
2683 	unsigned long addr;
2684 	int err;
2685 
2686 	prog_aux = env->prog->aux;
2687 	tab = prog_aux->kfunc_tab;
2688 	btf_tab = prog_aux->kfunc_btf_tab;
2689 	if (!tab) {
2690 		if (!btf_vmlinux) {
2691 			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2692 			return -ENOTSUPP;
2693 		}
2694 
2695 		if (!env->prog->jit_requested) {
2696 			verbose(env, "JIT is required for calling kernel function\n");
2697 			return -ENOTSUPP;
2698 		}
2699 
2700 		if (!bpf_jit_supports_kfunc_call()) {
2701 			verbose(env, "JIT does not support calling kernel function\n");
2702 			return -ENOTSUPP;
2703 		}
2704 
2705 		if (!env->prog->gpl_compatible) {
2706 			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2707 			return -EINVAL;
2708 		}
2709 
2710 		tab = kzalloc_obj(*tab, GFP_KERNEL_ACCOUNT);
2711 		if (!tab)
2712 			return -ENOMEM;
2713 		prog_aux->kfunc_tab = tab;
2714 	}
2715 
2716 	/* func_id == 0 is always invalid, but instead of returning an error, be
2717 	 * conservative and wait until the code elimination pass before returning
2718 	 * error, so that invalid calls that get pruned out can be in BPF programs
2719 	 * loaded from userspace.  It is also required that offset be untouched
2720 	 * for such calls.
2721 	 */
2722 	if (!func_id && !offset)
2723 		return 0;
2724 
2725 	if (!btf_tab && offset) {
2726 		btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT);
2727 		if (!btf_tab)
2728 			return -ENOMEM;
2729 		prog_aux->kfunc_btf_tab = btf_tab;
2730 	}
2731 
2732 	if (find_kfunc_desc(env->prog, func_id, offset))
2733 		return 0;
2734 
2735 	if (tab->nr_descs == MAX_KFUNC_DESCS) {
2736 		verbose(env, "too many different kernel function calls\n");
2737 		return -E2BIG;
2738 	}
2739 
2740 	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
2741 	if (err)
2742 		return err;
2743 
2744 	addr = kallsyms_lookup_name(kfunc.name);
2745 	if (!addr) {
2746 		verbose(env, "cannot find address for kernel function %s\n", kfunc.name);
2747 		return -EINVAL;
2748 	}
2749 
2750 	if (bpf_dev_bound_kfunc_id(func_id)) {
2751 		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2752 		if (err)
2753 			return err;
2754 	}
2755 
2756 	err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model);
2757 	if (err)
2758 		return err;
2759 
2760 	desc = &tab->descs[tab->nr_descs++];
2761 	desc->func_id = func_id;
2762 	desc->offset = offset;
2763 	desc->addr = addr;
2764 	desc->func_model = func_model;
2765 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2766 	     kfunc_desc_cmp_by_id_off, NULL);
2767 	return 0;
2768 }
2769 
2770 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2771 {
2772 	return !!prog->aux->kfunc_tab;
2773 }
2774 
2775 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2776 {
2777 	struct bpf_subprog_info *subprog = env->subprog_info;
2778 	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
2779 	struct bpf_insn *insn = env->prog->insnsi;
2780 
2781 	/* Add entry function. */
2782 	ret = add_subprog(env, 0);
2783 	if (ret)
2784 		return ret;
2785 
2786 	for (i = 0; i < insn_cnt; i++, insn++) {
2787 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2788 		    !bpf_pseudo_kfunc_call(insn))
2789 			continue;
2790 
2791 		if (!env->bpf_capable) {
2792 			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2793 			return -EPERM;
2794 		}
2795 
2796 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2797 			ret = add_subprog(env, i + insn->imm + 1);
2798 		else
2799 			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
2800 
2801 		if (ret < 0)
2802 			return ret;
2803 	}
2804 
2805 	ret = bpf_find_exception_callback_insn_off(env);
2806 	if (ret < 0)
2807 		return ret;
2808 	ex_cb_insn = ret;
2809 
2810 	/* If ex_cb_insn > 0, this means that the main program has a subprog
2811 	 * marked using BTF decl tag to serve as the exception callback.
2812 	 */
2813 	if (ex_cb_insn) {
2814 		ret = add_subprog(env, ex_cb_insn);
2815 		if (ret < 0)
2816 			return ret;
2817 		for (i = 1; i < env->subprog_cnt; i++) {
2818 			if (env->subprog_info[i].start != ex_cb_insn)
2819 				continue;
2820 			env->exception_callback_subprog = i;
2821 			bpf_mark_subprog_exc_cb(env, i);
2822 			break;
2823 		}
2824 	}
2825 
2826 	/* Add a fake 'exit' subprog which could simplify subprog iteration
2827 	 * logic. 'subprog_cnt' should not be increased.
2828 	 */
2829 	subprog[env->subprog_cnt].start = insn_cnt;
2830 
2831 	if (env->log.level & BPF_LOG_LEVEL2)
2832 		for (i = 0; i < env->subprog_cnt; i++)
2833 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
2834 
2835 	return 0;
2836 }
2837 
2838 static int check_subprogs(struct bpf_verifier_env *env)
2839 {
2840 	int i, subprog_start, subprog_end, off, cur_subprog = 0;
2841 	struct bpf_subprog_info *subprog = env->subprog_info;
2842 	struct bpf_insn *insn = env->prog->insnsi;
2843 	int insn_cnt = env->prog->len;
2844 
2845 	/* now check that all jumps are within the same subprog */
2846 	subprog_start = subprog[cur_subprog].start;
2847 	subprog_end = subprog[cur_subprog + 1].start;
2848 	for (i = 0; i < insn_cnt; i++) {
2849 		u8 code = insn[i].code;
2850 
2851 		if (code == (BPF_JMP | BPF_CALL) &&
2852 		    insn[i].src_reg == 0 &&
2853 		    insn[i].imm == BPF_FUNC_tail_call) {
2854 			subprog[cur_subprog].has_tail_call = true;
2855 			subprog[cur_subprog].tail_call_reachable = true;
2856 		}
2857 		if (BPF_CLASS(code) == BPF_LD &&
2858 		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2859 			subprog[cur_subprog].has_ld_abs = true;
2860 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2861 			goto next;
2862 		if (BPF_OP(code) == BPF_CALL)
2863 			goto next;
2864 		if (BPF_OP(code) == BPF_EXIT) {
2865 			subprog[cur_subprog].exit_idx = i;
2866 			goto next;
2867 		}
2868 		off = i + bpf_jmp_offset(&insn[i]) + 1;
2869 		if (off < subprog_start || off >= subprog_end) {
2870 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
2871 			return -EINVAL;
2872 		}
2873 next:
2874 		if (i == subprog_end - 1) {
2875 			/* to avoid fall-through from one subprog into another
2876 			 * the last insn of the subprog should be either exit
2877 			 * or unconditional jump back or bpf_throw call
2878 			 */
2879 			if (code != (BPF_JMP | BPF_EXIT) &&
2880 			    code != (BPF_JMP32 | BPF_JA) &&
2881 			    code != (BPF_JMP | BPF_JA)) {
2882 				verbose(env, "last insn is not an exit or jmp\n");
2883 				return -EINVAL;
2884 			}
2885 			subprog_start = subprog_end;
2886 			cur_subprog++;
2887 			if (cur_subprog < env->subprog_cnt)
2888 				subprog_end = subprog[cur_subprog + 1].start;
2889 		}
2890 	}
2891 	return 0;
2892 }
2893 
2894 /*
2895  * Sort subprogs in topological order so that leaf subprogs come first and
2896  * their callers come later. This is a DFS post-order traversal of the call
2897  * graph. Scan only reachable instructions (those in the computed postorder) of
2898  * the current subprog to discover callees (direct subprogs and sync
2899  * callbacks).
2900  */
2901 static int sort_subprogs_topo(struct bpf_verifier_env *env)
2902 {
2903 	struct bpf_subprog_info *si = env->subprog_info;
2904 	int *insn_postorder = env->cfg.insn_postorder;
2905 	struct bpf_insn *insn = env->prog->insnsi;
2906 	int cnt = env->subprog_cnt;
2907 	int *dfs_stack = NULL;
2908 	int top = 0, order = 0;
2909 	int i, ret = 0;
2910 	u8 *color = NULL;
2911 
2912 	color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT);
2913 	dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT);
2914 	if (!color || !dfs_stack) {
2915 		ret = -ENOMEM;
2916 		goto out;
2917 	}
2918 
2919 	/*
2920 	 * DFS post-order traversal.
2921 	 * Color values: 0 = unvisited, 1 = on stack, 2 = done.
2922 	 */
2923 	for (i = 0; i < cnt; i++) {
2924 		if (color[i])
2925 			continue;
2926 		color[i] = 1;
2927 		dfs_stack[top++] = i;
2928 
2929 		while (top > 0) {
2930 			int cur = dfs_stack[top - 1];
2931 			int po_start = si[cur].postorder_start;
2932 			int po_end = si[cur + 1].postorder_start;
2933 			bool pushed = false;
2934 			int j;
2935 
2936 			for (j = po_start; j < po_end; j++) {
2937 				int idx = insn_postorder[j];
2938 				int callee;
2939 
2940 				if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
2941 					continue;
2942 				callee = bpf_find_subprog(env, idx + insn[idx].imm + 1);
2943 				if (callee < 0) {
2944 					ret = -EFAULT;
2945 					goto out;
2946 				}
2947 				if (color[callee] == 2)
2948 					continue;
2949 				if (color[callee] == 1) {
2950 					if (bpf_pseudo_func(&insn[idx]))
2951 						continue;
2952 					verbose(env, "recursive call from %s() to %s()\n",
2953 						subprog_name(env, cur),
2954 						subprog_name(env, callee));
2955 					ret = -EINVAL;
2956 					goto out;
2957 				}
2958 				color[callee] = 1;
2959 				dfs_stack[top++] = callee;
2960 				pushed = true;
2961 				break;
2962 			}
2963 
2964 			if (!pushed) {
2965 				color[cur] = 2;
2966 				env->subprog_topo_order[order++] = cur;
2967 				top--;
2968 			}
2969 		}
2970 	}
2971 
2972 	if (env->log.level & BPF_LOG_LEVEL2)
2973 		for (i = 0; i < cnt; i++)
2974 			verbose(env, "topo_order[%d] = %s\n",
2975 				i, subprog_name(env, env->subprog_topo_order[i]));
2976 out:
2977 	kvfree(dfs_stack);
2978 	kvfree(color);
2979 	return ret;
2980 }
2981 
/* Mark 'nr_slots' stack slots as scratched, starting at 'spi' and walking
 * towards lower slot indices (spi, spi - 1, ..., spi - nr_slots + 1).
 */
static void mark_stack_slots_scratched(struct bpf_verifier_env *env,
				       int spi, int nr_slots)
{
	int stop = spi - nr_slots;

	for (; spi > stop; spi--)
		mark_stack_slot_scratched(env, spi);
}
2990 
2991 /* This function is supposed to be used by the following 32-bit optimization
2992  * code only. It returns TRUE if the source or destination register operates
2993  * on 64-bit, otherwise return FALSE.
2994  */
2995 bool bpf_is_reg64(struct bpf_insn *insn,
2996 	      u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t)
2997 {
2998 	u8 code, class, op;
2999 
3000 	code = insn->code;
3001 	class = BPF_CLASS(code);
3002 	op = BPF_OP(code);
3003 	if (class == BPF_JMP) {
3004 		/* BPF_EXIT for "main" will reach here. Return TRUE
3005 		 * conservatively.
3006 		 */
3007 		if (op == BPF_EXIT)
3008 			return true;
3009 		if (op == BPF_CALL) {
3010 			/* BPF to BPF call will reach here because of marking
3011 			 * caller saved clobber with DST_OP_NO_MARK for which we
3012 			 * don't care the register def because they are anyway
3013 			 * marked as NOT_INIT already.
3014 			 */
3015 			if (insn->src_reg == BPF_PSEUDO_CALL)
3016 				return false;
3017 			/* Helper call will reach here because of arg type
3018 			 * check, conservatively return TRUE.
3019 			 */
3020 			if (t == SRC_OP)
3021 				return true;
3022 
3023 			return false;
3024 		}
3025 	}
3026 
3027 	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3028 		return false;
3029 
3030 	if (class == BPF_ALU64 || class == BPF_JMP ||
3031 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3032 		return true;
3033 
3034 	if (class == BPF_ALU || class == BPF_JMP32)
3035 		return false;
3036 
3037 	if (class == BPF_LDX) {
3038 		if (t != SRC_OP)
3039 			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3040 		/* LDX source must be ptr. */
3041 		return true;
3042 	}
3043 
3044 	if (class == BPF_STX) {
3045 		/* BPF_STX (including atomic variants) has one or more source
3046 		 * operands, one of which is a ptr. Check whether the caller is
3047 		 * asking about it.
3048 		 */
3049 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
3050 			return true;
3051 		return BPF_SIZE(code) == BPF_DW;
3052 	}
3053 
3054 	if (class == BPF_LD) {
3055 		u8 mode = BPF_MODE(code);
3056 
3057 		/* LD_IMM64 */
3058 		if (mode == BPF_IMM)
3059 			return true;
3060 
3061 		/* Both LD_IND and LD_ABS return 32-bit data. */
3062 		if (t != SRC_OP)
3063 			return  false;
3064 
3065 		/* Implicit ctx ptr. */
3066 		if (regno == BPF_REG_6)
3067 			return true;
3068 
3069 		/* Explicit source could be any width. */
3070 		return true;
3071 	}
3072 
3073 	if (class == BPF_ST)
3074 		/* The only source register for BPF_ST is a ptr. */
3075 		return true;
3076 
3077 	/* Conservatively return true at default. */
3078 	return true;
3079 }
3080 
3081 static void mark_insn_zext(struct bpf_verifier_env *env,
3082 			   struct bpf_reg_state *reg)
3083 {
3084 	s32 def_idx = reg->subreg_def;
3085 
3086 	if (def_idx == DEF_NOT_SUBREG)
3087 		return;
3088 
3089 	env->insn_aux_data[def_idx - 1].zext_dst = true;
3090 	/* The dst will be zero extended, so won't be sub-register anymore. */
3091 	reg->subreg_def = DEF_NOT_SUBREG;
3092 }
3093 
3094 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3095 			   enum bpf_reg_arg_type t)
3096 {
3097 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3098 	struct bpf_reg_state *reg;
3099 	bool rw64;
3100 
3101 	mark_reg_scratched(env, regno);
3102 
3103 	reg = &regs[regno];
3104 	rw64 = bpf_is_reg64(insn, regno, reg, t);
3105 	if (t == SRC_OP) {
3106 		/* check whether register used as source operand can be read */
3107 		if (reg->type == NOT_INIT) {
3108 			verbose(env, "R%d !read_ok\n", regno);
3109 			return -EACCES;
3110 		}
3111 		/* We don't need to worry about FP liveness because it's read-only */
3112 		if (regno == BPF_REG_FP)
3113 			return 0;
3114 
3115 		if (rw64)
3116 			mark_insn_zext(env, reg);
3117 
3118 		return 0;
3119 	} else {
3120 		/* check whether register used as dest operand can be written to */
3121 		if (regno == BPF_REG_FP) {
3122 			verbose(env, "frame pointer is read only\n");
3123 			return -EACCES;
3124 		}
3125 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3126 		if (t == DST_OP)
3127 			mark_reg_unknown(env, regs, regno);
3128 	}
3129 	return 0;
3130 }
3131 
3132 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3133 			 enum bpf_reg_arg_type t)
3134 {
3135 	struct bpf_verifier_state *vstate = env->cur_state;
3136 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3137 
3138 	return __check_reg_arg(env, state->regs, regno, t);
3139 }
3140 
3141 static void mark_indirect_target(struct bpf_verifier_env *env, int idx)
3142 {
3143 	env->insn_aux_data[idx].indirect_target = true;
3144 }
3145 
3146 #define LR_FRAMENO_BITS	3
3147 #define LR_SPI_BITS	6
3148 #define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
3149 #define LR_SIZE_BITS	4
3150 #define LR_FRAMENO_MASK	((1ull << LR_FRAMENO_BITS) - 1)
3151 #define LR_SPI_MASK	((1ull << LR_SPI_BITS)     - 1)
3152 #define LR_SIZE_MASK	((1ull << LR_SIZE_BITS)    - 1)
3153 #define LR_SPI_OFF	LR_FRAMENO_BITS
3154 #define LR_IS_REG_OFF	(LR_SPI_BITS + LR_FRAMENO_BITS)
3155 #define LINKED_REGS_MAX	6
3156 
3157 struct linked_reg {
3158 	u8 frameno;
3159 	union {
3160 		u8 spi;
3161 		u8 regno;
3162 	};
3163 	bool is_reg;
3164 };
3165 
3166 struct linked_regs {
3167 	int cnt;
3168 	struct linked_reg entries[LINKED_REGS_MAX];
3169 };
3170 
3171 static struct linked_reg *linked_regs_push(struct linked_regs *s)
3172 {
3173 	if (s->cnt < LINKED_REGS_MAX)
3174 		return &s->entries[s->cnt++];
3175 
3176 	return NULL;
3177 }
3178 
3179 /* Use u64 as a vector of 6 10-bit values, use first 4-bits to track
3180  * number of elements currently in stack.
3181  * Pack one history entry for linked registers as 10 bits in the following format:
3182  * - 3-bits frameno
3183  * - 6-bits spi_or_reg
3184  * - 1-bit  is_reg
3185  */
3186 static u64 linked_regs_pack(struct linked_regs *s)
3187 {
3188 	u64 val = 0;
3189 	int i;
3190 
3191 	for (i = 0; i < s->cnt; ++i) {
3192 		struct linked_reg *e = &s->entries[i];
3193 		u64 tmp = 0;
3194 
3195 		tmp |= e->frameno;
3196 		tmp |= e->spi << LR_SPI_OFF;
3197 		tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
3198 
3199 		val <<= LR_ENTRY_BITS;
3200 		val |= tmp;
3201 	}
3202 	val <<= LR_SIZE_BITS;
3203 	val |= s->cnt;
3204 	return val;
3205 }
3206 
3207 static void linked_regs_unpack(u64 val, struct linked_regs *s)
3208 {
3209 	int i;
3210 
3211 	s->cnt = val & LR_SIZE_MASK;
3212 	val >>= LR_SIZE_BITS;
3213 
3214 	for (i = 0; i < s->cnt; ++i) {
3215 		struct linked_reg *e = &s->entries[i];
3216 
3217 		e->frameno =  val & LR_FRAMENO_MASK;
3218 		e->spi     = (val >> LR_SPI_OFF) & LR_SPI_MASK;
3219 		e->is_reg  = (val >> LR_IS_REG_OFF) & 0x1;
3220 		val >>= LR_ENTRY_BITS;
3221 	}
3222 }
3223 
3224 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3225 {
3226 	const struct btf_type *func;
3227 	struct btf *desc_btf;
3228 
3229 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3230 		return NULL;
3231 
3232 	desc_btf = find_kfunc_desc_btf(data, insn->off);
3233 	if (IS_ERR(desc_btf))
3234 		return "<error>";
3235 
3236 	func = btf_type_by_id(desc_btf, insn->imm);
3237 	return btf_name_by_offset(desc_btf, func->name_off);
3238 }
3239 
3240 void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
3241 {
3242 	const struct bpf_insn_cbs cbs = {
3243 		.cb_call	= disasm_kfunc_name,
3244 		.cb_print	= verbose,
3245 		.private_data	= env,
3246 	};
3247 
3248 	print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3249 }
3250 
3251 /* If any register R in hist->linked_regs is marked as precise in bt,
3252  * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
3253  */
3254 void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
3255 {
3256 	struct linked_regs linked_regs;
3257 	bool some_precise = false;
3258 	int i;
3259 
3260 	if (!hist || hist->linked_regs == 0)
3261 		return;
3262 
3263 	linked_regs_unpack(hist->linked_regs, &linked_regs);
3264 	for (i = 0; i < linked_regs.cnt; ++i) {
3265 		struct linked_reg *e = &linked_regs.entries[i];
3266 
3267 		if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
3268 		    (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
3269 			some_precise = true;
3270 			break;
3271 		}
3272 	}
3273 
3274 	if (!some_precise)
3275 		return;
3276 
3277 	for (i = 0; i < linked_regs.cnt; ++i) {
3278 		struct linked_reg *e = &linked_regs.entries[i];
3279 
3280 		if (e->is_reg)
3281 			bpf_bt_set_frame_reg(bt, e->frameno, e->regno);
3282 		else
3283 			bpf_bt_set_frame_slot(bt, e->frameno, e->spi);
3284 	}
3285 }
3286 
3287 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
3288 {
3289 	return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
3290 }
3291 
3292 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
3293  * desired reg and stack masks across all relevant frames
3294  */
3295 static int mark_chain_precision_batch(struct bpf_verifier_env *env,
3296 				      struct bpf_verifier_state *starting_state)
3297 {
3298 	return bpf_mark_chain_precision(env, starting_state, -1, NULL);
3299 }
3300 
3301 static bool is_spillable_regtype(enum bpf_reg_type type)
3302 {
3303 	switch (base_type(type)) {
3304 	case PTR_TO_MAP_VALUE:
3305 	case PTR_TO_STACK:
3306 	case PTR_TO_CTX:
3307 	case PTR_TO_PACKET:
3308 	case PTR_TO_PACKET_META:
3309 	case PTR_TO_PACKET_END:
3310 	case PTR_TO_FLOW_KEYS:
3311 	case CONST_PTR_TO_MAP:
3312 	case PTR_TO_SOCKET:
3313 	case PTR_TO_SOCK_COMMON:
3314 	case PTR_TO_TCP_SOCK:
3315 	case PTR_TO_XDP_SOCK:
3316 	case PTR_TO_BTF_ID:
3317 	case PTR_TO_BUF:
3318 	case PTR_TO_MEM:
3319 	case PTR_TO_FUNC:
3320 	case PTR_TO_MAP_KEY:
3321 	case PTR_TO_ARENA:
3322 		return true;
3323 	default:
3324 		return false;
3325 	}
3326 }
3327 
3328 
3329 /* check if register is a constant scalar value */
3330 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
3331 {
3332 	return reg->type == SCALAR_VALUE &&
3333 	       tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
3334 }
3335 
3336 /* assuming is_reg_const() is true, return constant value of a register */
3337 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
3338 {
3339 	return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
3340 }
3341 
3342 static bool __is_pointer_value(bool allow_ptr_leaks,
3343 			       const struct bpf_reg_state *reg)
3344 {
3345 	if (allow_ptr_leaks)
3346 		return false;
3347 
3348 	return reg->type != SCALAR_VALUE;
3349 }
3350 
3351 static void clear_scalar_id(struct bpf_reg_state *reg)
3352 {
3353 	reg->id = 0;
3354 	reg->delta = 0;
3355 }
3356 
3357 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
3358 					struct bpf_reg_state *src_reg)
3359 {
3360 	if (src_reg->type != SCALAR_VALUE)
3361 		return;
3362 	/*
3363 	 * The verifier is processing rX = rY insn and
3364 	 * rY->id has special linked register already.
3365 	 * Cleared it, since multiple rX += const are not supported.
3366 	 */
3367 	if (src_reg->id & BPF_ADD_CONST)
3368 		clear_scalar_id(src_reg);
3369 	/*
3370 	 * Ensure that src_reg has a valid ID that will be copied to
3371 	 * dst_reg and then will be used by sync_linked_regs() to
3372 	 * propagate min/max range.
3373 	 */
3374 	if (!src_reg->id && !tnum_is_const(src_reg->var_off))
3375 		src_reg->id = ++env->id_gen;
3376 }
3377 
3378 static void save_register_state(struct bpf_verifier_env *env,
3379 				struct bpf_func_state *state,
3380 				int spi, struct bpf_reg_state *reg,
3381 				int size)
3382 {
3383 	int i;
3384 
3385 	state->stack[spi].spilled_ptr = *reg;
3386 
3387 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
3388 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
3389 
3390 	/* size < 8 bytes spill */
3391 	for (; i; i--)
3392 		mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
3393 }
3394 
3395 static bool is_bpf_st_mem(struct bpf_insn *insn)
3396 {
3397 	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3398 }
3399 
3400 static int get_reg_width(struct bpf_reg_state *reg)
3401 {
3402 	return fls64(reg_umax(reg));
3403 }
3404 
3405 /* See comment for mark_fastcall_pattern_for_call() */
3406 static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
3407 					  struct bpf_func_state *state, int insn_idx, int off)
3408 {
3409 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
3410 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
3411 	int i;
3412 
3413 	if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
3414 		return;
3415 	/* access to the region [max_stack_depth .. fastcall_stack_off)
3416 	 * from something that is not a part of the fastcall pattern,
3417 	 * disable fastcall rewrites for current subprogram by setting
3418 	 * fastcall_stack_off to a value smaller than any possible offset.
3419 	 */
3420 	subprog->fastcall_stack_off = S16_MIN;
3421 	/* reset fastcall aux flags within subprogram,
3422 	 * happens at most once per subprogram
3423 	 */
3424 	for (i = subprog->start; i < (subprog + 1)->start; ++i) {
3425 		aux[i].fastcall_spills_num = 0;
3426 		aux[i].fastcall_pattern = 0;
3427 	}
3428 }
3429 
3430 static void scrub_special_slot(struct bpf_func_state *state, int spi)
3431 {
3432 	int i;
3433 
3434 	/* regular write of data into stack destroys any spilled ptr */
3435 	state->stack[spi].spilled_ptr.type = NOT_INIT;
3436 	/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
3437 	if (is_stack_slot_special(&state->stack[spi]))
3438 		for (i = 0; i < BPF_REG_SIZE; i++)
3439 			scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3440 }
3441 
3442 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
3443  * stack boundary and alignment are checked in check_mem_access()
3444  */
3445 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
3446 				       /* stack frame we're writing to */
3447 				       struct bpf_func_state *state,
3448 				       int off, int size, int value_regno,
3449 				       int insn_idx)
3450 {
3451 	struct bpf_func_state *cur; /* state of the current function */
3452 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
3453 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3454 	struct bpf_reg_state *reg = NULL;
3455 	int insn_flags = INSN_F_STACK_ACCESS;
3456 	int hist_spi = spi, hist_frame = state->frameno;
3457 
3458 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
3459 	 * so it's aligned access and [off, off + size) are within stack limits
3460 	 */
3461 	if (!env->allow_ptr_leaks &&
3462 	    bpf_is_spilled_reg(&state->stack[spi]) &&
3463 	    !bpf_is_spilled_scalar_reg(&state->stack[spi]) &&
3464 	    size != BPF_REG_SIZE) {
3465 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
3466 		return -EACCES;
3467 	}
3468 
3469 	cur = env->cur_state->frame[env->cur_state->curframe];
3470 	if (value_regno >= 0)
3471 		reg = &cur->regs[value_regno];
3472 	if (!env->bypass_spec_v4) {
3473 		bool sanitize = reg && is_spillable_regtype(reg->type);
3474 
3475 		for (i = 0; i < size; i++) {
3476 			u8 type = state->stack[spi].slot_type[i];
3477 
3478 			if (type != STACK_MISC && type != STACK_ZERO) {
3479 				sanitize = true;
3480 				break;
3481 			}
3482 		}
3483 
3484 		if (sanitize)
3485 			env->insn_aux_data[insn_idx].nospec_result = true;
3486 	}
3487 
3488 	err = destroy_if_dynptr_stack_slot(env, state, spi);
3489 	if (err)
3490 		return err;
3491 
3492 	check_fastcall_stack_contract(env, state, insn_idx, off);
3493 	mark_stack_slot_scratched(env, spi);
3494 	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
3495 		bool reg_value_fits;
3496 
3497 		reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
3498 		/* Make sure that reg had an ID to build a relation on spill. */
3499 		if (reg_value_fits)
3500 			assign_scalar_id_before_mov(env, reg);
3501 		save_register_state(env, state, spi, reg, size);
3502 		/* Break the relation on a narrowing spill. */
3503 		if (!reg_value_fits)
3504 			state->stack[spi].spilled_ptr.id = 0;
3505 	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
3506 		   env->bpf_capable) {
3507 		struct bpf_reg_state *tmp_reg = &env->fake_reg[0];
3508 
3509 		memset(tmp_reg, 0, sizeof(*tmp_reg));
3510 		__mark_reg_known(tmp_reg, insn->imm);
3511 		tmp_reg->type = SCALAR_VALUE;
3512 		save_register_state(env, state, spi, tmp_reg, size);
3513 	} else if (reg && is_spillable_regtype(reg->type)) {
3514 		/* register containing pointer is being spilled into stack */
3515 		if (size != BPF_REG_SIZE) {
3516 			verbose_linfo(env, insn_idx, "; ");
3517 			verbose(env, "invalid size of register spill\n");
3518 			return -EACCES;
3519 		}
3520 		if (state != cur && reg->type == PTR_TO_STACK) {
3521 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
3522 			return -EINVAL;
3523 		}
3524 		save_register_state(env, state, spi, reg, size);
3525 	} else {
3526 		u8 type = STACK_MISC;
3527 
3528 		scrub_special_slot(state, spi);
3529 
3530 		/* when we zero initialize stack slots mark them as such */
3531 		if ((reg && bpf_register_is_null(reg)) ||
3532 		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
3533 			/* STACK_ZERO case happened because register spill
3534 			 * wasn't properly aligned at the stack slot boundary,
3535 			 * so it's not a register spill anymore; force
3536 			 * originating register to be precise to make
3537 			 * STACK_ZERO correct for subsequent states
3538 			 */
3539 			err = mark_chain_precision(env, value_regno);
3540 			if (err)
3541 				return err;
3542 			type = STACK_ZERO;
3543 		}
3544 
3545 		/* Mark slots affected by this stack write. */
3546 		for (i = 0; i < size; i++)
3547 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
3548 		insn_flags = 0; /* not a register spill */
3549 	}
3550 
3551 	if (insn_flags)
3552 		return bpf_push_jmp_history(env, env->cur_state, insn_flags,
3553 					    hist_spi, hist_frame, 0);
3554 	return 0;
3555 }
3556 
3557 /* Write the stack: 'stack[ptr_reg + off] = value_regno'. 'ptr_reg' is
3558  * known to contain a variable offset.
3559  * This function checks whether the write is permitted and conservatively
3560  * tracks the effects of the write, considering that each stack slot in the
3561  * dynamic range is potentially written to.
3562  *
3563  * 'value_regno' can be -1, meaning that an unknown value is being written to
3564  * the stack.
3565  *
3566  * Spilled pointers in range are not marked as written because we don't know
3567  * what's going to be actually written. This means that read propagation for
3568  * future reads cannot be terminated by this write.
3569  *
3570  * For privileged programs, uninitialized stack slots are considered
3571  * initialized by this write (even though we don't know exactly what offsets
3572  * are going to be written to). The idea is that we don't want the verifier to
3573  * reject future reads that access slots written to through variable offsets.
3574  */
3575 static int check_stack_write_var_off(struct bpf_verifier_env *env,
3576 				     /* func where register points to */
3577 				     struct bpf_func_state *state,
3578 				     struct bpf_reg_state *ptr_reg, int off, int size,
3579 				     int value_regno, int insn_idx)
3580 {
3581 	struct bpf_func_state *cur; /* state of the current function */
3582 	int min_off, max_off;
3583 	int i, err;
3584 	struct bpf_reg_state *value_reg = NULL;
3585 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3586 	bool writing_zero = false;
3587 	/* set if the fact that we're writing a zero is used to let any
3588 	 * stack slots remain STACK_ZERO
3589 	 */
3590 	bool zero_used = false;
3591 
3592 	cur = env->cur_state->frame[env->cur_state->curframe];
3593 	min_off = reg_smin(ptr_reg) + off;
3594 	max_off = reg_smax(ptr_reg) + off + size;
3595 	if (value_regno >= 0)
3596 		value_reg = &cur->regs[value_regno];
3597 	if ((value_reg && bpf_register_is_null(value_reg)) ||
3598 	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
3599 		writing_zero = true;
3600 
3601 	for (i = min_off; i < max_off; i++) {
3602 		int spi;
3603 
3604 		spi = bpf_get_spi(i);
3605 		err = destroy_if_dynptr_stack_slot(env, state, spi);
3606 		if (err)
3607 			return err;
3608 	}
3609 
3610 	check_fastcall_stack_contract(env, state, insn_idx, min_off);
3611 	/* Variable offset writes destroy any spilled pointers in range. */
3612 	for (i = min_off; i < max_off; i++) {
3613 		u8 new_type, *stype;
3614 		int slot, spi;
3615 
3616 		slot = -i - 1;
3617 		spi = slot / BPF_REG_SIZE;
3618 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3619 		mark_stack_slot_scratched(env, spi);
3620 
3621 		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
3622 			/* Reject the write if range we may write to has not
3623 			 * been initialized beforehand. If we didn't reject
3624 			 * here, the ptr status would be erased below (even
3625 			 * though not all slots are actually overwritten),
3626 			 * possibly opening the door to leaks.
3627 			 *
3628 			 * We do however catch STACK_INVALID case below, and
3629 			 * only allow reading possibly uninitialized memory
3630 			 * later for CAP_PERFMON, as the write may not happen to
3631 			 * that slot.
3632 			 */
3633 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3634 				insn_idx, i);
3635 			return -EINVAL;
3636 		}
3637 
3638 		/* If writing_zero and the spi slot contains a spill of value 0,
3639 		 * maintain the spill type.
3640 		 */
3641 		if (writing_zero && *stype == STACK_SPILL &&
3642 		    bpf_is_spilled_scalar_reg(&state->stack[spi])) {
3643 			struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
3644 
3645 			if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
3646 				zero_used = true;
3647 				continue;
3648 			}
3649 		}
3650 
3651 		/*
3652 		 * Scrub slots if variable-offset stack write goes over spilled pointers.
3653 		 * Otherwise bpf_is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT
3654 		 * and valid program is rejected by check_stack_read_fixed_off()
3655 		 * with obscure "invalid size of register fill" message.
3656 		 */
3657 		scrub_special_slot(state, spi);
3658 
3659 		/* Update the slot type. */
3660 		new_type = STACK_MISC;
3661 		if (writing_zero && *stype == STACK_ZERO) {
3662 			new_type = STACK_ZERO;
3663 			zero_used = true;
3664 		}
3665 		/* If the slot is STACK_INVALID, we check whether it's OK to
3666 		 * pretend that it will be initialized by this write. The slot
3667 		 * might not actually be written to, and so if we mark it as
3668 		 * initialized future reads might leak uninitialized memory.
3669 		 * For privileged programs, we will accept such reads to slots
3670 		 * that may or may not be written because, if we're reject
3671 		 * them, the error would be too confusing.
3672 		 * Conservatively, treat STACK_POISON in a similar way.
3673 		 */
3674 		if ((*stype == STACK_INVALID || *stype == STACK_POISON) &&
3675 		    !env->allow_uninit_stack) {
3676 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3677 					insn_idx, i);
3678 			return -EINVAL;
3679 		}
3680 		*stype = new_type;
3681 	}
3682 	if (zero_used) {
3683 		/* backtracking doesn't work for STACK_ZERO yet. */
3684 		err = mark_chain_precision(env, value_regno);
3685 		if (err)
3686 			return err;
3687 	}
3688 	return 0;
3689 }
3690 
3691 /* When register 'dst_regno' is assigned some values from stack[min_off,
3692  * max_off), we set the register's type according to the types of the
3693  * respective stack slots. If all the stack values are known to be zeros, then
3694  * so is the destination reg. Otherwise, the register is considered to be
3695  * SCALAR. This function does not deal with register filling; the caller must
3696  * ensure that all spilled registers in the stack range have been marked as
3697  * read.
3698  */
3699 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3700 				/* func where src register points to */
3701 				struct bpf_func_state *ptr_state,
3702 				int min_off, int max_off, int dst_regno)
3703 {
3704 	struct bpf_verifier_state *vstate = env->cur_state;
3705 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3706 	int i, slot, spi;
3707 	u8 *stype;
3708 	int zeros = 0;
3709 
3710 	for (i = min_off; i < max_off; i++) {
3711 		slot = -i - 1;
3712 		spi = slot / BPF_REG_SIZE;
3713 		mark_stack_slot_scratched(env, spi);
3714 		stype = ptr_state->stack[spi].slot_type;
3715 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3716 			break;
3717 		zeros++;
3718 	}
3719 	if (zeros == max_off - min_off) {
3720 		/* Any access_size read into register is zero extended,
3721 		 * so the whole register == const_zero.
3722 		 */
3723 		__mark_reg_const_zero(env, &state->regs[dst_regno]);
3724 	} else {
3725 		/* have read misc data from the stack */
3726 		mark_reg_unknown(env, state->regs, dst_regno);
3727 	}
3728 }
3729 
3730 /* Read the stack at 'off' and put the results into the register indicated by
3731  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3732  * spilled reg.
3733  *
3734  * 'dst_regno' can be -1, meaning that the read value is not going to a
3735  * register.
3736  *
3737  * The access is assumed to be within the current stack bounds.
3738  */
3739 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3740 				      /* func where src register points to */
3741 				      struct bpf_func_state *reg_state,
3742 				      int off, int size, int dst_regno)
3743 {
3744 	struct bpf_verifier_state *vstate = env->cur_state;
3745 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3746 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3747 	struct bpf_reg_state *reg;
3748 	u8 *stype, type;
3749 	int insn_flags = INSN_F_STACK_ACCESS;
3750 	int hist_spi = spi, hist_frame = reg_state->frameno;
3751 
3752 	stype = reg_state->stack[spi].slot_type;
3753 	reg = &reg_state->stack[spi].spilled_ptr;
3754 
3755 	mark_stack_slot_scratched(env, spi);
3756 	check_fastcall_stack_contract(env, state, env->insn_idx, off);
3757 
3758 	if (bpf_is_spilled_reg(&reg_state->stack[spi])) {
3759 		u8 spill_size = 1;
3760 
3761 		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3762 			spill_size++;
3763 
3764 		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3765 			if (reg->type != SCALAR_VALUE) {
3766 				verbose_linfo(env, env->insn_idx, "; ");
3767 				verbose(env, "invalid size of register fill\n");
3768 				return -EACCES;
3769 			}
3770 
3771 			if (dst_regno < 0)
3772 				return 0;
3773 
3774 			if (size <= spill_size &&
3775 			    bpf_stack_narrow_access_ok(off, size, spill_size)) {
3776 				/* The earlier check_reg_arg() has decided the
3777 				 * subreg_def for this insn.  Save it first.
3778 				 */
3779 				s32 subreg_def = state->regs[dst_regno].subreg_def;
3780 
3781 				if (env->bpf_capable && size == 4 && spill_size == 4 &&
3782 				    get_reg_width(reg) <= 32)
3783 					/* Ensure stack slot has an ID to build a relation
3784 					 * with the destination register on fill.
3785 					 */
3786 					assign_scalar_id_before_mov(env, reg);
3787 				state->regs[dst_regno] = *reg;
3788 				state->regs[dst_regno].subreg_def = subreg_def;
3789 
3790 				/* Break the relation on a narrowing fill.
3791 				 * coerce_reg_to_size will adjust the boundaries.
3792 				 */
3793 				if (get_reg_width(reg) > size * BITS_PER_BYTE)
3794 					clear_scalar_id(&state->regs[dst_regno]);
3795 			} else {
3796 				int spill_cnt = 0, zero_cnt = 0;
3797 
3798 				for (i = 0; i < size; i++) {
3799 					type = stype[(slot - i) % BPF_REG_SIZE];
3800 					if (type == STACK_SPILL) {
3801 						spill_cnt++;
3802 						continue;
3803 					}
3804 					if (type == STACK_MISC)
3805 						continue;
3806 					if (type == STACK_ZERO) {
3807 						zero_cnt++;
3808 						continue;
3809 					}
3810 					if (type == STACK_INVALID && env->allow_uninit_stack)
3811 						continue;
3812 					if (type == STACK_POISON) {
3813 						verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
3814 							off, i, size);
3815 					} else {
3816 						verbose(env, "invalid read from stack off %d+%d size %d\n",
3817 							off, i, size);
3818 					}
3819 					return -EACCES;
3820 				}
3821 
3822 				if (spill_cnt == size &&
3823 				    tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
3824 					__mark_reg_const_zero(env, &state->regs[dst_regno]);
3825 					/* this IS register fill, so keep insn_flags */
3826 				} else if (zero_cnt == size) {
3827 					/* similarly to mark_reg_stack_read(), preserve zeroes */
3828 					__mark_reg_const_zero(env, &state->regs[dst_regno]);
3829 					insn_flags = 0; /* not restoring original register state */
3830 				} else {
3831 					mark_reg_unknown(env, state->regs, dst_regno);
3832 					insn_flags = 0; /* not restoring original register state */
3833 				}
3834 			}
3835 		} else if (dst_regno >= 0) {
3836 			/* restore register state from stack */
3837 			if (env->bpf_capable)
3838 				/* Ensure stack slot has an ID to build a relation
3839 				 * with the destination register on fill.
3840 				 */
3841 				assign_scalar_id_before_mov(env, reg);
3842 			state->regs[dst_regno] = *reg;
3843 			/* mark reg as written since spilled pointer state likely
3844 			 * has its liveness marks cleared by is_state_visited()
3845 			 * which resets stack/reg liveness for state transitions
3846 			 */
3847 		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3848 			/* If dst_regno==-1, the caller is asking us whether
3849 			 * it is acceptable to use this value as a SCALAR_VALUE
3850 			 * (e.g. for XADD).
3851 			 * We must not allow unprivileged callers to do that
3852 			 * with spilled pointers.
3853 			 */
3854 			verbose(env, "leaking pointer from stack off %d\n",
3855 				off);
3856 			return -EACCES;
3857 		}
3858 	} else {
3859 		for (i = 0; i < size; i++) {
3860 			type = stype[(slot - i) % BPF_REG_SIZE];
3861 			if (type == STACK_MISC)
3862 				continue;
3863 			if (type == STACK_ZERO)
3864 				continue;
3865 			if (type == STACK_INVALID && env->allow_uninit_stack)
3866 				continue;
3867 			if (type == STACK_POISON) {
3868 				verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
3869 					off, i, size);
3870 			} else {
3871 				verbose(env, "invalid read from stack off %d+%d size %d\n",
3872 					off, i, size);
3873 			}
3874 			return -EACCES;
3875 		}
3876 		if (dst_regno >= 0)
3877 			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3878 		insn_flags = 0; /* we are not restoring spilled register */
3879 	}
3880 	if (insn_flags)
3881 		return bpf_push_jmp_history(env, env->cur_state, insn_flags,
3882 					    hist_spi, hist_frame, 0);
3883 	return 0;
3884 }
3885 
/* Identifies who performs a memory access that is being checked. */
enum bpf_access_src {
	/* the access is performed by an instruction */
	ACCESS_DIRECT = 1,
	/* the access is performed by a helper */
	ACCESS_HELPER = 2,
};
3890 
3891 static int check_stack_range_initialized(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3892 					 argno_t argno, int off, int access_size,
3893 					 bool zero_size_allowed,
3894 					 enum bpf_access_type type,
3895 					 struct bpf_call_arg_meta *meta);
3896 
3897 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3898 {
3899 	return cur_regs(env) + regno;
3900 }
3901 
3902 /* Read the stack at 'reg + off' and put the result into the register
3903  * 'dst_regno'.
3904  * 'off' includes the pointer register's fixed offset(i.e. 'reg->off'),
3905  * but not its variable offset.
3906  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3907  *
3908  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3909  * filling registers (i.e. reads of spilled register cannot be detected when
3910  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3911  * SCALAR_VALUE. That's why we assert that the 'reg' has a variable
3912  * offset; for a fixed offset check_stack_read_fixed_off should be used
3913  * instead.
3914  */
3915 static int check_stack_read_var_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3916 				    argno_t ptr_argno, int off, int size, int dst_regno)
3917 {
3918 	struct bpf_func_state *ptr_state = bpf_func(env, reg);
3919 	int err;
3920 	int min_off, max_off;
3921 
3922 	/* Note that we pass a NULL meta, so raw access will not be permitted.
3923 	 */
3924 	err = check_stack_range_initialized(env, reg, ptr_argno, off, size,
3925 					    false, BPF_READ, NULL);
3926 	if (err)
3927 		return err;
3928 
3929 	min_off = reg_smin(reg) + off;
3930 	max_off = reg_smax(reg) + off;
3931 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3932 	check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
3933 	return 0;
3934 }
3935 
3936 /* check_stack_read dispatches to check_stack_read_fixed_off or
3937  * check_stack_read_var_off.
3938  *
3939  * The caller must ensure that the offset falls within the allocated stack
3940  * bounds.
3941  *
3942  * 'dst_regno' is a register which will receive the value from the stack. It
3943  * can be -1, meaning that the read value is not going to a register.
3944  */
3945 static int check_stack_read(struct bpf_verifier_env *env,
3946 			    struct bpf_reg_state *reg, argno_t ptr_argno, int off, int size,
3947 			    int dst_regno)
3948 {
3949 	struct bpf_func_state *state = bpf_func(env, reg);
3950 	int err;
3951 	/* Some accesses are only permitted with a static offset. */
3952 	bool var_off = !tnum_is_const(reg->var_off);
3953 
3954 	/* The offset is required to be static when reads don't go to a
3955 	 * register, in order to not leak pointers (see
3956 	 * check_stack_read_fixed_off).
3957 	 */
3958 	if (dst_regno < 0 && var_off) {
3959 		char tn_buf[48];
3960 
3961 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3962 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3963 			tn_buf, off, size);
3964 		return -EACCES;
3965 	}
3966 	/* Variable offset is prohibited for unprivileged mode for simplicity
3967 	 * since it requires corresponding support in Spectre masking for stack
3968 	 * ALU. See also retrieve_ptr_limit(). The check in
3969 	 * check_stack_access_for_ptr_arithmetic() called by
3970 	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
3971 	 * with variable offsets, therefore no check is required here. Further,
3972 	 * just checking it here would be insufficient as speculative stack
3973 	 * writes could still lead to unsafe speculative behaviour.
3974 	 */
3975 	if (!var_off) {
3976 		off += reg->var_off.value;
3977 		err = check_stack_read_fixed_off(env, state, off, size,
3978 						 dst_regno);
3979 	} else {
3980 		/* Variable offset stack reads need more conservative handling
3981 		 * than fixed offset ones. Note that dst_regno >= 0 on this
3982 		 * branch.
3983 		 */
3984 		err = check_stack_read_var_off(env, reg, ptr_argno, off, size,
3985 					       dst_regno);
3986 	}
3987 	return err;
3988 }
3989 
3990 
3991 /* check_stack_write dispatches to check_stack_write_fixed_off or
3992  * check_stack_write_var_off.
3993  *
3994  * 'reg' is the register used as a pointer into the stack.
3995  * 'value_regno' is the register whose value we're writing to the stack. It can
3996  * be -1, meaning that we're not writing from a register.
3997  *
3998  * The caller must ensure that the offset falls within the maximum stack size.
3999  */
4000 static int check_stack_write(struct bpf_verifier_env *env,
4001 			     struct bpf_reg_state *reg, int off, int size,
4002 			     int value_regno, int insn_idx)
4003 {
4004 	struct bpf_func_state *state = bpf_func(env, reg);
4005 	int err;
4006 
4007 	if (tnum_is_const(reg->var_off)) {
4008 		off += reg->var_off.value;
4009 		err = check_stack_write_fixed_off(env, state, off, size,
4010 						  value_regno, insn_idx);
4011 	} else {
4012 		/* Variable offset stack reads need more conservative handling
4013 		 * than fixed offset ones.
4014 		 */
4015 		err = check_stack_write_var_off(env, state,
4016 						reg, off, size,
4017 						value_regno, insn_idx);
4018 	}
4019 	return err;
4020 }
4021 
4022 /*
4023  * Write a value to the outgoing stack arg area.
4024  * off is a negative offset from r11 (e.g. -8 for arg6, -16 for arg7).
4025  */
4026 static int check_stack_arg_write(struct bpf_verifier_env *env, struct bpf_func_state *state,
4027 				 int off, struct bpf_reg_state *value_reg)
4028 {
4029 	int max_stack_arg_regs = MAX_BPF_FUNC_ARGS - MAX_BPF_FUNC_REG_ARGS;
4030 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4031 	int spi = -off / BPF_REG_SIZE - 1;
4032 	struct bpf_reg_state *arg;
4033 	int err;
4034 
4035 	if (spi >= max_stack_arg_regs) {
4036 		verbose(env, "stack arg write offset %d exceeds max %d stack args\n",
4037 			off, max_stack_arg_regs);
4038 		return -EINVAL;
4039 	}
4040 
4041 	err = grow_stack_arg_slots(env, state, spi + 1);
4042 	if (err)
4043 		return err;
4044 
4045 	/* Track the max outgoing stack arg slot count. */
4046 	if (spi + 1 > subprog->max_out_stack_arg_cnt)
4047 		subprog->max_out_stack_arg_cnt = spi + 1;
4048 
4049 	if (value_reg) {
4050 		state->stack_arg_regs[spi] = *value_reg;
4051 	} else {
4052 		/* BPF_ST: store immediate, treat as scalar */
4053 		arg = &state->stack_arg_regs[spi];
4054 		arg->type = SCALAR_VALUE;
4055 		__mark_reg_known(arg, env->prog->insnsi[env->insn_idx].imm);
4056 	}
4057 	state->no_stack_arg_load = true;
4058 	return bpf_push_jmp_history(env, env->cur_state,
4059 				    INSN_F_STACK_ARG_ACCESS, spi, 0, 0);
4060 }
4061 
4062 /*
4063  * Read a value from the incoming stack arg area.
4064  * off is a positive offset from r11 (e.g. +8 for arg6, +16 for arg7).
4065  */
4066 static int check_stack_arg_read(struct bpf_verifier_env *env, struct bpf_func_state *state,
4067 				int off, int dst_regno)
4068 {
4069 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4070 	struct bpf_verifier_state *vstate = env->cur_state;
4071 	int spi = off / BPF_REG_SIZE - 1;
4072 	struct bpf_func_state *caller, *cur;
4073 	struct bpf_reg_state *arg;
4074 
4075 	if (state->no_stack_arg_load) {
4076 		verbose(env, "r11 load must be before any r11 store or call insn\n");
4077 		return -EINVAL;
4078 	}
4079 
4080 	if (spi + 1 > bpf_in_stack_arg_cnt(subprog)) {
4081 		verbose(env, "invalid read from stack arg off %d depth %d\n",
4082 			off, bpf_in_stack_arg_cnt(subprog) * BPF_REG_SIZE);
4083 		return -EACCES;
4084 	}
4085 
4086 	caller = vstate->frame[vstate->curframe - 1];
4087 	arg = &caller->stack_arg_regs[spi];
4088 	cur = vstate->frame[vstate->curframe];
4089 	cur->regs[dst_regno] = *arg;
4090 	return bpf_push_jmp_history(env, env->cur_state,
4091 				    INSN_F_STACK_ARG_ACCESS, spi, 0, 0);
4092 }
4093 
4094 static int mark_stack_arg_precision(struct bpf_verifier_env *env, int arg_idx)
4095 {
4096 	struct bpf_func_state *caller = cur_func(env);
4097 	int spi = arg_idx - MAX_BPF_FUNC_REG_ARGS;
4098 
4099 	bt_set_frame_stack_arg_slot(&env->bt, caller->frameno, spi);
4100 	return mark_chain_precision_batch(env, env->cur_state);
4101 }
4102 
4103 static int check_outgoing_stack_args(struct bpf_verifier_env *env, struct bpf_func_state *caller,
4104 				     int nargs)
4105 {
4106 	int i, spi;
4107 
4108 	for (i = MAX_BPF_FUNC_REG_ARGS; i < nargs; i++) {
4109 		spi = i - MAX_BPF_FUNC_REG_ARGS;
4110 		if (spi >= caller->out_stack_arg_cnt ||
4111 		    caller->stack_arg_regs[spi].type == NOT_INIT) {
4112 			verbose(env, "callee expects %d args, stack arg%d is not initialized\n",
4113 				nargs, spi + 1);
4114 			return -EFAULT;
4115 		}
4116 	}
4117 
4118 	return 0;
4119 }
4120 
4121 static struct bpf_reg_state *get_func_arg_reg(struct bpf_func_state *caller,
4122 					      struct bpf_reg_state *regs, int arg)
4123 {
4124 	if (arg < MAX_BPF_FUNC_REG_ARGS)
4125 		return &regs[arg + 1];
4126 
4127 	return &caller->stack_arg_regs[arg - MAX_BPF_FUNC_REG_ARGS];
4128 }
4129 
4130 static int check_map_access_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4131 				 int off, int size, enum bpf_access_type type)
4132 {
4133 	struct bpf_map *map = reg->map_ptr;
4134 	u32 cap = bpf_map_flags_to_cap(map);
4135 
4136 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4137 		verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n",
4138 			map->value_size, reg_smin(reg) + off, size);
4139 		return -EACCES;
4140 	}
4141 
4142 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4143 		verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n",
4144 			map->value_size, reg_smin(reg) + off, size);
4145 		return -EACCES;
4146 	}
4147 
4148 	return 0;
4149 }
4150 
4151 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
4152 static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4153 			      int off, int size, u32 mem_size,
4154 			      bool zero_size_allowed)
4155 {
4156 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
4157 
4158 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
4159 		return 0;
4160 
4161 	switch (reg->type) {
4162 	case PTR_TO_MAP_KEY:
4163 		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
4164 			mem_size, off, size);
4165 		break;
4166 	case PTR_TO_MAP_VALUE:
4167 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
4168 			mem_size, off, size);
4169 		break;
4170 	case PTR_TO_PACKET:
4171 	case PTR_TO_PACKET_META:
4172 	case PTR_TO_PACKET_END:
4173 		verbose(env, "invalid access to packet, off=%d size=%d, %s(id=%d,off=%d,r=%d)\n",
4174 			off, size, reg_arg_name(env, argno), reg->id, off, mem_size);
4175 		break;
4176 	case PTR_TO_CTX:
4177 		verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n",
4178 			mem_size, off, size);
4179 		break;
4180 	case PTR_TO_MEM:
4181 	default:
4182 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
4183 			mem_size, off, size);
4184 	}
4185 
4186 	return -EACCES;
4187 }
4188 
4189 /* check read/write into a memory region with possible variable offset */
4190 static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4191 				   int off, int size, u32 mem_size,
4192 				   bool zero_size_allowed)
4193 {
4194 	int err;
4195 
4196 	/* We may have adjusted the register pointing to memory region, so we
4197 	 * need to try adding each of min_value and max_value to off
4198 	 * to make sure our theoretical access will be safe.
4199 	 *
4200 	 * The minimum value is only important with signed
4201 	 * comparisons where we can't assume the floor of a
4202 	 * value is 0.  If we are using signed variables for our
4203 	 * index'es we need to make sure that whatever we use
4204 	 * will have a set floor within our range.
4205 	 */
4206 	if (reg_smin(reg) < 0 &&
4207 	    (reg_smin(reg) == S64_MIN ||
4208 	     (off + reg_smin(reg) != (s64)(s32)(off + reg_smin(reg))) ||
4209 	      reg_smin(reg) + off < 0)) {
4210 		verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4211 			reg_arg_name(env, argno));
4212 		return -EACCES;
4213 	}
4214 	err = __check_mem_access(env, reg, argno, reg_smin(reg) + off, size,
4215 				 mem_size, zero_size_allowed);
4216 	if (err) {
4217 		verbose(env, "%s min value is outside of the allowed memory range\n",
4218 			reg_arg_name(env, argno));
4219 		return err;
4220 	}
4221 
4222 	/* If we haven't set a max value then we need to bail since we can't be
4223 	 * sure we won't do bad things.
4224 	 * If reg_umax(reg) + off could overflow, treat that as unbounded too.
4225 	 */
4226 	if (reg_umax(reg) >= BPF_MAX_VAR_OFF) {
4227 		verbose(env, "%s unbounded memory access, make sure to bounds check any such access\n",
4228 			reg_arg_name(env, argno));
4229 		return -EACCES;
4230 	}
4231 	err = __check_mem_access(env, reg, argno, reg_umax(reg) + off, size,
4232 				 mem_size, zero_size_allowed);
4233 	if (err) {
4234 		verbose(env, "%s max value is outside of the allowed memory range\n",
4235 			reg_arg_name(env, argno));
4236 		return err;
4237 	}
4238 
4239 	return 0;
4240 }
4241 
4242 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4243 			       const struct bpf_reg_state *reg, argno_t argno,
4244 			       bool fixed_off_ok)
4245 {
4246 	/* Access to this pointer-typed register or passing it to a helper
4247 	 * is only allowed in its original, unmodified form.
4248 	 */
4249 
4250 	if (!tnum_is_const(reg->var_off)) {
4251 		char tn_buf[48];
4252 
4253 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4254 		verbose(env, "variable %s access var_off=%s disallowed\n",
4255 			reg_type_str(env, reg->type), tn_buf);
4256 		return -EACCES;
4257 	}
4258 
4259 	if (reg_smin(reg) < 0) {
4260 		verbose(env, "negative offset %s ptr %s off=%lld disallowed\n",
4261 			reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value);
4262 		return -EACCES;
4263 	}
4264 
4265 	if (!fixed_off_ok && reg->var_off.value != 0) {
4266 		verbose(env, "dereference of modified %s ptr %s off=%lld disallowed\n",
4267 			reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value);
4268 		return -EACCES;
4269 	}
4270 
4271 	return 0;
4272 }
4273 
4274 static int check_ptr_off_reg(struct bpf_verifier_env *env,
4275 		             const struct bpf_reg_state *reg, int regno)
4276 {
4277 	return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false);
4278 }
4279 
4280 static int map_kptr_match_type(struct bpf_verifier_env *env,
4281 			       struct btf_field *kptr_field,
4282 			       struct bpf_reg_state *reg, u32 regno)
4283 {
4284 	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
4285 	int perm_flags;
4286 	const char *reg_name = "";
4287 
4288 	if (base_type(reg->type) != PTR_TO_BTF_ID)
4289 		goto bad_type;
4290 
4291 	if (btf_is_kernel(reg->btf)) {
4292 		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
4293 
4294 		/* Only unreferenced case accepts untrusted pointers */
4295 		if (kptr_field->type == BPF_KPTR_UNREF)
4296 			perm_flags |= PTR_UNTRUSTED;
4297 	} else {
4298 		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
4299 		if (kptr_field->type == BPF_KPTR_PERCPU)
4300 			perm_flags |= MEM_PERCPU;
4301 	}
4302 
4303 	if (type_flag(reg->type) & ~perm_flags)
4304 		goto bad_type;
4305 
4306 	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
4307 	reg_name = btf_type_name(reg->btf, reg->btf_id);
4308 
4309 	/* For ref_ptr case, release function check should ensure we get one
4310 	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
4311 	 * normal store of unreferenced kptr, we must ensure var_off is zero.
4312 	 * Since ref_ptr cannot be accessed directly by BPF insns, check for
4313 	 * reg->id is not needed here.
4314 	 */
4315 	if (__check_ptr_off_reg(env, reg, argno_from_reg(regno), true))
4316 		return -EACCES;
4317 
4318 	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
4319 	 * we also need to take into account the reg->var_off.
4320 	 *
4321 	 * We want to support cases like:
4322 	 *
4323 	 * struct foo {
4324 	 *         struct bar br;
4325 	 *         struct baz bz;
4326 	 * };
4327 	 *
4328 	 * struct foo *v;
4329 	 * v = func();	      // PTR_TO_BTF_ID
4330 	 * val->foo = v;      // reg->var_off is zero, btf and btf_id match type
4331 	 * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with
4332 	 *                    // first member type of struct after comparison fails
4333 	 * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked
4334 	 *                    // to match type
4335 	 *
4336 	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off
4337 	 * is zero. We must also ensure that btf_struct_ids_match does not walk
4338 	 * the struct to match type against first member of struct, i.e. reject
4339 	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
4340 	 * strict mode to true for type match.
4341 	 */
4342 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value,
4343 				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4344 				  kptr_field->type != BPF_KPTR_UNREF))
4345 		goto bad_type;
4346 	return 0;
4347 bad_type:
4348 	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
4349 		reg_type_str(env, reg->type), reg_name);
4350 	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
4351 	if (kptr_field->type == BPF_KPTR_UNREF)
4352 		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
4353 			targ_name);
4354 	else
4355 		verbose(env, "\n");
4356 	return -EINVAL;
4357 }
4358 
4359 static bool in_sleepable(struct bpf_verifier_env *env)
4360 {
4361 	return env->cur_state->in_sleepable;
4362 }
4363 
4364 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
4365  * can dereference RCU protected pointers and result is PTR_TRUSTED.
4366  */
4367 static bool in_rcu_cs(struct bpf_verifier_env *env)
4368 {
4369 	return env->cur_state->active_rcu_locks ||
4370 	       env->cur_state->active_locks ||
4371 	       !in_sleepable(env);
4372 }
4373 
/* Set of kernel struct BTF IDs that rcu_protected_object() reports as RCU
 * protected. Most entries are only listed when the config option guarding
 * them is enabled; task_struct is listed unconditionally.
 *
 * Once GCC supports btf_type_tag the following mechanism will be replaced with tag check
 */
BTF_SET_START(rcu_protected_types)
#ifdef CONFIG_NET
BTF_ID(struct, prog_test_ref_kfunc)
#endif
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
#endif
BTF_ID(struct, task_struct)
#ifdef CONFIG_CRYPTO
BTF_ID(struct, bpf_crypto_ctx)
#endif
BTF_SET_END(rcu_protected_types)
4390 
4391 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4392 {
4393 	if (!btf_is_kernel(btf))
4394 		return true;
4395 	return btf_id_set_contains(&rcu_protected_types, btf_id);
4396 }
4397 
4398 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
4399 {
4400 	struct btf_struct_meta *meta;
4401 
4402 	if (btf_is_kernel(kptr_field->kptr.btf))
4403 		return NULL;
4404 
4405 	meta = btf_find_struct_meta(kptr_field->kptr.btf,
4406 				    kptr_field->kptr.btf_id);
4407 
4408 	return meta ? meta->record : NULL;
4409 }
4410 
4411 static bool rcu_safe_kptr(const struct btf_field *field)
4412 {
4413 	const struct btf_field_kptr *kptr = &field->kptr;
4414 
4415 	return field->type == BPF_KPTR_PERCPU ||
4416 	       (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
4417 }
4418 
4419 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
4420 {
4421 	struct btf_record *rec;
4422 	u32 ret;
4423 
4424 	ret = PTR_MAYBE_NULL;
4425 	if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
4426 		ret |= MEM_RCU;
4427 		if (kptr_field->type == BPF_KPTR_PERCPU)
4428 			ret |= MEM_PERCPU;
4429 		else if (!btf_is_kernel(kptr_field->kptr.btf))
4430 			ret |= MEM_ALLOC;
4431 
4432 		rec = kptr_pointee_btf_record(kptr_field);
4433 		if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
4434 			ret |= NON_OWN_REF;
4435 	} else {
4436 		ret |= PTR_UNTRUSTED;
4437 	}
4438 
4439 	return ret;
4440 }
4441 
4442 static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
4443 			    struct btf_field *field)
4444 {
4445 	struct bpf_reg_state *reg;
4446 	const struct btf_type *t;
4447 
4448 	t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
4449 	mark_reg_known_zero(env, cur_regs(env), regno);
4450 	reg = reg_state(env, regno);
4451 	reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
4452 	reg->mem_size = t->size;
4453 	reg->id = ++env->id_gen;
4454 
4455 	return 0;
4456 }
4457 
4458 static int check_map_kptr_access(struct bpf_verifier_env *env,
4459 				 int value_regno, int insn_idx,
4460 				 struct btf_field *kptr_field)
4461 {
4462 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4463 	int class = BPF_CLASS(insn->code);
4464 	struct bpf_reg_state *val_reg;
4465 	int ret;
4466 
4467 	/* Things we already checked for in check_map_access and caller:
4468 	 *  - Reject cases where variable offset may touch kptr
4469 	 *  - size of access (must be BPF_DW)
4470 	 *  - tnum_is_const(reg->var_off)
4471 	 *  - kptr_field->offset == off + reg->var_off.value
4472 	 */
4473 	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
4474 	if (BPF_MODE(insn->code) != BPF_MEM) {
4475 		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
4476 		return -EACCES;
4477 	}
4478 
4479 	/* We only allow loading referenced kptr, since it will be marked as
4480 	 * untrusted, similar to unreferenced kptr.
4481 	 */
4482 	if (class != BPF_LDX &&
4483 	    (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
4484 		verbose(env, "store to referenced kptr disallowed\n");
4485 		return -EACCES;
4486 	}
4487 	if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
4488 		verbose(env, "store to uptr disallowed\n");
4489 		return -EACCES;
4490 	}
4491 
4492 	if (class == BPF_LDX) {
4493 		if (kptr_field->type == BPF_UPTR)
4494 			return mark_uptr_ld_reg(env, value_regno, kptr_field);
4495 
4496 		/* We can simply mark the value_regno receiving the pointer
4497 		 * value from map as PTR_TO_BTF_ID, with the correct type.
4498 		 */
4499 		ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID,
4500 				      kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4501 				      btf_ld_kptr_type(env, kptr_field));
4502 		if (ret < 0)
4503 			return ret;
4504 	} else if (class == BPF_STX) {
4505 		val_reg = reg_state(env, value_regno);
4506 		if (!bpf_register_is_null(val_reg) &&
4507 		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
4508 			return -EACCES;
4509 	} else if (class == BPF_ST) {
4510 		if (insn->imm) {
4511 			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
4512 				kptr_field->offset);
4513 			return -EACCES;
4514 		}
4515 	} else {
4516 		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
4517 		return -EACCES;
4518 	}
4519 	return 0;
4520 }
4521 
4522 /*
4523  * Return the size of the memory region accessible from a pointer to map value.
4524  * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible.
4525  */
4526 static u32 map_mem_size(const struct bpf_map *map)
4527 {
4528 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4529 		return map->max_entries * sizeof(long);
4530 
4531 	return map->value_size;
4532 }
4533 
4534 /* check read/write into a map element with possible variable offset */
4535 static int check_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4536 			    int off, int size, bool zero_size_allowed,
4537 			    enum bpf_access_src src)
4538 {
4539 	struct bpf_map *map = reg->map_ptr;
4540 	u32 mem_size = map_mem_size(map);
4541 	struct btf_record *rec;
4542 	int err, i;
4543 
4544 	err = check_mem_region_access(env, reg, argno, off, size, mem_size, zero_size_allowed);
4545 	if (err)
4546 		return err;
4547 
4548 	if (IS_ERR_OR_NULL(map->record))
4549 		return 0;
4550 	rec = map->record;
4551 	for (i = 0; i < rec->cnt; i++) {
4552 		struct btf_field *field = &rec->fields[i];
4553 		u32 p = field->offset;
4554 
4555 		/* If any part of a field  can be touched by load/store, reject
4556 		 * this program. To check that [x1, x2) overlaps with [y1, y2),
4557 		 * it is sufficient to check x1 < y2 && y1 < x2.
4558 		 */
4559 		if (reg_smin(reg) + off < p + field->size &&
4560 		    p < reg_umax(reg) + off + size) {
4561 			switch (field->type) {
4562 			case BPF_KPTR_UNREF:
4563 			case BPF_KPTR_REF:
4564 			case BPF_KPTR_PERCPU:
4565 			case BPF_UPTR:
4566 				if (src != ACCESS_DIRECT) {
4567 					verbose(env, "%s cannot be accessed indirectly by helper\n",
4568 						btf_field_type_name(field->type));
4569 					return -EACCES;
4570 				}
4571 				if (!tnum_is_const(reg->var_off)) {
4572 					verbose(env, "%s access cannot have variable offset\n",
4573 						btf_field_type_name(field->type));
4574 					return -EACCES;
4575 				}
4576 				if (p != off + reg->var_off.value) {
4577 					verbose(env, "%s access misaligned expected=%u off=%llu\n",
4578 						btf_field_type_name(field->type),
4579 						p, off + reg->var_off.value);
4580 					return -EACCES;
4581 				}
4582 				if (size != bpf_size_to_bytes(BPF_DW)) {
4583 					verbose(env, "%s access size must be BPF_DW\n",
4584 						btf_field_type_name(field->type));
4585 					return -EACCES;
4586 				}
4587 				break;
4588 			default:
4589 				verbose(env, "%s cannot be accessed directly by load/store\n",
4590 					btf_field_type_name(field->type));
4591 				return -EACCES;
4592 			}
4593 		}
4594 	}
4595 	return 0;
4596 }
4597 
4598 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
4599 			       const struct bpf_call_arg_meta *meta,
4600 			       enum bpf_access_type t)
4601 {
4602 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
4603 
4604 	switch (prog_type) {
4605 	/* Program types only with direct read access go here! */
4606 	case BPF_PROG_TYPE_LWT_IN:
4607 	case BPF_PROG_TYPE_LWT_OUT:
4608 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
4609 	case BPF_PROG_TYPE_SK_REUSEPORT:
4610 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
4611 	case BPF_PROG_TYPE_CGROUP_SKB:
4612 		if (t == BPF_WRITE)
4613 			return false;
4614 		fallthrough;
4615 
4616 	/* Program types with direct read + write access go here! */
4617 	case BPF_PROG_TYPE_SCHED_CLS:
4618 	case BPF_PROG_TYPE_SCHED_ACT:
4619 	case BPF_PROG_TYPE_XDP:
4620 	case BPF_PROG_TYPE_LWT_XMIT:
4621 	case BPF_PROG_TYPE_SK_SKB:
4622 	case BPF_PROG_TYPE_SK_MSG:
4623 		if (meta)
4624 			return meta->pkt_access;
4625 
4626 		env->seen_direct_write = true;
4627 		return true;
4628 
4629 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4630 		if (t == BPF_WRITE)
4631 			env->seen_direct_write = true;
4632 
4633 		return true;
4634 
4635 	default:
4636 		return false;
4637 	}
4638 }
4639 
4640 static int check_packet_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off,
4641 			       int size, bool zero_size_allowed)
4642 {
4643 	int err;
4644 
4645 	if (reg->range < 0) {
4646 		verbose(env, "%s offset is outside of the packet\n", reg_arg_name(env, argno));
4647 		return -EINVAL;
4648 	}
4649 
4650 	err = check_mem_region_access(env, reg, argno, off, size, reg->range, zero_size_allowed);
4651 	if (err)
4652 		return err;
4653 
4654 	/* __check_mem_access has made sure "off + size - 1" is within u16.
4655 	 * reg_umax(reg) can't be bigger than MAX_PACKET_OFF which is 0xffff,
4656 	 * otherwise find_good_pkt_pointers would have refused to set range info
4657 	 * that __check_mem_access would have rejected this pkt access.
4658 	 * Therefore, "off + reg_umax(reg) + size - 1" won't overflow u32.
4659 	 */
4660 	env->prog->aux->max_pkt_offset =
4661 		max_t(u32, env->prog->aux->max_pkt_offset,
4662 		      off + reg_umax(reg) + size - 1);
4663 
4664 	return 0;
4665 }
4666 
4667 static bool is_var_ctx_off_allowed(struct bpf_prog *prog)
4668 {
4669 	return resolve_prog_type(prog) == BPF_PROG_TYPE_SYSCALL;
4670 }
4671 
4672 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
4673 static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
4674 			      enum bpf_access_type t, struct bpf_insn_access_aux *info)
4675 {
4676 	if (env->ops->is_valid_access &&
4677 	    env->ops->is_valid_access(off, size, t, env->prog, info)) {
4678 		/* A non zero info.ctx_field_size indicates that this field is a
4679 		 * candidate for later verifier transformation to load the whole
4680 		 * field and then apply a mask when accessed with a narrower
4681 		 * access than actual ctx access size. A zero info.ctx_field_size
4682 		 * will only allow for whole field access and rejects any other
4683 		 * type of narrower access.
4684 		 */
4685 		if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
4686 			if (info->ref_id &&
4687 			    !find_reference_state(env->cur_state, info->ref_id)) {
4688 				verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
4689 					off);
4690 				return -EACCES;
4691 			}
4692 		} else {
4693 			env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
4694 		}
4695 		/* remember the offset of last byte accessed in ctx */
4696 		if (env->prog->aux->max_ctx_offset < off + size)
4697 			env->prog->aux->max_ctx_offset = off + size;
4698 		return 0;
4699 	}
4700 
4701 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
4702 	return -EACCES;
4703 }
4704 
4705 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno,
4706 			    int off, int access_size, enum bpf_access_type t,
4707 			    struct bpf_insn_access_aux *info)
4708 {
4709 	/*
4710 	 * Program types that don't rewrite ctx accesses can safely
4711 	 * dereference ctx pointers with fixed offsets.
4712 	 */
4713 	bool var_off_ok = is_var_ctx_off_allowed(env->prog);
4714 	bool fixed_off_ok = !env->ops->convert_ctx_access;
4715 	int err;
4716 
4717 	if (var_off_ok)
4718 		err = check_mem_region_access(env, reg, argno, off, access_size, U16_MAX, false);
4719 	else
4720 		err = __check_ptr_off_reg(env, reg, argno, fixed_off_ok);
4721 	if (err)
4722 		return err;
4723 	off += reg_umax(reg);
4724 
4725 	err = __check_ctx_access(env, insn_idx, off, access_size, t, info);
4726 	if (err)
4727 		verbose_linfo(env, insn_idx, "; ");
4728 	return err;
4729 }
4730 
4731 static int check_flow_keys_access(struct bpf_verifier_env *env,
4732 				  struct bpf_reg_state *reg, argno_t argno,
4733 				  int off, int size)
4734 {
4735 	/* Only a constant offset is allowed here; fold it into off. */
4736 	if (!tnum_is_const(reg->var_off)) {
4737 		char tn_buf[48];
4738 
4739 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4740 		verbose(env, "%s invalid variable offset to flow keys: off=%d, var_off=%s\n",
4741 			reg_arg_name(env, argno), off, tn_buf);
4742 		return -EACCES;
4743 	}
4744 	off += reg->var_off.value;
4745 
4746 	if (size < 0 || off < 0 ||
4747 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
4748 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
4749 			off, size);
4750 		return -EACCES;
4751 	}
4752 	return 0;
4753 }
4754 
4755 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
4756 			     struct bpf_reg_state *reg, argno_t argno, int off, int size,
4757 			     enum bpf_access_type t)
4758 {
4759 	struct bpf_insn_access_aux info = {};
4760 	bool valid;
4761 
4762 	if (reg_smin(reg) < 0) {
4763 		verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4764 			reg_arg_name(env, argno));
4765 		return -EACCES;
4766 	}
4767 
4768 	switch (reg->type) {
4769 	case PTR_TO_SOCK_COMMON:
4770 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4771 		break;
4772 	case PTR_TO_SOCKET:
4773 		valid = bpf_sock_is_valid_access(off, size, t, &info);
4774 		break;
4775 	case PTR_TO_TCP_SOCK:
4776 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4777 		break;
4778 	case PTR_TO_XDP_SOCK:
4779 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4780 		break;
4781 	default:
4782 		valid = false;
4783 	}
4784 
4785 
4786 	if (valid) {
4787 		env->insn_aux_data[insn_idx].ctx_field_size =
4788 			info.ctx_field_size;
4789 		return 0;
4790 	}
4791 
4792 	verbose(env, "%s invalid %s access off=%d size=%d\n",
4793 		reg_arg_name(env, argno), reg_type_str(env, reg->type), off, size);
4794 
4795 	return -EACCES;
4796 }
4797 
4798 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4799 {
4800 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4801 }
4802 
4803 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4804 {
4805 	const struct bpf_reg_state *reg = reg_state(env, regno);
4806 
4807 	return reg->type == PTR_TO_CTX;
4808 }
4809 
4810 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4811 {
4812 	const struct bpf_reg_state *reg = reg_state(env, regno);
4813 
4814 	return type_is_sk_pointer(reg->type);
4815 }
4816 
4817 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4818 {
4819 	const struct bpf_reg_state *reg = reg_state(env, regno);
4820 
4821 	return type_is_pkt_pointer(reg->type);
4822 }
4823 
4824 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4825 {
4826 	const struct bpf_reg_state *reg = reg_state(env, regno);
4827 
4828 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
4829 	return reg->type == PTR_TO_FLOW_KEYS;
4830 }
4831 
4832 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
4833 {
4834 	const struct bpf_reg_state *reg = reg_state(env, regno);
4835 
4836 	return reg->type == PTR_TO_ARENA;
4837 }
4838 
4839 /* Return false if @regno contains a pointer whose type isn't supported for
4840  * atomic instruction @insn.
4841  */
4842 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
4843 			       struct bpf_insn *insn)
4844 {
4845 	if (is_ctx_reg(env, regno))
4846 		return false;
4847 	if (is_pkt_reg(env, regno))
4848 		return false;
4849 	if (is_flow_key_reg(env, regno))
4850 		return false;
4851 	if (is_sk_reg(env, regno))
4852 		return false;
4853 	if (is_arena_reg(env, regno))
4854 		return bpf_jit_supports_insn(insn, true);
4855 
4856 	return true;
4857 }
4858 
/* Table mapping a register type to a pointer to its BTF type ID.
 * Register types without an initializer below have a NULL entry. The socket
 * entries exist only when CONFIG_NET is enabled.
 */
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
#endif
	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
};
4867 
4868 static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
4869 {
4870 	/* A referenced register is always trusted. */
4871 	if (reg_is_referenced(env, reg))
4872 		return true;
4873 
4874 	/* Types listed in the reg2btf_ids are always trusted */
4875 	if (reg2btf_ids[base_type(reg->type)] &&
4876 	    !bpf_type_has_unsafe_modifiers(reg->type))
4877 		return true;
4878 
4879 	/* If a register is not referenced, it is trusted if it has the
4880 	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
4881 	 * other type modifiers may be safe, but we elect to take an opt-in
4882 	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4883 	 * not.
4884 	 *
4885 	 * Eventually, we should make PTR_TRUSTED the single source of truth
4886 	 * for whether a register is trusted.
4887 	 */
4888 	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4889 	       !bpf_type_has_unsafe_modifiers(reg->type);
4890 }
4891 
4892 static bool is_rcu_reg(const struct bpf_reg_state *reg)
4893 {
4894 	return reg->type & MEM_RCU;
4895 }
4896 
4897 static void clear_trusted_flags(enum bpf_type_flag *flag)
4898 {
4899 	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
4900 }
4901 
4902 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4903 				   const struct bpf_reg_state *reg,
4904 				   int off, int size, bool strict)
4905 {
4906 	struct tnum reg_off;
4907 	int ip_align;
4908 
4909 	/* Byte size accesses are always allowed. */
4910 	if (!strict || size == 1)
4911 		return 0;
4912 
4913 	/* For platforms that do not have a Kconfig enabling
4914 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4915 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
4916 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4917 	 * to this code only in strict mode where we want to emulate
4918 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
4919 	 * unconditional IP align value of '2'.
4920 	 */
4921 	ip_align = 2;
4922 
4923 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off));
4924 	if (!tnum_is_aligned(reg_off, size)) {
4925 		char tn_buf[48];
4926 
4927 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4928 		verbose(env,
4929 			"misaligned packet access off %d+%s+%d size %d\n",
4930 			ip_align, tn_buf, off, size);
4931 		return -EACCES;
4932 	}
4933 
4934 	return 0;
4935 }
4936 
4937 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4938 				       const struct bpf_reg_state *reg,
4939 				       const char *pointer_desc,
4940 				       int off, int size, bool strict)
4941 {
4942 	struct tnum reg_off;
4943 
4944 	/* Byte size accesses are always allowed. */
4945 	if (!strict || size == 1)
4946 		return 0;
4947 
4948 	reg_off = tnum_add(reg->var_off, tnum_const(off));
4949 	if (!tnum_is_aligned(reg_off, size)) {
4950 		char tn_buf[48];
4951 
4952 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4953 		verbose(env, "misaligned %saccess off %s+%d size %d\n",
4954 			pointer_desc, tn_buf, off, size);
4955 		return -EACCES;
4956 	}
4957 
4958 	return 0;
4959 }
4960 
4961 static int check_ptr_alignment(struct bpf_verifier_env *env,
4962 			       const struct bpf_reg_state *reg, int off,
4963 			       int size, bool strict_alignment_once)
4964 {
4965 	bool strict = env->strict_alignment || strict_alignment_once;
4966 	const char *pointer_desc = "";
4967 
4968 	switch (reg->type) {
4969 	case PTR_TO_PACKET:
4970 	case PTR_TO_PACKET_META:
4971 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
4972 		 * right in front, treat it the very same way.
4973 		 */
4974 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
4975 	case PTR_TO_FLOW_KEYS:
4976 		pointer_desc = "flow keys ";
4977 		break;
4978 	case PTR_TO_MAP_KEY:
4979 		pointer_desc = "key ";
4980 		break;
4981 	case PTR_TO_MAP_VALUE:
4982 		pointer_desc = "value ";
4983 		if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4984 			strict = true;
4985 		break;
4986 	case PTR_TO_CTX:
4987 		pointer_desc = "context ";
4988 		break;
4989 	case PTR_TO_STACK:
4990 		pointer_desc = "stack ";
4991 		/* The stack spill tracking logic in check_stack_write_fixed_off()
4992 		 * and check_stack_read_fixed_off() relies on stack accesses being
4993 		 * aligned.
4994 		 */
4995 		strict = true;
4996 		break;
4997 	case PTR_TO_SOCKET:
4998 		pointer_desc = "sock ";
4999 		break;
5000 	case PTR_TO_SOCK_COMMON:
5001 		pointer_desc = "sock_common ";
5002 		break;
5003 	case PTR_TO_TCP_SOCK:
5004 		pointer_desc = "tcp_sock ";
5005 		break;
5006 	case PTR_TO_XDP_SOCK:
5007 		pointer_desc = "xdp_sock ";
5008 		break;
5009 	case PTR_TO_ARENA:
5010 		return 0;
5011 	default:
5012 		break;
5013 	}
5014 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5015 					   strict);
5016 }
5017 
5018 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
5019 {
5020 	if (!bpf_jit_supports_private_stack())
5021 		return NO_PRIV_STACK;
5022 
5023 	/* bpf_prog_check_recur() checks all prog types that use bpf trampoline
5024 	 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
5025 	 * explicitly.
5026 	 */
5027 	switch (prog->type) {
5028 	case BPF_PROG_TYPE_KPROBE:
5029 	case BPF_PROG_TYPE_TRACEPOINT:
5030 	case BPF_PROG_TYPE_PERF_EVENT:
5031 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
5032 		return PRIV_STACK_ADAPTIVE;
5033 	case BPF_PROG_TYPE_TRACING:
5034 	case BPF_PROG_TYPE_LSM:
5035 	case BPF_PROG_TYPE_STRUCT_OPS:
5036 		if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
5037 			return PRIV_STACK_ADAPTIVE;
5038 		fallthrough;
5039 	default:
5040 		break;
5041 	}
5042 
5043 	return NO_PRIV_STACK;
5044 }
5045 
5046 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
5047 {
5048 	if (env->prog->jit_requested)
5049 		return round_up(stack_depth, 16);
5050 
5051 	/* round up to 32-bytes, since this is granularity
5052 	 * of interpreter stack size
5053 	 */
5054 	return round_up(max_t(u32, stack_depth, 1), 32);
5055 }
5056 
/* temporary state used for call frame depth calculation; one entry per
 * subprogram, indexed by subprogram idx
 */
struct bpf_subprog_call_depth_info {
	int ret_insn; /* caller instruction where we return to. */
	int caller; /* caller subprogram idx */
	int frame; /* # of consecutive static call stack frames on top of stack */
};
5063 
5064 /* starting from main bpf function walk all instructions of the function
5065  * and recursively walk all callees that given function can call.
5066  * Ignore jump and exit insns.
5067  */
5068 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
5069 					 struct bpf_subprog_call_depth_info *dinfo,
5070 					 bool priv_stack_supported)
5071 {
5072 	struct bpf_subprog_info *subprog = env->subprog_info;
5073 	struct bpf_insn *insn = env->prog->insnsi;
5074 	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
5075 	bool tail_call_reachable = false;
5076 	int total;
5077 	int tmp;
5078 
5079 	/* no caller idx */
5080 	dinfo[idx].caller = -1;
5081 
5082 	i = subprog[idx].start;
5083 	if (!priv_stack_supported)
5084 		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5085 process_func:
5086 	/* protect against potential stack overflow that might happen when
5087 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5088 	 * depth for such case down to 256 so that the worst case scenario
5089 	 * would result in 8k stack size (32 which is tailcall limit * 256 =
5090 	 * 8k).
5091 	 *
5092 	 * To get the idea what might happen, see an example:
5093 	 * func1 -> sub rsp, 128
5094 	 *  subfunc1 -> sub rsp, 256
5095 	 *  tailcall1 -> add rsp, 256
5096 	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5097 	 *   subfunc2 -> sub rsp, 64
5098 	 *   subfunc22 -> sub rsp, 128
5099 	 *   tailcall2 -> add rsp, 128
5100 	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5101 	 *
5102 	 * tailcall will unwind the current stack frame but it will not get rid
5103 	 * of caller's stack as shown on the example above.
5104 	 */
5105 	if (idx && subprog[idx].has_tail_call && depth >= 256) {
5106 		verbose(env,
5107 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
5108 			depth);
5109 		return -EACCES;
5110 	}
5111 
5112 	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
5113 	if (IS_ENABLED(CONFIG_X86_64) && subprog[idx].stack_arg_cnt) {
5114 		/* x86-64 uses R9 for both private stack frame pointer and arg6. */
5115 		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5116 	} else if (priv_stack_supported) {
5117 		/* Request private stack support only if the subprog stack
5118 		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
5119 		 * avoid jit penalty if the stack usage is small.
5120 		 */
5121 		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
5122 		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
5123 			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
5124 	}
5125 
5126 	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
5127 		if (subprog_depth > env->max_stack_depth)
5128 			env->max_stack_depth = subprog_depth;
5129 		if (subprog_depth > MAX_BPF_STACK) {
5130 			verbose(env, "stack size of subprog %d is %d. Too large\n",
5131 				idx, subprog_depth);
5132 			return -EACCES;
5133 		}
5134 	} else {
5135 		depth += subprog_depth;
5136 		if (depth > env->max_stack_depth)
5137 			env->max_stack_depth = depth;
5138 		if (depth > MAX_BPF_STACK) {
5139 			total = 0;
5140 			for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller)
5141 				total++;
5142 
5143 			verbose(env, "combined stack size of %d calls is %d. Too large\n",
5144 				total, depth);
5145 			return -EACCES;
5146 		}
5147 	}
5148 continue_func:
5149 	subprog_end = subprog[idx + 1].start;
5150 	for (; i < subprog_end; i++) {
5151 		int next_insn, sidx;
5152 
5153 		if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
5154 			bool err = false;
5155 
5156 			if (!bpf_is_throw_kfunc(insn + i))
5157 				continue;
5158 			for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) {
5159 				if (subprog[tmp].is_cb) {
5160 					err = true;
5161 					break;
5162 				}
5163 			}
5164 			if (!err)
5165 				continue;
5166 			verbose(env,
5167 				"bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
5168 				i, idx);
5169 			return -EINVAL;
5170 		}
5171 
5172 		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
5173 			continue;
5174 		/* remember insn and function to return to */
5175 
5176 		/* find the callee */
5177 		next_insn = i + insn[i].imm + 1;
5178 		sidx = bpf_find_subprog(env, next_insn);
5179 		if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
5180 			return -EFAULT;
5181 		if (subprog[sidx].is_async_cb) {
5182 			if (subprog[sidx].has_tail_call) {
5183 				verifier_bug(env, "subprog has tail_call and async cb");
5184 				return -EFAULT;
5185 			}
5186 			/* async callbacks don't increase bpf prog stack size unless called directly */
5187 			if (!bpf_pseudo_call(insn + i))
5188 				continue;
5189 			if (subprog[sidx].is_exception_cb) {
5190 				verbose(env, "insn %d cannot call exception cb directly", i);
5191 				return -EINVAL;
5192 			}
5193 		}
5194 
5195 		/* store caller info for after we return from callee */
5196 		dinfo[idx].frame = frame;
5197 		dinfo[idx].ret_insn = i + 1;
5198 
5199 		/* push caller idx into callee's dinfo */
5200 		dinfo[sidx].caller = idx;
5201 
5202 		i = next_insn;
5203 
5204 		idx = sidx;
5205 		if (!priv_stack_supported)
5206 			subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5207 
5208 		if (subprog[idx].has_tail_call)
5209 			tail_call_reachable = true;
5210 
5211 		frame = bpf_subprog_is_global(env, idx) ? 0 : frame + 1;
5212 		if (frame >= MAX_CALL_FRAMES) {
5213 			verbose(env, "the call stack of %d frames is too deep !\n",
5214 				frame);
5215 			return -E2BIG;
5216 		}
5217 		goto process_func;
5218 	}
5219 	/* if tail call got detected across bpf2bpf calls then mark each of the
5220 	 * currently present subprog frames as tail call reachable subprogs;
5221 	 * this info will be utilized by JIT so that we will be preserving the
5222 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
5223 	 */
5224 	if (tail_call_reachable) {
5225 		for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) {
5226 			if (subprog[tmp].is_exception_cb) {
5227 				verbose(env, "cannot tail call within exception cb\n");
5228 				return -EINVAL;
5229 			}
5230 			if (subprog[tmp].stack_arg_cnt) {
5231 				verbose(env, "tail_calls are not allowed in programs with stack args\n");
5232 				return -EINVAL;
5233 			}
5234 			subprog[tmp].tail_call_reachable = true;
5235 		}
5236 	} else if (!idx && subprog[0].has_tail_call && subprog[0].stack_arg_cnt) {
5237 		verbose(env, "tail_calls are not allowed in programs with stack args\n");
5238 		return -EINVAL;
5239 	}
5240 
5241 	if (subprog[0].tail_call_reachable)
5242 		env->prog->aux->tail_call_reachable = true;
5243 
5244 	/* end of for() loop means the last insn of the 'subprog'
5245 	 * was reached. Doesn't matter whether it was JA or EXIT
5246 	 */
5247 	if (frame == 0 && dinfo[idx].caller < 0)
5248 		return 0;
5249 	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
5250 		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
5251 
5252 	/* pop caller idx from callee */
5253 	idx = dinfo[idx].caller;
5254 
5255 	/* retrieve caller state from its frame */
5256 	frame = dinfo[idx].frame;
5257 	i = dinfo[idx].ret_insn;
5258 
5259 	/* reset tail_call_reachable to the parent's actual state */
5260 	tail_call_reachable = subprog[idx].tail_call_reachable;
5261 
5262 	goto continue_func;
5263 }
5264 
5265 static int check_max_stack_depth(struct bpf_verifier_env *env)
5266 {
5267 	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
5268 	struct bpf_subprog_call_depth_info *dinfo;
5269 	struct bpf_subprog_info *si = env->subprog_info;
5270 	bool priv_stack_supported;
5271 	int ret;
5272 
5273 	dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT);
5274 	if (!dinfo)
5275 		return -ENOMEM;
5276 
5277 	for (int i = 0; i < env->subprog_cnt; i++) {
5278 		if (si[i].has_tail_call) {
5279 			priv_stack_mode = NO_PRIV_STACK;
5280 			break;
5281 		}
5282 	}
5283 
5284 	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
5285 		priv_stack_mode = bpf_enable_priv_stack(env->prog);
5286 
5287 	/* All async_cb subprogs use normal kernel stack. If a particular
5288 	 * subprog appears in both main prog and async_cb subtree, that
5289 	 * subprog will use normal kernel stack to avoid potential nesting.
5290 	 * The reverse subprog traversal ensures when main prog subtree is
5291 	 * checked, the subprogs appearing in async_cb subtrees are already
5292 	 * marked as using normal kernel stack, so stack size checking can
5293 	 * be done properly.
5294 	 */
5295 	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
5296 		if (!i || si[i].is_async_cb) {
5297 			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
5298 			ret = check_max_stack_depth_subprog(env, i, dinfo,
5299 					priv_stack_supported);
5300 			if (ret < 0) {
5301 				kvfree(dinfo);
5302 				return ret;
5303 			}
5304 		}
5305 	}
5306 
5307 	for (int i = 0; i < env->subprog_cnt; i++) {
5308 		if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
5309 			env->prog->aux->jits_use_priv_stack = true;
5310 			break;
5311 		}
5312 	}
5313 
5314 	kvfree(dinfo);
5315 
5316 	return 0;
5317 }
5318 
5319 static int __check_buffer_access(struct bpf_verifier_env *env,
5320 				 const char *buf_info,
5321 				 const struct bpf_reg_state *reg,
5322 				 argno_t argno, int off, int size)
5323 {
5324 	if (off < 0) {
5325 		verbose(env,
5326 			"%s invalid %s buffer access: off=%d, size=%d\n",
5327 			reg_arg_name(env, argno), buf_info, off, size);
5328 		return -EACCES;
5329 	}
5330 	if (!tnum_is_const(reg->var_off)) {
5331 		char tn_buf[48];
5332 
5333 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5334 		verbose(env,
5335 			"%s invalid variable buffer offset: off=%d, var_off=%s\n",
5336 			reg_arg_name(env, argno), off, tn_buf);
5337 		return -EACCES;
5338 	}
5339 
5340 	return 0;
5341 }
5342 
5343 static int check_tp_buffer_access(struct bpf_verifier_env *env,
5344 				  const struct bpf_reg_state *reg,
5345 				  argno_t argno, int off, int size)
5346 {
5347 	int err;
5348 
5349 	err = __check_buffer_access(env, "tracepoint", reg, argno, off, size);
5350 	if (err)
5351 		return err;
5352 
5353 	env->prog->aux->max_tp_access = max(reg->var_off.value + off + size,
5354 					    env->prog->aux->max_tp_access);
5355 
5356 	return 0;
5357 }
5358 
5359 static int check_buffer_access(struct bpf_verifier_env *env,
5360 			       const struct bpf_reg_state *reg,
5361 			       argno_t argno, int off, int size,
5362 			       bool zero_size_allowed,
5363 			       u32 *max_access)
5364 {
5365 	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
5366 	int err;
5367 
5368 	err = __check_buffer_access(env, buf_info, reg, argno, off, size);
5369 	if (err)
5370 		return err;
5371 
5372 	*max_access = max(reg->var_off.value + off + size, *max_access);
5373 
5374 	return 0;
5375 }
5376 
/* BPF architecture zero extends alu32 ops into 64-bit registers.
 *
 * Keep only the subregister part of var_off (tnum_subreg) and set the 64-bit
 * unsigned range from the register's current u32 minimum and maximum.
 */
static void zext_32_to_64(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_subreg(reg->var_off);
	reg_set_urange64(reg, reg_u32_min(reg), reg_u32_max(reg));
}
5383 
5384 /* truncate register to smaller size (in bytes)
5385  * must be called with size < BPF_REG_SIZE
5386  */
5387 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5388 {
5389 	u64 mask;
5390 
5391 	/* clear high bits in bit representation */
5392 	reg->var_off = tnum_cast(reg->var_off, size);
5393 
5394 	/* fix arithmetic bounds */
5395 	mask = ((u64)1 << (size * 8)) - 1;
5396 	if ((reg_umin(reg) & ~mask) == (reg_umax(reg) & ~mask))
5397 		reg_set_urange64(reg, reg_umin(reg) & mask, reg_umax(reg) & mask);
5398 	else
5399 		reg_set_urange64(reg, 0, mask);
5400 
5401 	/* If size is smaller than 32bit register the 32bit register
5402 	 * values are also truncated so we push 64-bit bounds into
5403 	 * 32-bit bounds. Above were truncated < 32-bits already.
5404 	 */
5405 	if (size < 4)
5406 		__mark_reg32_unbounded(reg);
5407 
5408 	reg_bounds_sync(reg);
5409 }
5410 
5411 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
5412 {
5413 	if (size == 1) {
5414 		reg_set_srange64(reg, S8_MIN, S8_MAX);
5415 		reg_set_srange32(reg, S8_MIN, S8_MAX);
5416 	} else if (size == 2) {
5417 		reg_set_srange64(reg, S16_MIN, S16_MAX);
5418 		reg_set_srange32(reg, S16_MIN, S16_MAX);
5419 	} else {
5420 		/* size == 4 */
5421 		reg_set_srange64(reg, S32_MIN, S32_MAX);
5422 		reg_set_srange32(reg, S32_MIN, S32_MAX);
5423 	}
5424 	reg->var_off = tnum_unknown;
5425 }
5426 
/* Sign-extend the low @size bytes (1, 2 or 4) of @reg to 64 bits and update
 * var_off together with the 64-bit and 32-bit ranges accordingly. When no
 * precise result can be derived, the defaults from set_sext64_default_val()
 * are used.
 */
static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
{
	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
	u64 top_smax_value, top_smin_value;
	u64 num_bits = size * 8;

	/* Known constant: sign-extend the value itself and collapse both
	 * ranges to that single value.
	 */
	if (tnum_is_const(reg->var_off)) {
		u64_cval = reg->var_off.value;
		if (size == 1)
			reg->var_off = tnum_const((s8)u64_cval);
		else if (size == 2)
			reg->var_off = tnum_const((s16)u64_cval);
		else
			/* size == 4 */
			reg->var_off = tnum_const((s32)u64_cval);

		u64_cval = reg->var_off.value;
		reg->r64 = cnum64_from_urange(u64_cval, u64_cval);
		reg->r32 = cnum32_from_urange((u32)u64_cval, (u32)u64_cval);
		return;
	}

	/* Keep only the bits above the low @size bytes of each signed bound */
	top_smax_value = ((u64)reg_smax(reg) >> num_bits) << num_bits;
	top_smin_value = ((u64)reg_smin(reg) >> num_bits) << num_bits;

	/* The bounds differ above the truncated part: use the defaults */
	if (top_smax_value != top_smin_value)
		goto out;

	/* find the s64_max and s64_min after sign extension */
	if (size == 1) {
		init_s64_max = (s8)reg_smax(reg);
		init_s64_min = (s8)reg_smin(reg);
	} else if (size == 2) {
		init_s64_max = (s16)reg_smax(reg);
		init_s64_min = (s16)reg_smin(reg);
	} else {
		init_s64_max = (s32)reg_smax(reg);
		init_s64_min = (s32)reg_smin(reg);
	}

	/* the truncating casts may have swapped the order of the two bounds */
	s64_max = max(init_s64_max, init_s64_min);
	s64_min = min(init_s64_max, init_s64_min);

	/* both of s64_max/s64_min positive or negative */
	if ((s64_max >= 0) == (s64_min >= 0)) {
		reg_set_srange64(reg, s64_min, s64_max);
		reg_set_srange32(reg, s64_min, s64_max);
		reg->var_off = tnum_range(s64_min, s64_max);
		return;
	}

out:
	set_sext64_default_val(reg, size);
}
5481 
5482 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
5483 {
5484 	if (size == 1)
5485 		reg_set_srange32(reg, S8_MIN, S8_MAX);
5486 	else
5487 		/* size == 2 */
5488 		reg_set_srange32(reg, S16_MIN, S16_MAX);
5489 	reg->var_off = tnum_subreg(tnum_unknown);
5490 }
5491 
/* Sign-extend the low @size bytes (1 or 2) of @reg's 32-bit subregister and
 * update var_off together with the 32-bit signed range. When no precise
 * result can be derived, the defaults from set_sext32_default_val() are used.
 */
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
{
	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
	u32 top_smax_value, top_smin_value;
	u32 num_bits = size * 8;

	/* Known constant: sign-extend the value itself and collapse the
	 * 32-bit range to that single value.
	 */
	if (tnum_is_const(reg->var_off)) {
		u32_val = reg->var_off.value;
		if (size == 1)
			reg->var_off = tnum_const((s8)u32_val);
		else
			reg->var_off = tnum_const((s16)u32_val);

		u32_val = reg->var_off.value;
		reg_set_srange32(reg, u32_val, u32_val);
		return;
	}

	/* Keep only the bits above the low @size bytes of each signed bound */
	top_smax_value = ((u32)reg_s32_max(reg) >> num_bits) << num_bits;
	top_smin_value = ((u32)reg_s32_min(reg) >> num_bits) << num_bits;

	/* The bounds differ above the truncated part: use the defaults */
	if (top_smax_value != top_smin_value)
		goto out;

	/* find the s32_max and s32_min after sign extension */
	if (size == 1) {
		init_s32_max = (s8)reg_s32_max(reg);
		init_s32_min = (s8)reg_s32_min(reg);
	} else {
		/* size == 2 */
		init_s32_max = (s16)reg_s32_max(reg);
		init_s32_min = (s16)reg_s32_min(reg);
	}
	/* the truncating casts may have swapped the order of the two bounds */
	s32_max = max(init_s32_max, init_s32_min);
	s32_min = min(init_s32_max, init_s32_min);

	/* both of s32_max/s32_min positive or negative */
	if ((s32_min >= 0) == (s32_max >= 0)) {
		reg_set_srange32(reg, s32_min, s32_max);
		reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
		return;
	}

out:
	set_sext32_default_val(reg, size);
}
5537 
5538 bool bpf_map_is_rdonly(const struct bpf_map *map)
5539 {
5540 	/* A map is considered read-only if the following condition are true:
5541 	 *
5542 	 * 1) BPF program side cannot change any of the map content. The
5543 	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
5544 	 *    and was set at map creation time.
5545 	 * 2) The map value(s) have been initialized from user space by a
5546 	 *    loader and then "frozen", such that no new map update/delete
5547 	 *    operations from syscall side are possible for the rest of
5548 	 *    the map's lifetime from that point onwards.
5549 	 * 3) Any parallel/pending map update/delete operations from syscall
5550 	 *    side have been completed. Only after that point, it's safe to
5551 	 *    assume that map value(s) are immutable.
5552 	 */
5553 	return (map->map_flags & BPF_F_RDONLY_PROG) &&
5554 	       READ_ONCE(map->frozen) &&
5555 	       !bpf_map_write_active(map);
5556 }
5557 
5558 int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
5559 			bool is_ldsx)
5560 {
5561 	void *ptr;
5562 	u64 addr;
5563 	int err;
5564 
5565 	err = map->ops->map_direct_value_addr(map, &addr, off);
5566 	if (err)
5567 		return err;
5568 	ptr = (void *)(long)addr + off;
5569 
5570 	switch (size) {
5571 	case sizeof(u8):
5572 		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
5573 		break;
5574 	case sizeof(u16):
5575 		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
5576 		break;
5577 	case sizeof(u32):
5578 		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
5579 		break;
5580 	case sizeof(u64):
5581 		*val = *(u64 *)ptr;
5582 		break;
5583 	default:
5584 		return -EINVAL;
5585 	}
5586 	return 0;
5587 }
5588 
/* Build the name of an allow-list type by pasting a suffix onto __type, e.g.
 * BTF_TYPE_SAFE_RCU(struct task_struct) names struct task_struct__safe_rcu.
 */
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type)  __PASTE(__type, __safe_trusted_or_null)
5593 
/*
 * Allow-list a few fields as RCU trusted or fully trusted.
 * This logic doesn't allow mixed tagging and will be removed once GCC supports
 * btf_type_tag.
 */

/* RCU trusted: these fields are trusted in RCU CS and never NULL */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
	struct cgroup *cgroup;
};

/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
#ifdef CONFIG_MEMCG
	struct task_struct __rcu *owner;
#endif
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};

/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};

/* full trusted, but these fields can be NULL (going by the _OR_NULL suffix) */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
	struct inode *d_inode;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
	struct sock *sk;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) {
	struct mm_struct *vm_mm;
	struct file *vm_file;
};
5670 
5671 static bool type_is_rcu(struct bpf_verifier_env *env,
5672 			struct bpf_reg_state *reg,
5673 			const char *field_name, u32 btf_id)
5674 {
5675 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
5676 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
5677 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
5678 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));
5679 
5680 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
5681 }
5682 
5683 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
5684 				struct bpf_reg_state *reg,
5685 				const char *field_name, u32 btf_id)
5686 {
5687 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
5688 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
5689 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
5690 
5691 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
5692 }
5693 
5694 static bool type_is_trusted(struct bpf_verifier_env *env,
5695 			    struct bpf_reg_state *reg,
5696 			    const char *field_name, u32 btf_id)
5697 {
5698 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
5699 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
5700 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
5701 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
5702 
5703 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
5704 }
5705 
5706 static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
5707 				    struct bpf_reg_state *reg,
5708 				    const char *field_name, u32 btf_id)
5709 {
5710 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
5711 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
5712 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct));
5713 
5714 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
5715 					  "__safe_trusted_or_null");
5716 }
5717 
/* Verify a load or store of @size bytes at @reg + @off, where @reg is a
 * PTR_TO_BTF_ID pointer.
 *
 * @regs:        register file that receives the loaded value
 * @reg:         pointer register being dereferenced
 * @argno:       only used to name the register in log messages
 * @off:         offset from the instruction; reg->var_off.value is added to
 *               it once var_off is known to be constant
 * @size:        access size in bytes
 * @atype:       BPF_READ or BPF_WRITE
 * @value_regno: destination register for reads, negative for "none"
 *
 * For a successful read with @value_regno >= 0 the destination register is
 * set up through mark_btf_ld_reg() using the type returned by the struct walk
 * and the trust flags computed below.
 *
 * Return: 0 on success, a negative errno otherwise (-EPERM, -EINVAL, -EACCES,
 * -EFAULT, or whatever the struct-access callback / mark_btf_ld_reg() return).
 */
5718 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
5719 				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
5720 				   argno_t argno, int off, int size,
5721 				   enum bpf_access_type atype,
5722 				   int value_regno)
5723 {
5724 	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
5725 	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
5726 	const char *field_name = NULL;
5727 	enum bpf_type_flag flag = 0;
5728 	u32 btf_id = 0;
5729 	int ret;
5730 
5731 	if (!env->allow_ptr_leaks) {
5732 		verbose(env,
5733 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5734 			tname);
5735 		return -EPERM;
5736 	}
5737 	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
5738 		verbose(env,
5739 			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
5740 			tname);
5741 		return -EINVAL;
5742 	}
5743 
	/* Only a fully known register offset can be mapped to a struct field. */
5744 	if (!tnum_is_const(reg->var_off)) {
5745 		char tn_buf[48];
5746 
5747 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5748 		verbose(env,
5749 			"%s is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
5750 			reg_arg_name(env, argno), tname, off, tn_buf);
5751 		return -EACCES;
5752 	}
5753 
	/* var_off is constant here: fold it into the access offset. */
5754 	off += reg->var_off.value;
5755 
5756 	if (off < 0) {
5757 		verbose(env,
5758 			"%s is ptr_%s invalid negative access: off=%d\n",
5759 			reg_arg_name(env, argno), tname, off);
5760 		return -EACCES;
5761 	}
5762 
5763 	if (reg->type & MEM_USER) {
5764 		verbose(env,
5765 			"%s is ptr_%s access user memory: off=%d\n",
5766 			reg_arg_name(env, argno), tname, off);
5767 		return -EACCES;
5768 	}
5769 
5770 	if (reg->type & MEM_PERCPU) {
5771 		verbose(env,
5772 			"%s is ptr_%s access percpu memory: off=%d\n",
5773 			reg_arg_name(env, argno), tname, off);
5774 		return -EACCES;
5775 	}
5776 
	/* Writes to objects that are not program-allocated go to the program
	 * type's own btf_struct_access callback when one exists. On that path
	 * btf_id, flag and field_name keep their initial values (0, 0, NULL).
	 */
5777 	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
5778 		if (!btf_is_kernel(reg->btf)) {
5779 			verifier_bug(env, "reg->btf must be kernel btf");
5780 			return -EFAULT;
5781 		}
5782 		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
5783 	} else {
5784 		/* Writes are permitted with default btf_struct_access for
5785 		 * program allocated objects (which always have id > 0),
5786 		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
5787 		 */
5788 		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
5789 			verbose(env, "only read is supported\n");
5790 			return -EACCES;
5791 		}
5792 
5793 		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
5794 		    !(reg->type & MEM_RCU) && !reg_is_referenced(env, reg)) {
5795 			verifier_bug(env, "allocated object must have a referenced id");
5796 			return -EFAULT;
5797 		}
5798 
5799 		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
5800 	}
5801 
5802 	if (ret < 0)
5803 		return ret;
5804 
	/* From here on ret is the register type of the accessed field. The
	 * chain below only adjusts 'flag' when that type is PTR_TO_BTF_ID.
	 */
5805 	if (ret != PTR_TO_BTF_ID) {
5806 		/* just mark; */
5807 
5808 	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
5809 		/* If this is an untrusted pointer, all pointers formed by walking it
5810 		 * also inherit the untrusted flag.
5811 		 */
5812 		flag = PTR_UNTRUSTED;
5813 
5814 	} else if (is_trusted_reg(env, reg) || is_rcu_reg(reg)) {
5815 		/* By default any pointer obtained from walking a trusted pointer is no
5816 		 * longer trusted, unless the field being accessed has explicitly been
5817 		 * marked as inheriting its parent's state of trust (either full or RCU).
5818 		 * For example:
5819 		 * 'cgroups' pointer is untrusted if task->cgroups dereference
5820 		 * happened in a sleepable program outside of bpf_rcu_read_lock()
5821 		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
5822 		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
5823 		 *
5824 		 * A regular RCU-protected pointer with __rcu tag can also be deemed
5825 		 * trusted if we are in an RCU CS. Such pointer can be NULL.
5826 		 */
5827 		if (type_is_trusted(env, reg, field_name, btf_id)) {
5828 			flag |= PTR_TRUSTED;
5829 		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
5830 			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
5831 		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
5832 			if (type_is_rcu(env, reg, field_name, btf_id)) {
5833 				/* ignore __rcu tag and mark it MEM_RCU */
5834 				flag |= MEM_RCU;
5835 			} else if (flag & MEM_RCU ||
5836 				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
5837 				/* __rcu tagged pointers can be NULL */
5838 				flag |= MEM_RCU | PTR_MAYBE_NULL;
5839 
5840 				/* We always trust them */
5841 				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
5842 				    flag & PTR_UNTRUSTED)
5843 					flag &= ~PTR_UNTRUSTED;
5844 			} else if (flag & (MEM_PERCPU | MEM_USER)) {
5845 				/* keep as-is */
5846 			} else {
5847 				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
5848 				clear_trusted_flags(&flag);
5849 			}
5850 		} else {
5851 			/*
5852 			 * If not in RCU CS or MEM_RCU pointer can be NULL then
5853 			 * aggressively mark as untrusted otherwise such
5854 			 * pointers will be plain PTR_TO_BTF_ID without flags
5855 			 * and will be allowed to be passed into helpers for
5856 			 * compat reasons.
5857 			 */
5858 			flag = PTR_UNTRUSTED;
5859 		}
5860 	} else {
5861 		/* Old compat. Deprecated */
5862 		clear_trusted_flags(&flag);
5863 	}
5864 
	/* Only a read with a real destination updates register state. */
5865 	if (atype == BPF_READ && value_regno >= 0) {
5866 		ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
5867 		if (ret < 0)
5868 			return ret;
5869 	}
5870 
5871 	return 0;
5872 }
5873 
5874 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
5875 				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
5876 				   argno_t argno, int off, int size,
5877 				   enum bpf_access_type atype,
5878 				   int value_regno)
5879 {
5880 	struct bpf_map *map = reg->map_ptr;
5881 	struct bpf_reg_state map_reg;
5882 	enum bpf_type_flag flag = 0;
5883 	const struct btf_type *t;
5884 	const char *tname;
5885 	u32 btf_id;
5886 	int ret;
5887 
5888 	if (!btf_vmlinux) {
5889 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
5890 		return -ENOTSUPP;
5891 	}
5892 
5893 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
5894 		verbose(env, "map_ptr access not supported for map type %d\n",
5895 			map->map_type);
5896 		return -ENOTSUPP;
5897 	}
5898 
5899 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
5900 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5901 
5902 	if (!env->allow_ptr_leaks) {
5903 		verbose(env,
5904 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5905 			tname);
5906 		return -EPERM;
5907 	}
5908 
5909 	if (off < 0) {
5910 		verbose(env, "%s is %s invalid negative access: off=%d\n",
5911 			reg_arg_name(env, argno), tname, off);
5912 		return -EACCES;
5913 	}
5914 
5915 	if (atype != BPF_READ) {
5916 		verbose(env, "only read from %s is supported\n", tname);
5917 		return -EACCES;
5918 	}
5919 
5920 	/* Simulate access to a PTR_TO_BTF_ID */
5921 	memset(&map_reg, 0, sizeof(map_reg));
5922 	ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
5923 			      btf_vmlinux, *map->ops->map_btf_id, 0);
5924 	if (ret < 0)
5925 		return ret;
5926 	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
5927 	if (ret < 0)
5928 		return ret;
5929 
5930 	if (value_regno >= 0) {
5931 		ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
5932 		if (ret < 0)
5933 			return ret;
5934 	}
5935 
5936 	return 0;
5937 }
5938 
5939 /* Check that the stack access at the given offset is within bounds. The
5940  * maximum valid offset is -1.
5941  *
5942  * The minimum valid offset is -MAX_BPF_STACK for writes, and
5943  * -state->allocated_stack for reads.
5944  */
5945 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
5946                                           s64 off,
5947                                           struct bpf_func_state *state,
5948                                           enum bpf_access_type t)
5949 {
5950 	int min_valid_off;
5951 
5952 	if (t == BPF_WRITE || env->allow_uninit_stack)
5953 		min_valid_off = -MAX_BPF_STACK;
5954 	else
5955 		min_valid_off = -state->allocated_stack;
5956 
5957 	if (off < min_valid_off || off > -1)
5958 		return -EACCES;
5959 	return 0;
5960 }
5961 
5962 /* Check that the stack access at 'regno + off' falls within the maximum stack
5963  * bounds.
5964  *
5965  * 'off' includes `regno->offset`, but not its dynamic part (if any).
5966  */
5967 static int check_stack_access_within_bounds(
5968 		struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5969 		argno_t argno, int off, int access_size,
5970 		enum bpf_access_type type)
5971 {
5972 	struct bpf_func_state *state = bpf_func(env, reg);
5973 	s64 min_off, max_off;
5974 	int err;
5975 	char *err_extra;
5976 
5977 	if (type == BPF_READ)
5978 		err_extra = " read from";
5979 	else
5980 		err_extra = " write to";
5981 
5982 	if (tnum_is_const(reg->var_off)) {
5983 		min_off = (s64)reg->var_off.value + off;
5984 		max_off = min_off + access_size;
5985 	} else {
5986 		if (reg_smax(reg) >= BPF_MAX_VAR_OFF ||
5987 		    reg_smin(reg) <= -BPF_MAX_VAR_OFF) {
5988 			verbose(env, "invalid unbounded variable-offset%s stack %s\n",
5989 				err_extra, reg_arg_name(env, argno));
5990 			return -EACCES;
5991 		}
5992 		min_off = reg_smin(reg) + off;
5993 		max_off = reg_smax(reg) + off + access_size;
5994 	}
5995 
5996 	err = check_stack_slot_within_bounds(env, min_off, state, type);
5997 	if (!err && max_off > 0)
5998 		err = -EINVAL; /* out of stack access into non-negative offsets */
5999 	if (!err && access_size < 0)
6000 		/* access_size should not be negative (or overflow an int); others checks
6001 		 * along the way should have prevented such an access.
6002 		 */
6003 		err = -EFAULT; /* invalid negative access size; integer overflow? */
6004 
6005 	if (err) {
6006 		if (tnum_is_const(reg->var_off)) {
6007 			verbose(env, "invalid%s stack %s off=%lld size=%d\n",
6008 				err_extra, reg_arg_name(env, argno), min_off, access_size);
6009 		} else {
6010 			char tn_buf[48];
6011 
6012 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6013 			verbose(env, "invalid variable-offset%s stack %s var_off=%s off=%d size=%d\n",
6014 				err_extra, reg_arg_name(env, argno), tn_buf, off, access_size);
6015 		}
6016 		return err;
6017 	}
6018 
6019 	/* Note that there is no stack access with offset zero, so the needed stack
6020 	 * size is -min_off, not -min_off+1.
6021 	 */
6022 	return grow_stack_state(env, state, -min_off /* size */);
6023 }
6024 
6025 static bool get_func_retval_range(struct bpf_prog *prog,
6026 				  struct bpf_retval_range *range)
6027 {
6028 	if (prog->type == BPF_PROG_TYPE_LSM &&
6029 		prog->expected_attach_type == BPF_LSM_MAC &&
6030 		!bpf_lsm_get_retval_range(prog, range)) {
6031 		return true;
6032 	}
6033 	return false;
6034 }
6035 
6036 static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val)
6037 {
6038 	struct bpf_reg_state fake_reg;
6039 
6040 	if (!val)
6041 		return;
6042 
6043 	fake_reg.type = SCALAR_VALUE;
6044 	__mark_reg_known(&fake_reg, val);
6045 
6046 	scalar32_min_max_add(dst_reg, &fake_reg);
6047 	scalar_min_max_add(dst_reg, &fake_reg);
6048 	dst_reg->var_off = tnum_add(dst_reg->var_off, fake_reg.var_off);
6049 
6050 	reg_bounds_sync(dst_reg);
6051 }
6052 
6053 /* check whether memory at (regno + off) is accessible for t = (read | write)
6054  * if t==write, value_regno is a register which value is stored into memory
6055  * if t==read, value_regno is a register which will receive the value from memory
6056  * if t==write && value_regno==-1, some unknown value is stored into memory
6057  * if t==read && value_regno==-1, don't care what we read from memory
 *
 * @insn_idx:  index of the instruction being checked
 * @reg:       pointer register the access goes through; its type selects
 *             which of the per-type checks below is applied
 * @argno:     only used to name the register in log messages
 * @off:       offset encoded in the instruction
 * @bpf_size:  BPF size code, converted to bytes by bpf_size_to_bytes()
 * @strict_alignment_once: forwarded to check_ptr_alignment()
 * @is_ldsx:   the load sign-extends; selects coerce_reg_to_size_sx() for
 *             narrow scalar loads
 *
 * Return: 0 on success, a negative errno otherwise.
6058  */
6059 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno,
6060 			    int off, int bpf_size, enum bpf_access_type t,
6061 			    int value_regno, bool strict_alignment_once, bool is_ldsx)
6062 {
6063 	struct bpf_reg_state *regs = cur_regs(env);
6064 	int size, err = 0;
6065 
	/* a negative result is the error code for an unsupported size */
6066 	size = bpf_size_to_bytes(bpf_size);
6067 	if (size < 0)
6068 		return size;
6069 
6070 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
6071 	if (err)
6072 		return err;
6073 
6074 	if (reg->type == PTR_TO_MAP_KEY) {
6075 		if (t == BPF_WRITE) {
6076 			verbose(env, "write to change key %s not allowed\n",
6077 				reg_arg_name(env, argno));
6078 			return -EACCES;
6079 		}
6080 
6081 		err = check_mem_region_access(env, reg, argno, off, size,
6082 					      reg->map_ptr->key_size, false);
6083 		if (err)
6084 			return err;
6085 		if (value_regno >= 0)
6086 			mark_reg_unknown(env, regs, value_regno);
6087 	} else if (reg->type == PTR_TO_MAP_VALUE) {
6088 		struct btf_field *kptr_field = NULL;
6089 
6090 		if (t == BPF_WRITE && value_regno >= 0 &&
6091 		    is_pointer_value(env, value_regno)) {
6092 			verbose(env, "R%d leaks addr into map\n", value_regno);
6093 			return -EACCES;
6094 		}
6095 		err = check_map_access_type(env, reg, off, size, t);
6096 		if (err)
6097 			return err;
6098 		err = check_map_access(env, reg, argno, off, size, false, ACCESS_DIRECT);
6099 		if (err)
6100 			return err;
		/* a kptr/uptr field is only looked up for a constant offset */
6101 		if (tnum_is_const(reg->var_off))
6102 			kptr_field = btf_record_find(reg->map_ptr->record,
6103 						     off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
6104 		if (kptr_field) {
6105 			err = check_map_kptr_access(env, value_regno, insn_idx, kptr_field);
6106 		} else if (t == BPF_READ && value_regno >= 0) {
6107 			struct bpf_map *map = reg->map_ptr;
6108 
6109 			/*
6110 			 * If map is read-only, track its contents as scalars,
6111 			 * unless it is an insn array (see the special case below)
6112 			 */
6113 			if (tnum_is_const(reg->var_off) &&
6114 			    bpf_map_is_rdonly(map) &&
6115 			    map->ops->map_direct_value_addr &&
6116 			    map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
6117 				int map_off = off + reg->var_off.value;
6118 				u64 val = 0;
6119 
6120 				err = bpf_map_direct_read(map, map_off, size,
6121 							  &val, is_ldsx);
6122 				if (err)
6123 					return err;
6124 
6125 				regs[value_regno].type = SCALAR_VALUE;
6126 				__mark_reg_known(&regs[value_regno], val);
6127 			} else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
6128 				if (bpf_size != BPF_DW) {
6129 					verbose(env, "Invalid read of %d bytes from insn_array\n",
6130 						     size);
6131 					return -EACCES;
6132 				}
				/* The destination becomes a PTR_TO_INSN copy of
				 * the source register with 'off' added to it.
				 */
6133 				regs[value_regno] = *reg;
6134 				add_scalar_to_reg(&regs[value_regno], off);
6135 				regs[value_regno].type = PTR_TO_INSN;
6136 			} else {
6137 				mark_reg_unknown(env, regs, value_regno);
6138 			}
6139 		}
6140 	} else if (base_type(reg->type) == PTR_TO_MEM) {
6141 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
6142 		bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);
6143 
6144 		if (type_may_be_null(reg->type)) {
6145 			verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno),
6146 				reg_type_str(env, reg->type));
6147 			return -EACCES;
6148 		}
6149 
6150 		if (t == BPF_WRITE && rdonly_mem) {
6151 			verbose(env, "%s cannot write into %s\n",
6152 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
6153 			return -EACCES;
6154 		}
6155 
6156 		if (t == BPF_WRITE && value_regno >= 0 &&
6157 		    is_pointer_value(env, value_regno)) {
6158 			verbose(env, "R%d leaks addr into mem\n", value_regno);
6159 			return -EACCES;
6160 		}
6161 
6162 		/*
6163 		 * Accesses to untrusted PTR_TO_MEM are done through probe
6164 		 * instructions, hence no need to check bounds in that case.
6165 		 */
6166 		if (!rdonly_untrusted)
6167 			err = check_mem_region_access(env, reg, argno, off, size,
6168 						      reg->mem_size, false);
6169 		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
6170 			mark_reg_unknown(env, regs, value_regno);
6171 	} else if (reg->type == PTR_TO_CTX) {
		/* check_ctx_access() reports through 'info' what kind of value
		 * the accessed context field holds; SCALAR_VALUE is the
		 * initial assumption.
		 */
6172 		struct bpf_insn_access_aux info = {
6173 			.reg_type = SCALAR_VALUE,
6174 			.is_ldsx = is_ldsx,
6175 			.log = &env->log,
6176 		};
6177 		struct bpf_retval_range range;
6178 
6179 		if (t == BPF_WRITE && value_regno >= 0 &&
6180 		    is_pointer_value(env, value_regno)) {
6181 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
6182 			return -EACCES;
6183 		}
6184 
6185 		err = check_ctx_access(env, insn_idx, reg, argno, off, size, t, &info);
6186 		if (!err && t == BPF_READ && value_regno >= 0) {
6187 			/* ctx access returns either a scalar, or a
6188 			 * PTR_TO_PACKET[_META,_END]. In the latter
6189 			 * case, we know the offset is zero.
6190 			 */
6191 			if (info.reg_type == SCALAR_VALUE) {
6192 				if (info.is_retval && get_func_retval_range(env->prog, &range)) {
6193 					err = __mark_reg_s32_range(env, regs, value_regno,
6194 								   range.minval, range.maxval);
6195 					if (err)
6196 						return err;
6197 				} else {
6198 					mark_reg_unknown(env, regs, value_regno);
6199 				}
6200 			} else {
6201 				mark_reg_known_zero(env, regs,
6202 						    value_regno);
6203 				/* A load of ctx field could have different
6204 				 * actual load size with the one encoded in the
6205 				 * insn. When the dst is PTR, it is for sure not
6206 				 * a sub-register.
6207 				 */
6208 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
6209 				if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
6210 					regs[value_regno].btf = info.btf;
6211 					regs[value_regno].btf_id = info.btf_id;
6212 					regs[value_regno].id = info.ref_id;
6213 				}
				/* a maybe-NULL pointer gets a fresh id unless
				 * one was already assigned above
				 */
6214 				if (type_may_be_null(info.reg_type) && !regs[value_regno].id)
6215 					regs[value_regno].id = ++env->id_gen;
6216 			}
6217 			regs[value_regno].type = info.reg_type;
6218 		}
6219 
6220 	} else if (reg->type == PTR_TO_STACK) {
6221 		/* Basic bounds checks. */
6222 		err = check_stack_access_within_bounds(env, reg, argno, off, size, t);
6223 		if (err)
6224 			return err;
6225 
6226 		if (t == BPF_READ)
6227 			err = check_stack_read(env, reg, argno, off, size,
6228 					       value_regno);
6229 		else
6230 			err = check_stack_write(env, reg, off, size,
6231 						value_regno, insn_idx);
6232 	} else if (reg_is_pkt_pointer(reg)) {
6233 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
6234 			verbose(env, "cannot write into packet\n");
6235 			return -EACCES;
6236 		}
6237 		if (t == BPF_WRITE && value_regno >= 0 &&
6238 		    is_pointer_value(env, value_regno)) {
6239 			verbose(env, "R%d leaks addr into packet\n",
6240 				value_regno);
6241 			return -EACCES;
6242 		}
6243 		err = check_packet_access(env, reg, argno, off, size, false);
6244 		if (!err && t == BPF_READ && value_regno >= 0)
6245 			mark_reg_unknown(env, regs, value_regno);
6246 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
6247 		if (t == BPF_WRITE && value_regno >= 0 &&
6248 		    is_pointer_value(env, value_regno)) {
6249 			verbose(env, "R%d leaks addr into flow keys\n",
6250 				value_regno);
6251 			return -EACCES;
6252 		}
6253 
6254 		err = check_flow_keys_access(env, reg, argno, off, size);
6255 		if (!err && t == BPF_READ && value_regno >= 0)
6256 			mark_reg_unknown(env, regs, value_regno);
6257 	} else if (type_is_sk_pointer(reg->type)) {
6258 		if (t == BPF_WRITE) {
6259 			verbose(env, "%s cannot write into %s\n",
6260 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
6261 			return -EACCES;
6262 		}
6263 		err = check_sock_access(env, insn_idx, reg, argno, off, size, t);
6264 		if (!err && value_regno >= 0)
6265 			mark_reg_unknown(env, regs, value_regno);
6266 	} else if (reg->type == PTR_TO_TP_BUFFER) {
6267 		err = check_tp_buffer_access(env, reg, argno, off, size);
6268 		if (!err && t == BPF_READ && value_regno >= 0)
6269 			mark_reg_unknown(env, regs, value_regno);
6270 	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
6271 		   !type_may_be_null(reg->type)) {
6272 		err = check_ptr_to_btf_access(env, regs, reg, argno, off, size, t,
6273 					      value_regno);
6274 	} else if (reg->type == CONST_PTR_TO_MAP) {
6275 		err = check_ptr_to_map_access(env, regs, reg, argno, off, size, t,
6276 					      value_regno);
6277 	} else if (base_type(reg->type) == PTR_TO_BUF &&
6278 		   !type_may_be_null(reg->type)) {
6279 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
6280 		u32 *max_access;
6281 
		/* read-only and read-write buffers are tracked through
		 * separate max-access fields in prog->aux
		 */
6282 		if (rdonly_mem) {
6283 			if (t == BPF_WRITE) {
6284 				verbose(env, "%s cannot write into %s\n",
6285 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6286 				return -EACCES;
6287 			}
6288 			max_access = &env->prog->aux->max_rdonly_access;
6289 		} else {
6290 			max_access = &env->prog->aux->max_rdwr_access;
6291 		}
6292 
6293 		err = check_buffer_access(env, reg, argno, off, size, false,
6294 					  max_access);
6295 
6296 		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
6297 			mark_reg_unknown(env, regs, value_regno);
6298 	} else if (reg->type == PTR_TO_ARENA) {
		/* no bounds check in this branch; a load just yields an
		 * unknown value
		 */
6299 		if (t == BPF_READ && value_regno >= 0)
6300 			mark_reg_unknown(env, regs, value_regno);
6301 	} else {
6302 		verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno),
6303 			reg_type_str(env, reg->type));
6304 		return -EACCES;
6305 	}
6306 
	/* A successful load narrower than BPF_REG_SIZE that produced a scalar:
	 * narrow the tracked value to the access size.
	 */
6307 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
6308 	    regs[value_regno].type == SCALAR_VALUE) {
6309 		if (!is_ldsx)
6310 			/* b/h/w load zero-extends, mark upper bits as known 0 */
6311 			coerce_reg_to_size(&regs[value_regno], size);
6312 		else
6313 			coerce_reg_to_size_sx(&regs[value_regno], size);
6314 	}
6315 	return err;
6316 }
6317 
/* Forward declaration: called by check_load_mem(), check_store_reg() and
 * check_atomic_rmw() below.
 */
6318 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
6319 			     bool allow_trust_mismatch);
6320 
6321 static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
6322 			  bool strict_alignment_once, bool is_ldsx,
6323 			  bool allow_trust_mismatch, const char *ctx)
6324 {
6325 	struct bpf_verifier_state *vstate = env->cur_state;
6326 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6327 	struct bpf_reg_state *regs = cur_regs(env);
6328 	enum bpf_reg_type src_reg_type;
6329 	int err;
6330 
6331 	/* Handle stack arg read */
6332 	if (is_stack_arg_ldx(insn)) {
6333 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6334 		if (err)
6335 			return err;
6336 		return check_stack_arg_read(env, state, insn->off, insn->dst_reg);
6337 	}
6338 
6339 	/* check src operand */
6340 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6341 	if (err)
6342 		return err;
6343 
6344 	/* check dst operand */
6345 	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6346 	if (err)
6347 		return err;
6348 
6349 	src_reg_type = regs[insn->src_reg].type;
6350 
6351 	/* Check if (src_reg + off) is readable. The state of dst_reg will be
6352 	 * updated by this call.
6353 	 */
6354 	err = check_mem_access(env, env->insn_idx, regs + insn->src_reg, argno_from_reg(insn->src_reg), insn->off,
6355 			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
6356 			       strict_alignment_once, is_ldsx);
6357 	err = err ?: save_aux_ptr_type(env, src_reg_type,
6358 				       allow_trust_mismatch);
6359 	err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);
6360 
6361 	return err;
6362 }
6363 
6364 static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
6365 			   bool strict_alignment_once)
6366 {
6367 	struct bpf_verifier_state *vstate = env->cur_state;
6368 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6369 	struct bpf_reg_state *regs = cur_regs(env);
6370 	enum bpf_reg_type dst_reg_type;
6371 	int err;
6372 
6373 	/* Handle stack arg write */
6374 	if (is_stack_arg_stx(insn)) {
6375 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
6376 		if (err)
6377 			return err;
6378 		return check_stack_arg_write(env, state, insn->off, regs + insn->src_reg);
6379 	}
6380 
6381 	/* check src1 operand */
6382 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6383 	if (err)
6384 		return err;
6385 
6386 	/* check src2 operand */
6387 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6388 	if (err)
6389 		return err;
6390 
6391 	dst_reg_type = regs[insn->dst_reg].type;
6392 
6393 	/* Check if (dst_reg + off) is writeable. */
6394 	err = check_mem_access(env, env->insn_idx, regs + insn->dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6395 			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
6396 			       strict_alignment_once, false);
6397 	err = err ?: save_aux_ptr_type(env, dst_reg_type, false);
6398 
6399 	return err;
6400 }
6401 
6402 static int check_atomic_rmw(struct bpf_verifier_env *env,
6403 			    struct bpf_insn *insn)
6404 {
6405 	struct bpf_reg_state *dst_reg;
6406 	int load_reg;
6407 	int err;
6408 
6409 	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
6410 		verbose(env, "invalid atomic operand size\n");
6411 		return -EINVAL;
6412 	}
6413 
6414 	/* check src1 operand */
6415 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6416 	if (err)
6417 		return err;
6418 
6419 	/* check src2 operand */
6420 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6421 	if (err)
6422 		return err;
6423 
6424 	if (insn->imm == BPF_CMPXCHG) {
6425 		/* Check comparison of R0 with memory location */
6426 		const u32 aux_reg = BPF_REG_0;
6427 
6428 		err = check_reg_arg(env, aux_reg, SRC_OP);
6429 		if (err)
6430 			return err;
6431 
6432 		if (is_pointer_value(env, aux_reg)) {
6433 			verbose(env, "R%d leaks addr into mem\n", aux_reg);
6434 			return -EACCES;
6435 		}
6436 	}
6437 
6438 	if (is_pointer_value(env, insn->src_reg)) {
6439 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6440 		return -EACCES;
6441 	}
6442 
6443 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6444 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6445 			insn->dst_reg,
6446 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6447 		return -EACCES;
6448 	}
6449 
6450 	if (insn->imm & BPF_FETCH) {
6451 		if (insn->imm == BPF_CMPXCHG)
6452 			load_reg = BPF_REG_0;
6453 		else
6454 			load_reg = insn->src_reg;
6455 
6456 		/* check and record load of old value */
6457 		err = check_reg_arg(env, load_reg, DST_OP);
6458 		if (err)
6459 			return err;
6460 	} else {
6461 		/* This instruction accesses a memory location but doesn't
6462 		 * actually load it into a register.
6463 		 */
6464 		load_reg = -1;
6465 	}
6466 
6467 	dst_reg = cur_regs(env) + insn->dst_reg;
6468 
6469 	/* Check whether we can read the memory, with second call for fetch
6470 	 * case to simulate the register fill.
6471 	 */
6472 	err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6473 			       BPF_SIZE(insn->code), BPF_READ, -1, true, false);
6474 	if (!err && load_reg >= 0)
6475 		err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg),
6476 				       insn->off, BPF_SIZE(insn->code),
6477 				       BPF_READ, load_reg, true, false);
6478 	if (err)
6479 		return err;
6480 
6481 	if (is_arena_reg(env, insn->dst_reg)) {
6482 		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
6483 		if (err)
6484 			return err;
6485 	}
6486 	/* Check whether we can write into the same memory. */
6487 	err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6488 			       BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
6489 	if (err)
6490 		return err;
6491 	return 0;
6492 }
6493 
6494 static int check_atomic_load(struct bpf_verifier_env *env,
6495 			     struct bpf_insn *insn)
6496 {
6497 	int err;
6498 
6499 	err = check_load_mem(env, insn, true, false, false, "atomic_load");
6500 	if (err)
6501 		return err;
6502 
6503 	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
6504 		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
6505 			insn->src_reg,
6506 			reg_type_str(env, reg_state(env, insn->src_reg)->type));
6507 		return -EACCES;
6508 	}
6509 
6510 	return 0;
6511 }
6512 
6513 static int check_atomic_store(struct bpf_verifier_env *env,
6514 			      struct bpf_insn *insn)
6515 {
6516 	int err;
6517 
6518 	err = check_store_reg(env, insn, true);
6519 	if (err)
6520 		return err;
6521 
6522 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6523 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6524 			insn->dst_reg,
6525 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6526 		return -EACCES;
6527 	}
6528 
6529 	return 0;
6530 }
6531 
6532 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
6533 {
6534 	switch (insn->imm) {
6535 	case BPF_ADD:
6536 	case BPF_ADD | BPF_FETCH:
6537 	case BPF_AND:
6538 	case BPF_AND | BPF_FETCH:
6539 	case BPF_OR:
6540 	case BPF_OR | BPF_FETCH:
6541 	case BPF_XOR:
6542 	case BPF_XOR | BPF_FETCH:
6543 	case BPF_XCHG:
6544 	case BPF_CMPXCHG:
6545 		return check_atomic_rmw(env, insn);
6546 	case BPF_LOAD_ACQ:
6547 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6548 			verbose(env,
6549 				"64-bit load-acquires are only supported on 64-bit arches\n");
6550 			return -EOPNOTSUPP;
6551 		}
6552 		return check_atomic_load(env, insn);
6553 	case BPF_STORE_REL:
6554 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6555 			verbose(env,
6556 				"64-bit store-releases are only supported on 64-bit arches\n");
6557 			return -EOPNOTSUPP;
6558 		}
6559 		return check_atomic_store(env, insn);
6560 	default:
6561 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
6562 			insn->imm);
6563 		return -EINVAL;
6564 	}
6565 }
6566 
6567 /* When register 'regno' is used to read the stack (either directly or through
6568  * a helper function) make sure that it's within stack boundary and, depending
6569  * on the access type and privileges, that all elements of the stack are
6570  * initialized.
6571  *
6572  * All registers that have been spilled on the stack in the slots within the
6573  * read offsets are marked as read.
6574  */
6575 static int check_stack_range_initialized(
6576 		struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off,
6577 		int access_size, bool zero_size_allowed,
6578 		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
6579 {
6580 	struct bpf_func_state *state = bpf_func(env, reg);
6581 	int err, min_off, max_off, i, j, slot, spi;
6582 	/* Some accesses can write anything into the stack, others are
6583 	 * read-only.
6584 	 */
6585 	bool clobber = type == BPF_WRITE;
6586 	/*
6587 	 * Negative access_size signals global subprog/kfunc arg check where
6588 	 * STACK_POISON slots are acceptable. static stack liveness
6589 	 * might have determined that subprog doesn't read them,
6590 	 * but BTF based global subprog validation isn't accurate enough.
6591 	 */
6592 	bool allow_poison = access_size < 0 || clobber;
6593 
6594 	access_size = abs(access_size);
6595 
6596 	if (access_size == 0 && !zero_size_allowed) {
6597 		verbose(env, "invalid zero-sized read\n");
6598 		return -EACCES;
6599 	}
6600 
6601 	err = check_stack_access_within_bounds(env, reg, argno, off, access_size, type);
6602 	if (err)
6603 		return err;
6604 
6605 
6606 	if (tnum_is_const(reg->var_off)) {
6607 		min_off = max_off = reg->var_off.value + off;
6608 	} else {
6609 		/* Variable offset is prohibited for unprivileged mode for
6610 		 * simplicity since it requires corresponding support in
6611 		 * Spectre masking for stack ALU.
6612 		 * See also retrieve_ptr_limit().
6613 		 */
6614 		if (!env->bypass_spec_v1) {
6615 			char tn_buf[48];
6616 
6617 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6618 			verbose(env, "%s variable offset stack access prohibited for !root, var_off=%s\n",
6619 				reg_arg_name(env, argno), tn_buf);
6620 			return -EACCES;
6621 		}
6622 		/* Only initialized buffer on stack is allowed to be accessed
6623 		 * with variable offset. With uninitialized buffer it's hard to
6624 		 * guarantee that whole memory is marked as initialized on
6625 		 * helper return since specific bounds are unknown what may
6626 		 * cause uninitialized stack leaking.
6627 		 */
6628 		if (meta && meta->raw_mode)
6629 			meta = NULL;
6630 
6631 		min_off = reg_smin(reg) + off;
6632 		max_off = reg_smax(reg) + off;
6633 	}
6634 
6635 	if (meta && meta->raw_mode) {
6636 		/* Ensure we won't be overwriting dynptrs when simulating byte
6637 		 * by byte access in check_helper_call using meta.access_size.
6638 		 * This would be a problem if we have a helper in the future
6639 		 * which takes:
6640 		 *
6641 		 *	helper(uninit_mem, len, dynptr)
6642 		 *
6643 		 * Now, uninint_mem may overlap with dynptr pointer. Hence, it
6644 		 * may end up writing to dynptr itself when touching memory from
6645 		 * arg 1. This can be relaxed on a case by case basis for known
6646 		 * safe cases, but reject due to the possibilitiy of aliasing by
6647 		 * default.
6648 		 */
6649 		for (i = min_off; i < max_off + access_size; i++) {
6650 			int stack_off = -i - 1;
6651 
6652 			spi = bpf_get_spi(i);
6653 			/* raw_mode may write past allocated_stack */
6654 			if (state->allocated_stack <= stack_off)
6655 				continue;
6656 			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
6657 				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
6658 				return -EACCES;
6659 			}
6660 		}
6661 		meta->access_size = access_size;
6662 		meta->regno = reg_from_argno(argno);
6663 		return 0;
6664 	}
6665 
6666 	for (i = min_off; i < max_off + access_size; i++) {
6667 		u8 *stype;
6668 
6669 		slot = -i - 1;
6670 		spi = slot / BPF_REG_SIZE;
6671 		if (state->allocated_stack <= slot) {
6672 			verbose(env, "allocated_stack too small\n");
6673 			return -EFAULT;
6674 		}
6675 
6676 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
6677 		if (*stype == STACK_MISC)
6678 			goto mark;
6679 		if ((*stype == STACK_ZERO) ||
6680 		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
6681 			if (clobber) {
6682 				/* helper can write anything into the stack */
6683 				*stype = STACK_MISC;
6684 			}
6685 			goto mark;
6686 		}
6687 
6688 		if (bpf_is_spilled_reg(&state->stack[spi]) &&
6689 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
6690 		     env->allow_ptr_leaks)) {
6691 			if (clobber) {
6692 				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
6693 				for (j = 0; j < BPF_REG_SIZE; j++)
6694 					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
6695 			}
6696 			goto mark;
6697 		}
6698 
6699 		if (*stype == STACK_POISON) {
6700 			if (allow_poison)
6701 				goto mark;
6702 			verbose(env, "reading from stack %s off %d+%d size %d, slot poisoned by dead code elimination\n",
6703 				reg_arg_name(env, argno), min_off, i - min_off, access_size);
6704 		} else if (tnum_is_const(reg->var_off)) {
6705 			verbose(env, "invalid read from stack %s off %d+%d size %d\n",
6706 				reg_arg_name(env, argno), min_off, i - min_off, access_size);
6707 		} else {
6708 			char tn_buf[48];
6709 
6710 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6711 			verbose(env, "invalid read from stack %s var_off %s+%d size %d\n",
6712 				reg_arg_name(env, argno), tn_buf, i - min_off, access_size);
6713 		}
6714 		return -EACCES;
6715 mark:
6716 		;
6717 	}
6718 	return 0;
6719 }
6720 
6721 static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
6722 				   int access_size, enum bpf_access_type access_type,
6723 				   bool zero_size_allowed,
6724 				   struct bpf_call_arg_meta *meta)
6725 {
6726 	struct bpf_reg_state *regs = cur_regs(env);
6727 	u32 *max_access;
6728 
6729 	switch (base_type(reg->type)) {
6730 	case PTR_TO_PACKET:
6731 	case PTR_TO_PACKET_META:
6732 		return check_packet_access(env, reg, argno, 0, access_size,
6733 					   zero_size_allowed);
6734 	case PTR_TO_MAP_KEY:
6735 		if (access_type == BPF_WRITE) {
6736 			verbose(env, "%s cannot write into %s\n",
6737 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
6738 			return -EACCES;
6739 		}
6740 		return check_mem_region_access(env, reg, argno, 0, access_size,
6741 					       reg->map_ptr->key_size, false);
6742 	case PTR_TO_MAP_VALUE:
6743 		if (check_map_access_type(env, reg, 0, access_size, access_type))
6744 			return -EACCES;
6745 		return check_map_access(env, reg, argno, 0, access_size,
6746 					zero_size_allowed, ACCESS_HELPER);
6747 	case PTR_TO_MEM:
6748 		if (type_is_rdonly_mem(reg->type)) {
6749 			if (access_type == BPF_WRITE) {
6750 				verbose(env, "%s cannot write into %s\n",
6751 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6752 				return -EACCES;
6753 			}
6754 		}
6755 		return check_mem_region_access(env, reg, argno, 0,
6756 					       access_size, reg->mem_size,
6757 					       zero_size_allowed);
6758 	case PTR_TO_BUF:
6759 		if (type_is_rdonly_mem(reg->type)) {
6760 			if (access_type == BPF_WRITE) {
6761 				verbose(env, "%s cannot write into %s\n",
6762 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6763 				return -EACCES;
6764 			}
6765 
6766 			max_access = &env->prog->aux->max_rdonly_access;
6767 		} else {
6768 			max_access = &env->prog->aux->max_rdwr_access;
6769 		}
6770 		return check_buffer_access(env, reg, argno, 0,
6771 					   access_size, zero_size_allowed,
6772 					   max_access);
6773 	case PTR_TO_STACK:
6774 		return check_stack_range_initialized(
6775 				env, reg,
6776 				argno, 0, access_size,
6777 				zero_size_allowed, access_type, meta);
6778 	case PTR_TO_BTF_ID:
6779 		return check_ptr_to_btf_access(env, regs, reg, argno, 0,
6780 					       access_size, access_type, -1);
6781 	case PTR_TO_CTX:
6782 		/* Only permit reading or writing syscall context using helper calls. */
6783 		if (is_var_ctx_off_allowed(env->prog)) {
6784 			int err = check_mem_region_access(env, reg, argno, 0, access_size, U16_MAX,
6785 							  zero_size_allowed);
6786 			if (err)
6787 				return err;
6788 			if (env->prog->aux->max_ctx_offset < reg_umax(reg) + access_size)
6789 				env->prog->aux->max_ctx_offset = reg_umax(reg) + access_size;
6790 			return 0;
6791 		}
6792 		fallthrough;
6793 	default: /* scalar_value or invalid ptr */
6794 		/* Allow zero-byte read from NULL, regardless of pointer type */
6795 		if (zero_size_allowed && access_size == 0 &&
6796 		    bpf_register_is_null(reg))
6797 			return 0;
6798 
6799 		verbose(env, "%s type=%s ", reg_arg_name(env, argno),
6800 			reg_type_str(env, reg->type));
6801 		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
6802 		return -EACCES;
6803 	}
6804 }
6805 
6806 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
6807  * size.
6808  *
6809  * @mem_reg contains the pointer, @size_reg contains the access size.
6810  */
6811 static int check_mem_size_reg(struct bpf_verifier_env *env,
6812 			      struct bpf_reg_state *mem_reg,
6813 			      struct bpf_reg_state *size_reg, argno_t mem_argno,
6814 			      argno_t size_argno, enum bpf_access_type access_type,
6815 			      bool zero_size_allowed,
6816 			      struct bpf_call_arg_meta *meta)
6817 {
6818 	int err;
6819 
6820 	/* This is used to refine r0 return value bounds for helpers
6821 	 * that enforce this value as an upper bound on return values.
6822 	 * See do_refine_retval_range() for helpers that can refine
6823 	 * the return value. C type of helper is u32 so we pull register
6824 	 * bound from umax_value however, if negative verifier errors
6825 	 * out. Only upper bounds can be learned because retval is an
6826 	 * int type and negative retvals are allowed.
6827 	 */
6828 	meta->msize_max_value = reg_umax(size_reg);
6829 
6830 	/* The register is SCALAR_VALUE; the access check happens using
6831 	 * its boundaries. For unprivileged variable accesses, disable
6832 	 * raw mode so that the program is required to initialize all
6833 	 * the memory that the helper could just partially fill up.
6834 	 */
6835 	if (!tnum_is_const(size_reg->var_off))
6836 		meta = NULL;
6837 
6838 	if (reg_smin(size_reg) < 0) {
6839 		verbose(env, "%s min value is negative, either use unsigned or 'var &= const'\n",
6840 			reg_arg_name(env, size_argno));
6841 		return -EACCES;
6842 	}
6843 
6844 	if (reg_umin(size_reg) == 0 && !zero_size_allowed) {
6845 		verbose(env, "%s invalid zero-sized read: u64=[%lld,%lld]\n",
6846 			reg_arg_name(env, size_argno), reg_umin(size_reg), reg_umax(size_reg));
6847 		return -EACCES;
6848 	}
6849 
6850 	if (reg_umax(size_reg) >= BPF_MAX_VAR_SIZ) {
6851 		verbose(env, "%s unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
6852 			reg_arg_name(env, size_argno));
6853 		return -EACCES;
6854 	}
6855 	err = check_helper_mem_access(env, mem_reg, mem_argno, reg_umax(size_reg),
6856 				      access_type, zero_size_allowed, meta);
6857 	if (!err) {
6858 		int regno = reg_from_argno(size_argno);
6859 
6860 		if (regno >= 0)
6861 			err = mark_chain_precision(env, regno);
6862 		else
6863 			err = mark_stack_arg_precision(env, arg_idx_from_argno(size_argno));
6864 	}
6865 	return err;
6866 }
6867 
6868 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6869 			 argno_t argno, u32 mem_size)
6870 {
6871 	bool may_be_null = type_may_be_null(reg->type);
6872 	struct bpf_reg_state saved_reg;
6873 	int err;
6874 
6875 	if (bpf_register_is_null(reg))
6876 		return 0;
6877 
6878 	if (mem_size > S32_MAX) {
6879 		verbose(env, "%s memory size %u is too large\n",
6880 			reg_arg_name(env, argno), mem_size);
6881 		return -EACCES;
6882 	}
6883 
6884 	/* Assuming that the register contains a value check if the memory
6885 	 * access is safe. Temporarily save and restore the register's state as
6886 	 * the conversion shouldn't be visible to a caller.
6887 	 */
6888 	if (may_be_null) {
6889 		saved_reg = *reg;
6890 		mark_ptr_not_null_reg(reg);
6891 	}
6892 
6893 	int size = base_type(reg->type) == PTR_TO_STACK ? -(int)mem_size : mem_size;
6894 
6895 	err = check_helper_mem_access(env, reg, argno, size, BPF_READ, true, NULL);
6896 	err = err ?: check_helper_mem_access(env, reg, argno, size, BPF_WRITE, true, NULL);
6897 
6898 	if (may_be_null)
6899 		*reg = saved_reg;
6900 
6901 	return err;
6902 }
6903 
6904 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *mem_reg,
6905 				    struct bpf_reg_state *size_reg, argno_t mem_argno, argno_t size_argno)
6906 {
6907 	bool may_be_null = type_may_be_null(mem_reg->type);
6908 	struct bpf_reg_state saved_reg;
6909 	struct bpf_call_arg_meta meta;
6910 	int err;
6911 
6912 	memset(&meta, 0, sizeof(meta));
6913 
6914 	if (may_be_null) {
6915 		saved_reg = *mem_reg;
6916 		mark_ptr_not_null_reg(mem_reg);
6917 	}
6918 
6919 	err = check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_READ, true, &meta);
6920 	err = err ?: check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_WRITE, true, &meta);
6921 
6922 	if (may_be_null)
6923 		*mem_reg = saved_reg;
6924 
6925 	return err;
6926 }
6927 
/* Bits of the 'flags' argument of process_spin_lock(). */
enum {
	/* the call acquires the lock; when clear it is handled as an unlock */
	PROCESS_SPIN_LOCK = 0x1,
	/* operate on the bpf_res_spin_lock field instead of bpf_spin_lock */
	PROCESS_RES_LOCK  = 0x2,
	/* IRQ variant; only looked at together with PROCESS_RES_LOCK */
	PROCESS_LOCK_IRQ  = 0x4,
};
6933 
6934 /* Implementation details:
6935  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
6936  * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
6937  * Two bpf_map_lookups (even with the same key) will have different reg->id.
6938  * Two separate bpf_obj_new will also have different reg->id.
6939  * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
6940  * clears reg->id after value_or_null->value transition, since the verifier only
6941  * cares about the range of access to valid map value pointer and doesn't care
6942  * about actual address of the map element.
6943  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
6944  * reg->id > 0 after value_or_null->value transition. By doing so
6945  * two bpf_map_lookups will be considered two different pointers that
6946  * point to different bpf_spin_locks. Likewise for pointers to allocated objects
6947  * returned from bpf_obj_new.
6948  * The verifier allows taking only one bpf_spin_lock at a time to avoid
6949  * dead-locks.
6950  * Since only one bpf_spin_lock is allowed the checks are simpler than
6951  * reg_is_refcounted() logic. The verifier needs to remember only
6952  * one spin_lock instead of array of acquired_refs.
6953  * env->cur_state->active_locks remembers which map value element or allocated
6954  * object got locked and clears it after bpf_spin_unlock.
6955  */
6956 static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int flags)
6957 {
6958 	bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
6959 	const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
6960 	struct bpf_verifier_state *cur = env->cur_state;
6961 	bool is_const = tnum_is_const(reg->var_off);
6962 	bool is_irq = flags & PROCESS_LOCK_IRQ;
6963 	u64 val = reg->var_off.value;
6964 	struct bpf_map *map = NULL;
6965 	struct btf *btf = NULL;
6966 	struct btf_record *rec;
6967 	u32 spin_lock_off;
6968 	int err;
6969 
6970 	if (!is_const) {
6971 		verbose(env,
6972 			"%s doesn't have constant offset. %s_lock has to be at the constant offset\n",
6973 			reg_arg_name(env, argno), lock_str);
6974 		return -EINVAL;
6975 	}
6976 	if (reg->type == PTR_TO_MAP_VALUE) {
6977 		map = reg->map_ptr;
6978 		if (!map->btf) {
6979 			verbose(env,
6980 				"map '%s' has to have BTF in order to use %s_lock\n",
6981 				map->name, lock_str);
6982 			return -EINVAL;
6983 		}
6984 	} else {
6985 		btf = reg->btf;
6986 	}
6987 
6988 	rec = reg_btf_record(reg);
6989 	if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) {
6990 		verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local",
6991 			map ? map->name : "kptr", lock_str);
6992 		return -EINVAL;
6993 	}
6994 	spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
6995 	if (spin_lock_off != val) {
6996 		verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
6997 			val, lock_str, spin_lock_off);
6998 		return -EINVAL;
6999 	}
7000 	if (is_lock) {
7001 		void *ptr;
7002 		int type;
7003 
7004 		if (map)
7005 			ptr = map;
7006 		else
7007 			ptr = btf;
7008 
7009 		if (!is_res_lock && cur->active_locks) {
7010 			if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) {
7011 				verbose(env,
7012 					"Locking two bpf_spin_locks are not allowed\n");
7013 				return -EINVAL;
7014 			}
7015 		} else if (is_res_lock && cur->active_locks) {
7016 			if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) {
7017 				verbose(env, "Acquiring the same lock again, AA deadlock detected\n");
7018 				return -EINVAL;
7019 			}
7020 		}
7021 
7022 		if (is_res_lock && is_irq)
7023 			type = REF_TYPE_RES_LOCK_IRQ;
7024 		else if (is_res_lock)
7025 			type = REF_TYPE_RES_LOCK;
7026 		else
7027 			type = REF_TYPE_LOCK;
7028 		err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
7029 		if (err < 0) {
7030 			verbose(env, "Failed to acquire lock state\n");
7031 			return err;
7032 		}
7033 	} else {
7034 		void *ptr;
7035 		int type;
7036 
7037 		if (map)
7038 			ptr = map;
7039 		else
7040 			ptr = btf;
7041 
7042 		if (!cur->active_locks) {
7043 			verbose(env, "%s_unlock without taking a lock\n", lock_str);
7044 			return -EINVAL;
7045 		}
7046 
7047 		if (is_res_lock && is_irq)
7048 			type = REF_TYPE_RES_LOCK_IRQ;
7049 		else if (is_res_lock)
7050 			type = REF_TYPE_RES_LOCK;
7051 		else
7052 			type = REF_TYPE_LOCK;
7053 		if (!find_lock_state(cur, type, reg->id, ptr)) {
7054 			verbose(env, "%s_unlock of different lock\n", lock_str);
7055 			return -EINVAL;
7056 		}
7057 		if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) {
7058 			verbose(env, "%s_unlock cannot be out of order\n", lock_str);
7059 			return -EINVAL;
7060 		}
7061 		if (release_lock_state(cur, type, reg->id, ptr)) {
7062 			verbose(env, "%s_unlock of different lock\n", lock_str);
7063 			return -EINVAL;
7064 		}
7065 
7066 		invalidate_non_owning_refs(env);
7067 	}
7068 	return 0;
7069 }
7070 
7071 /* Check if @regno is a pointer to a specific field in a map value */
7072 static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7073 				   enum btf_field_type field_type,
7074 				   struct bpf_map_desc *map_desc)
7075 {
7076 	bool is_const = tnum_is_const(reg->var_off);
7077 	struct bpf_map *map = reg->map_ptr;
7078 	u64 val = reg->var_off.value;
7079 	const char *struct_name = btf_field_type_name(field_type);
7080 	int field_off = -1;
7081 
7082 	if (!is_const) {
7083 		verbose(env,
7084 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
7085 			reg_arg_name(env, argno), struct_name);
7086 		return -EINVAL;
7087 	}
7088 	if (!map->btf) {
7089 		verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
7090 			struct_name);
7091 		return -EINVAL;
7092 	}
7093 	if (!btf_record_has_field(map->record, field_type)) {
7094 		verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
7095 		return -EINVAL;
7096 	}
7097 	switch (field_type) {
7098 	case BPF_TIMER:
7099 		field_off = map->record->timer_off;
7100 		break;
7101 	case BPF_TASK_WORK:
7102 		field_off = map->record->task_work_off;
7103 		break;
7104 	case BPF_WORKQUEUE:
7105 		field_off = map->record->wq_off;
7106 		break;
7107 	default:
7108 		verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
7109 		return -EINVAL;
7110 	}
7111 	if (field_off != val) {
7112 		verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
7113 			val, struct_name, field_off);
7114 		return -EINVAL;
7115 	}
7116 	if (map_desc->ptr) {
7117 		verifier_bug(env, "Two map pointers in a %s helper", struct_name);
7118 		return -EFAULT;
7119 	}
7120 	map_desc->uid = reg->map_uid;
7121 	map_desc->ptr = map;
7122 	return 0;
7123 }
7124 
7125 static int process_timer_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7126 			      struct bpf_map_desc *map)
7127 {
7128 	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
7129 		verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
7130 		return -EOPNOTSUPP;
7131 	}
7132 	return check_map_field_pointer(env, reg, argno, BPF_TIMER, map);
7133 }
7134 
7135 static int process_timer_helper(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7136 				struct bpf_call_arg_meta *meta)
7137 {
7138 	return process_timer_func(env, reg, argno, &meta->map);
7139 }
7140 
7141 static int process_timer_kfunc(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7142 			       struct bpf_kfunc_call_arg_meta *meta)
7143 {
7144 	return process_timer_func(env, reg, argno, &meta->map);
7145 }
7146 
7147 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
7148 			     struct bpf_call_arg_meta *meta)
7149 {
7150 	struct bpf_reg_state *reg = reg_state(env, regno);
7151 	struct btf_field *kptr_field;
7152 	struct bpf_map *map_ptr;
7153 	struct btf_record *rec;
7154 	u32 kptr_off;
7155 
7156 	if (type_is_ptr_alloc_obj(reg->type)) {
7157 		rec = reg_btf_record(reg);
7158 	} else { /* PTR_TO_MAP_VALUE */
7159 		map_ptr = reg->map_ptr;
7160 		if (!map_ptr->btf) {
7161 			verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7162 				map_ptr->name);
7163 			return -EINVAL;
7164 		}
7165 		rec = map_ptr->record;
7166 		meta->map.ptr = map_ptr;
7167 	}
7168 
7169 	if (!tnum_is_const(reg->var_off)) {
7170 		verbose(env,
7171 			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7172 			regno);
7173 		return -EINVAL;
7174 	}
7175 
7176 	if (!btf_record_has_field(rec, BPF_KPTR)) {
7177 		verbose(env, "R%d has no valid kptr\n", regno);
7178 		return -EINVAL;
7179 	}
7180 
7181 	kptr_off = reg->var_off.value;
7182 	kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
7183 	if (!kptr_field) {
7184 		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
7185 		return -EACCES;
7186 	}
7187 	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
7188 		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
7189 		return -EACCES;
7190 	}
7191 	meta->kptr_field = kptr_field;
7192 	return 0;
7193 }
7194 
7195 /*
7196  * Validate dynptr arguments for helper, kfunc and subprog.
7197  *
7198  * @dynptr is both input and output. It is populated when the argument is
7199  * tagged with MEM_UNINIT (i.e., the dynptr argument that will be constructed)
7200  * and consumed when the argument is expecting to be an initialized dynptr.
7201  * @parent_id is used to track the referenced parent object (e.g., file or skb in
7202  * qdisc program) when constructing a dynptr.
7203  *
7204  * There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
7205  * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7206  *
7207  * In both cases we deal with the first 8 bytes, but need to mark the next 8
7208  * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7209  * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7210  *
7211  * Mutability of bpf_dynptr is at two levels: the dynptr and the memory the
7212  * dynptr points to. At the first level, the verifier will make sure a
7213  * CONST_PTR_TO_DYNPTR cannot be reinitialized or destroyed. The mutability of
7214  * a dynptr's view (i.e., start and offset) is not tracked as there is not such
7215  * use case. The second level is tracked using the upper bit of bpf_dynptr->size
7216  * and checked dynamically during runtime.
7217  */
7218 static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7219 			       argno_t argno, int insn_idx, enum bpf_arg_type arg_type,
7220 			       struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
7221 {
7222 	int spi, err = 0;
7223 
7224 	if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
7225 		verbose(env,
7226 			"%s expected pointer to stack or const struct bpf_dynptr\n",
7227 			reg_arg_name(env, argno));
7228 		return -EINVAL;
7229 	}
7230 
7231 	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
7232 	 *		 constructing a mutable bpf_dynptr object.
7233 	 *
7234 	 *		 Currently, this is only possible with PTR_TO_STACK
7235 	 *		 pointing to a region of at least 16 bytes which doesn't
7236 	 *		 contain an existing bpf_dynptr.
7237 	 *
7238 	 *  OBJ_RELEASE - Points to a initialized bpf_dynptr that will be
7239 	 *		  destroyed.
7240 	 *
7241 	 *  None       - Points to a initialized dynptr that cannot be
7242 	 *		 reinitialized or destroyed. However, the view of the
7243 	 *		 dynptr and the memory it points to may be mutated.
7244 	 */
7245 	if (arg_type & MEM_UNINIT) {
7246 		int i;
7247 
7248 		if (!is_dynptr_reg_valid_uninit(env, reg)) {
7249 			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
7250 			return -EINVAL;
7251 		}
7252 
7253 		/* we write BPF_DW bits (8 bytes) at a time */
7254 		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
7255 			err = check_mem_access(env, insn_idx, reg, argno,
7256 					       i, BPF_DW, BPF_WRITE, -1, false, false);
7257 			if (err)
7258 				return err;
7259 		}
7260 
7261 		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, ref_obj, dynptr);
7262 	} else /* OBJ_RELEASE and None case from above */ {
7263 		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
7264 		if (reg->type == CONST_PTR_TO_DYNPTR && (arg_type & OBJ_RELEASE)) {
7265 			verbose(env, "CONST_PTR_TO_DYNPTR cannot be released\n");
7266 			return -EINVAL;
7267 		}
7268 
7269 		if (!is_dynptr_reg_valid_init(env, reg)) {
7270 			verbose(env, "Expected an initialized dynptr as %s\n",
7271 				reg_arg_name(env, argno));
7272 			return -EINVAL;
7273 		}
7274 
7275 		/* Fold modifiers (in this case, OBJ_RELEASE) when checking expected type */
7276 		if (!is_dynptr_type_expected(env, reg, arg_type & ~OBJ_RELEASE)) {
7277 			verbose(env,
7278 				"Expected a dynptr of type %s as %s\n",
7279 				dynptr_type_str(arg_to_dynptr_type(arg_type)),
7280 				reg_arg_name(env, argno));
7281 			return -EINVAL;
7282 		}
7283 
7284 		if (reg->type != CONST_PTR_TO_DYNPTR) {
7285 			struct bpf_func_state *state = bpf_func(env, reg);
7286 
7287 			spi = dynptr_get_spi(env, reg);
7288 			if (spi < 0)
7289 				return spi;
7290 
7291 			/*
7292 			 * For CONST_PTR_TO_DYNPTR, reg is already scratched by check_reg_arg
7293 			 * in check_helper_call and mark_btf_func_reg_size in check_kfunc_call.
7294 			 */
7295 			mark_stack_slots_scratched(env, spi, BPF_DYNPTR_NR_SLOTS);
7296 
7297 			reg = &state->stack[spi].spilled_ptr;
7298 		}
7299 
7300 		if (dynptr) {
7301 			dynptr->type = reg->dynptr.type;
7302 			dynptr->id = reg->id;
7303 			dynptr->parent_id = reg->parent_id;
7304 		}
7305 	}
7306 	return err;
7307 }
7308 
7309 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7310 {
7311 	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7312 }
7313 
7314 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7315 {
7316 	return meta->kfunc_flags & KF_ITER_NEW;
7317 }
7318 
7319 
7320 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7321 {
7322 	return meta->kfunc_flags & KF_ITER_DESTROY;
7323 }
7324 
7325 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
7326 			      const struct btf_param *arg)
7327 {
7328 	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
7329 	 * kfunc is iter state pointer
7330 	 */
7331 	if (is_iter_kfunc(meta))
7332 		return arg_idx == 0;
7333 
7334 	/* iter passed as an argument to a generic kfunc */
7335 	return btf_param_match_suffix(meta->btf, arg, "__iter");
7336 }
7337 
7338 static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx,
7339 			    struct bpf_kfunc_call_arg_meta *meta)
7340 {
7341 	struct bpf_func_state *state = bpf_func(env, reg);
7342 	const struct btf_type *t;
7343 	u32 arg_idx = arg_idx_from_argno(argno);
7344 	int spi, err, i, nr_slots, btf_id;
7345 
7346 	if (reg->type != PTR_TO_STACK) {
7347 		verbose(env, "%s expected pointer to an iterator on stack\n",
7348 			reg_arg_name(env, argno));
7349 		return -EINVAL;
7350 	}
7351 
7352 	/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
7353 	 * ensures struct convention, so we wouldn't need to do any BTF
7354 	 * validation here. But given iter state can be passed as a parameter
7355 	 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
7356 	 * conservative here.
7357 	 */
7358 	btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, arg_idx);
7359 	if (btf_id < 0) {
7360 		verbose(env, "expected valid iter pointer as %s\n",
7361 			reg_arg_name(env, argno));
7362 		return -EINVAL;
7363 	}
7364 	t = btf_type_by_id(meta->btf, btf_id);
7365 	nr_slots = t->size / BPF_REG_SIZE;
7366 
7367 	if (is_iter_new_kfunc(meta)) {
7368 		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
7369 		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
7370 			verbose(env, "expected uninitialized iter_%s as %s\n",
7371 				iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno));
7372 			return -EINVAL;
7373 		}
7374 
7375 		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
7376 			err = check_mem_access(env, insn_idx, reg, argno,
7377 					       i, BPF_DW, BPF_WRITE, -1, false, false);
7378 			if (err)
7379 				return err;
7380 		}
7381 
7382 		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
7383 		if (err)
7384 			return err;
7385 	} else {
7386 		/* iter_next() or iter_destroy(), as well as any kfunc
7387 		 * accepting iter argument, expect initialized iter state
7388 		 */
7389 		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
7390 		switch (err) {
7391 		case 0:
7392 			break;
7393 		case -EINVAL:
7394 			verbose(env, "expected an initialized iter_%s as %s\n",
7395 				iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno));
7396 			return err;
7397 		case -EPROTO:
7398 			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
7399 			return err;
7400 		default:
7401 			return err;
7402 		}
7403 
7404 		spi = iter_get_spi(env, reg, nr_slots);
7405 		if (spi < 0)
7406 			return spi;
7407 
7408 		mark_stack_slots_scratched(env, spi, nr_slots);
7409 
7410 		/* remember meta->iter info for process_iter_next_call() */
7411 		meta->iter.spi = spi;
7412 		meta->iter.frameno = reg->frameno;
7413 		update_ref_obj(&meta->ref_obj, &state->stack[spi].spilled_ptr);
7414 
7415 		if (is_iter_destroy_kfunc(meta)) {
7416 			err = unmark_stack_slots_iter(env, reg, nr_slots);
7417 			if (err)
7418 				return err;
7419 		}
7420 	}
7421 
7422 	return 0;
7423 }
7424 
7425 /* Look for a previous loop entry at insn_idx: nearest parent state
7426  * stopped at insn_idx with callsites matching those in cur->frame.
7427  */
7428 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7429 						  struct bpf_verifier_state *cur,
7430 						  int insn_idx)
7431 {
7432 	struct bpf_verifier_state_list *sl;
7433 	struct bpf_verifier_state *st;
7434 	struct list_head *pos, *head;
7435 
7436 	/* Explored states are pushed in stack order, most recent states come first */
7437 	head = bpf_explored_state(env, insn_idx);
7438 	list_for_each(pos, head) {
7439 		sl = container_of(pos, struct bpf_verifier_state_list, node);
7440 		/* If st->branches != 0 state is a part of current DFS verification path,
7441 		 * hence cur & st for a loop.
7442 		 */
7443 		st = &sl->state;
7444 		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
7445 		    st->dfs_depth < cur->dfs_depth)
7446 			return st;
7447 	}
7448 
7449 	return NULL;
7450 }
7451 
7452 /*
7453  * Check if scalar registers are exact for the purpose of not widening.
7454  * More lenient than regs_exact()
7455  */
7456 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold,
7457 				    const struct bpf_reg_state *rcur)
7458 {
7459 	return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id));
7460 }
7461 
7462 static void maybe_widen_reg(struct bpf_verifier_env *env,
7463 			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur)
7464 {
7465 	if (rold->type != SCALAR_VALUE)
7466 		return;
7467 	if (rold->type != rcur->type)
7468 		return;
7469 	if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur))
7470 		return;
7471 	__mark_reg_unknown(env, rcur);
7472 }
7473 
7474 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
7475 				   struct bpf_verifier_state *old,
7476 				   struct bpf_verifier_state *cur)
7477 {
7478 	struct bpf_func_state *fold, *fcur;
7479 	int i, fr, num_slots;
7480 
7481 	for (fr = old->curframe; fr >= 0; fr--) {
7482 		fold = old->frame[fr];
7483 		fcur = cur->frame[fr];
7484 
7485 		for (i = 0; i < MAX_BPF_REG; i++)
7486 			maybe_widen_reg(env,
7487 					&fold->regs[i],
7488 					&fcur->regs[i]);
7489 
7490 		num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
7491 				fcur->allocated_stack / BPF_REG_SIZE);
7492 		for (i = 0; i < num_slots; i++) {
7493 			if (!bpf_is_spilled_reg(&fold->stack[i]) ||
7494 			    !bpf_is_spilled_reg(&fcur->stack[i]))
7495 				continue;
7496 
7497 			maybe_widen_reg(env,
7498 					&fold->stack[i].spilled_ptr,
7499 					&fcur->stack[i].spilled_ptr);
7500 		}
7501 	}
7502 	return 0;
7503 }
7504 
7505 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
7506 						 struct bpf_kfunc_call_arg_meta *meta)
7507 {
7508 	int iter_frameno = meta->iter.frameno;
7509 	int iter_spi = meta->iter.spi;
7510 
7511 	return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7512 }
7513 
7514 /* process_iter_next_call() is called when verifier gets to iterator's next
7515  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7516  * to it as just "iter_next()" in comments below.
7517  *
7518  * BPF verifier relies on a crucial contract for any iter_next()
7519  * implementation: it should *eventually* return NULL, and once that happens
7520  * it should keep returning NULL. That is, once iterator exhausts elements to
7521  * iterate, it should never reset or spuriously return new elements.
7522  *
7523  * With the assumption of such contract, process_iter_next_call() simulates
7524  * a fork in the verifier state to validate loop logic correctness and safety
7525  * without having to simulate infinite amount of iterations.
7526  *
7527  * In current state, we first assume that iter_next() returned NULL and
7528  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7529  * conditions we should not form an infinite loop and should eventually reach
7530  * exit.
7531  *
7532  * Besides that, we also fork current state and enqueue it for later
7533  * verification. In a forked state we keep iterator state as ACTIVE
7534  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7535  * also bump iteration depth to prevent erroneous infinite loop detection
7536  * later on (see iter_active_depths_differ() comment for details). In this
7537  * state we assume that we'll eventually loop back to another iter_next()
7538  * calls (it could be in exactly same location or in some other instruction,
7539  * it doesn't matter, we don't make any unnecessary assumptions about this,
7540  * everything revolves around iterator state in a stack slot, not which
7541  * instruction is calling iter_next()). When that happens, we either will come
7542  * to iter_next() with equivalent state and can conclude that next iteration
7543  * will proceed in exactly the same way as we just verified, so it's safe to
7544  * assume that loop converges. If not, we'll go on another iteration
7545  * simulation with a different input state, until all possible starting states
7546  * are validated or we reach maximum number of instructions limit.
7547  *
7548  * This way, we will either exhaustively discover all possible input states
7549  * that iterator loop can start with and eventually will converge, or we'll
7550  * effectively regress into bounded loop simulation logic and either reach
7551  * maximum number of instructions if loop is not provably convergent, or there
7552  * is some statically known limit on number of iterations (e.g., if there is
7553  * an explicit `if n > 100 then break;` statement somewhere in the loop).
7554  *
7555  * Iteration convergence logic in is_state_visited() relies on exact
7556  * states comparison, which ignores read and precision marks.
7557  * This is necessary because read and precision marks are not finalized
7558  * while in the loop. Exact comparison might preclude convergence for
7559  * simple programs like below:
7560  *
7561  *     i = 0;
7562  *     while(iter_next(&it))
7563  *       i++;
7564  *
7565  * At each iteration step i++ would produce a new distinct state and
7566  * eventually instruction processing limit would be reached.
7567  *
 * To avoid such behavior, speculatively forget (widen) the range of
 * imprecise scalar registers, if those registers were not precise at the
 * end of the previous iteration and do not match exactly.
7571  *
 * This is a conservative heuristic that allows verifying a wide range of
 * programs; however, it precludes verification of programs that conjure an
 * imprecise value on the first loop iteration and use it as precise on a second.
7575  * For example, the following safe program would fail to verify:
7576  *
7577  *     struct bpf_num_iter it;
7578  *     int arr[10];
7579  *     int i = 0, a = 0;
7580  *     bpf_iter_num_new(&it, 0, 10);
7581  *     while (bpf_iter_num_next(&it)) {
7582  *       if (a == 0) {
7583  *         a = 1;
 *         i = 7; // Because i changed verifier would forget
 *                // its range on second loop entry.
7586  *       } else {
7587  *         arr[i] = 42; // This would fail to verify.
7588  *       }
7589  *     }
7590  *     bpf_iter_num_destroy(&it);
7591  */
7592 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
7593 				  struct bpf_kfunc_call_arg_meta *meta)
7594 {
7595 	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
7596 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
7597 	struct bpf_reg_state *cur_iter, *queued_iter;
7598 
7599 	BTF_TYPE_EMIT(struct bpf_iter);
7600 
7601 	cur_iter = get_iter_from_state(cur_st, meta);
7602 
7603 	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
7604 	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
7605 		verifier_bug(env, "unexpected iterator state %d (%s)",
7606 			     cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
7607 		return -EFAULT;
7608 	}
7609 
7610 	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
7611 		/* Because iter_next() call is a checkpoint is_state_visitied()
7612 		 * should guarantee parent state with same call sites and insn_idx.
7613 		 */
7614 		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
7615 		    !same_callsites(cur_st->parent, cur_st)) {
7616 			verifier_bug(env, "bad parent state for iter next call");
7617 			return -EFAULT;
7618 		}
7619 		/* Note cur_st->parent in the call below, it is necessary to skip
7620 		 * checkpoint created for cur_st by is_state_visited()
7621 		 * right at this instruction.
7622 		 */
7623 		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
7624 		/* branch out active iter state */
7625 		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
7626 		if (IS_ERR(queued_st))
7627 			return PTR_ERR(queued_st);
7628 
7629 		queued_iter = get_iter_from_state(queued_st, meta);
7630 		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
7631 		queued_iter->iter.depth++;
7632 		if (prev_st)
7633 			widen_imprecise_scalars(env, prev_st, queued_st);
7634 
7635 		queued_fr = queued_st->frame[queued_st->curframe];
7636 		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
7637 	}
7638 
7639 	/* switch to DRAINED state, but keep the depth unchanged */
7640 	/* mark current iter state as drained and assume returned NULL */
7641 	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
7642 	__mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);
7643 
7644 	return 0;
7645 }
7646 
7647 static bool arg_type_is_mem_size(enum bpf_arg_type type)
7648 {
7649 	return type == ARG_CONST_SIZE ||
7650 	       type == ARG_CONST_SIZE_OR_ZERO;
7651 }
7652 
7653 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
7654 {
7655 	return base_type(type) == ARG_PTR_TO_MEM &&
7656 	       type & MEM_UNINIT;
7657 }
7658 
7659 static bool arg_type_is_release(enum bpf_arg_type type)
7660 {
7661 	return type & OBJ_RELEASE;
7662 }
7663 
7664 static bool arg_type_is_dynptr(enum bpf_arg_type type)
7665 {
7666 	return base_type(type) == ARG_PTR_TO_DYNPTR;
7667 }
7668 
7669 static int resolve_map_arg_type(struct bpf_verifier_env *env,
7670 				 const struct bpf_call_arg_meta *meta,
7671 				 enum bpf_arg_type *arg_type)
7672 {
7673 	if (!meta->map.ptr) {
7674 		/* kernel subsystem misconfigured verifier */
7675 		verifier_bug(env, "invalid map_ptr to access map->type");
7676 		return -EFAULT;
7677 	}
7678 
7679 	switch (meta->map.ptr->map_type) {
7680 	case BPF_MAP_TYPE_SOCKMAP:
7681 	case BPF_MAP_TYPE_SOCKHASH:
7682 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
7683 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
7684 		} else {
7685 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
7686 			return -EINVAL;
7687 		}
7688 		break;
7689 	case BPF_MAP_TYPE_BLOOM_FILTER:
7690 		if (meta->func_id == BPF_FUNC_map_peek_elem)
7691 			*arg_type = ARG_PTR_TO_MAP_VALUE;
7692 		break;
7693 	default:
7694 		break;
7695 	}
7696 	return 0;
7697 }
7698 
/* Set of register types that are acceptable for one helper argument type.
 *
 * check_reg_type() scans types[] in order and stops at the first NOT_INIT
 * entry, or after all 10 slots when the array is completely filled.
 */
struct bpf_reg_types {
	const enum bpf_reg_type types[10];
	/* Optional BTF ID; check_reg_type() uses it when no arg_btf_id is passed in. */
	u32 *btf_id;
};
7703 
7704 static const struct bpf_reg_types sock_types = {
7705 	.types = {
7706 		PTR_TO_SOCK_COMMON,
7707 		PTR_TO_SOCKET,
7708 		PTR_TO_TCP_SOCK,
7709 		PTR_TO_XDP_SOCK,
7710 	},
7711 };
7712 
#ifdef CONFIG_NET
/* Register types accepted for ARG_PTR_TO_BTF_ID_SOCK_COMMON (see
 * compatible_reg_types[]). The table also supplies the BTF ID that
 * check_reg_type() uses when the caller passes none.
 */
static const struct bpf_reg_types btf_id_sock_common_types = {
	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.types = {
		PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK, PTR_TO_XDP_SOCK,
		PTR_TO_BTF_ID, PTR_TO_BTF_ID | PTR_TRUSTED,
	},
};
#endif
7726 
7727 static const struct bpf_reg_types mem_types = {
7728 	.types = {
7729 		PTR_TO_STACK,
7730 		PTR_TO_PACKET,
7731 		PTR_TO_PACKET_META,
7732 		PTR_TO_MAP_KEY,
7733 		PTR_TO_MAP_VALUE,
7734 		PTR_TO_MEM,
7735 		PTR_TO_MEM | MEM_RINGBUF,
7736 		PTR_TO_BUF,
7737 		PTR_TO_BTF_ID | PTR_TRUSTED,
7738 		PTR_TO_CTX,
7739 	},
7740 };
7741 
7742 static const struct bpf_reg_types spin_lock_types = {
7743 	.types = {
7744 		PTR_TO_MAP_VALUE,
7745 		PTR_TO_BTF_ID | MEM_ALLOC,
7746 	}
7747 };
7748 
7749 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
7750 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
7751 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
7752 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
7753 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
7754 static const struct bpf_reg_types btf_ptr_types = {
7755 	.types = {
7756 		PTR_TO_BTF_ID,
7757 		PTR_TO_BTF_ID | PTR_TRUSTED,
7758 		PTR_TO_BTF_ID | MEM_RCU,
7759 	},
7760 };
7761 static const struct bpf_reg_types percpu_btf_ptr_types = {
7762 	.types = {
7763 		PTR_TO_BTF_ID | MEM_PERCPU,
7764 		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
7765 		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
7766 	}
7767 };
7768 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
7769 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
7770 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
7771 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
7772 static const struct bpf_reg_types kptr_xchg_dest_types = {
7773 	.types = {
7774 		PTR_TO_MAP_VALUE,
7775 		PTR_TO_BTF_ID | MEM_ALLOC,
7776 		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF,
7777 		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU,
7778 	}
7779 };
7780 static const struct bpf_reg_types dynptr_types = {
7781 	.types = {
7782 		PTR_TO_STACK,
7783 		CONST_PTR_TO_DYNPTR,
7784 	}
7785 };
7786 
7787 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
7788 	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
7789 	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
7790 	[ARG_CONST_SIZE]		= &scalar_types,
7791 	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
7792 	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
7793 	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
7794 	[ARG_PTR_TO_CTX]		= &context_types,
7795 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
7796 #ifdef CONFIG_NET
7797 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
7798 #endif
7799 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
7800 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
7801 	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
7802 	[ARG_PTR_TO_MEM]		= &mem_types,
7803 	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
7804 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
7805 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
7806 	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
7807 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
7808 	[ARG_PTR_TO_TIMER]		= &timer_types,
7809 	[ARG_KPTR_XCHG_DEST]		= &kptr_xchg_dest_types,
7810 	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
7811 };
7812 
7813 static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7814 			  enum bpf_arg_type arg_type,
7815 			  const u32 *arg_btf_id,
7816 			  struct bpf_call_arg_meta *meta)
7817 {
7818 	enum bpf_reg_type expected, type = reg->type;
7819 	const struct bpf_reg_types *compatible;
7820 	int i, j, err;
7821 
7822 	compatible = compatible_reg_types[base_type(arg_type)];
7823 	if (!compatible) {
7824 		verifier_bug(env, "unsupported arg type %d", arg_type);
7825 		return -EFAULT;
7826 	}
7827 
7828 	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
7829 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
7830 	 *
7831 	 * Same for MAYBE_NULL:
7832 	 *
7833 	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
7834 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
7835 	 *
7836 	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
7837 	 *
7838 	 * Therefore we fold these flags depending on the arg_type before comparison.
7839 	 */
7840 	if (arg_type & MEM_RDONLY)
7841 		type &= ~MEM_RDONLY;
7842 	if (arg_type & PTR_MAYBE_NULL)
7843 		type &= ~PTR_MAYBE_NULL;
7844 	if (base_type(arg_type) == ARG_PTR_TO_MEM)
7845 		type &= ~DYNPTR_TYPE_FLAG_MASK;
7846 
7847 	/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
7848 	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && reg_from_argno(argno) == BPF_REG_2) {
7849 		type &= ~MEM_ALLOC;
7850 		type &= ~MEM_PERCPU;
7851 	}
7852 
7853 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
7854 		expected = compatible->types[i];
7855 		if (expected == NOT_INIT)
7856 			break;
7857 
7858 		if (type == expected)
7859 			goto found;
7860 	}
7861 
7862 	verbose(env, "%s type=%s expected=", reg_arg_name(env, argno), reg_type_str(env, reg->type));
7863 	for (j = 0; j + 1 < i; j++)
7864 		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
7865 	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
7866 	return -EACCES;
7867 
7868 found:
7869 	if (base_type(reg->type) != PTR_TO_BTF_ID)
7870 		return 0;
7871 
7872 	if (compatible == &mem_types) {
7873 		if (!(arg_type & MEM_RDONLY)) {
7874 			verbose(env,
7875 				"%s() may write into memory pointed by %s type=%s\n",
7876 				func_id_name(meta->func_id),
7877 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
7878 			return -EACCES;
7879 		}
7880 		return 0;
7881 	}
7882 
7883 	switch ((int)reg->type) {
7884 	case PTR_TO_BTF_ID:
7885 	case PTR_TO_BTF_ID | PTR_TRUSTED:
7886 	case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
7887 	case PTR_TO_BTF_ID | MEM_RCU:
7888 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
7889 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
7890 	{
7891 		/* For bpf_sk_release, it needs to match against first member
7892 		 * 'struct sock_common', hence make an exception for it. This
7893 		 * allows bpf_sk_release to work for multiple socket types.
7894 		 */
7895 		bool strict_type_match = arg_type_is_release(arg_type) &&
7896 					 meta->func_id != BPF_FUNC_sk_release;
7897 
7898 		if (type_may_be_null(reg->type) &&
7899 		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
7900 			verbose(env, "Possibly NULL pointer passed to helper %s\n",
7901 				reg_arg_name(env, argno));
7902 			return -EACCES;
7903 		}
7904 
7905 		if (!arg_btf_id) {
7906 			if (!compatible->btf_id) {
7907 				verifier_bug(env, "missing arg compatible BTF ID");
7908 				return -EFAULT;
7909 			}
7910 			arg_btf_id = compatible->btf_id;
7911 		}
7912 
7913 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7914 			if (map_kptr_match_type(env, meta->kptr_field, reg, reg_from_argno(argno)))
7915 				return -EACCES;
7916 		} else {
7917 			if (arg_btf_id == BPF_PTR_POISON) {
7918 				verbose(env, "verifier internal error:");
7919 				verbose(env, "%s has non-overwritten BPF_PTR_POISON type\n",
7920 					reg_arg_name(env, argno));
7921 				return -EACCES;
7922 			}
7923 
7924 			err = __check_ptr_off_reg(env, reg, argno, true);
7925 			if (err)
7926 				return err;
7927 
7928 			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id,
7929 						  reg->var_off.value, btf_vmlinux, *arg_btf_id,
7930 						  strict_type_match)) {
7931 				verbose(env, "%s is of type %s but %s is expected\n",
7932 					reg_arg_name(env, argno),
7933 					btf_type_name(reg->btf, reg->btf_id),
7934 					btf_type_name(btf_vmlinux, *arg_btf_id));
7935 				return -EACCES;
7936 			}
7937 		}
7938 		break;
7939 	}
7940 	case PTR_TO_BTF_ID | MEM_ALLOC:
7941 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
7942 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
7943 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
7944 		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
7945 		    meta->func_id != BPF_FUNC_kptr_xchg) {
7946 			verifier_bug(env, "unimplemented handling of MEM_ALLOC");
7947 			return -EFAULT;
7948 		}
7949 		/* Check if local kptr in src arg matches kptr in dst arg */
7950 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7951 			int regno = reg_from_argno(argno);
7952 
7953 			if (regno == BPF_REG_2 &&
7954 			    map_kptr_match_type(env, meta->kptr_field, reg, regno))
7955 				return -EACCES;
7956 		}
7957 		break;
7958 	case PTR_TO_BTF_ID | MEM_PERCPU:
7959 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
7960 	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
7961 		/* Handled by helper specific checks */
7962 		break;
7963 	default:
7964 		verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match");
7965 		return -EFAULT;
7966 	}
7967 	return 0;
7968 }
7969 
7970 static struct btf_field *
7971 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
7972 {
7973 	struct btf_field *field;
7974 	struct btf_record *rec;
7975 
7976 	rec = reg_btf_record(reg);
7977 	if (!rec)
7978 		return NULL;
7979 
7980 	field = btf_record_find(rec, off, fields);
7981 	if (!field)
7982 		return NULL;
7983 
7984 	return field;
7985 }
7986 
7987 static int check_func_arg_reg_off(struct bpf_verifier_env *env,
7988 				  const struct bpf_reg_state *reg, argno_t argno,
7989 				  enum bpf_arg_type arg_type)
7990 {
7991 	u32 type = reg->type;
7992 
7993 	/* When referenced register is passed to release function, its fixed
7994 	 * offset must be 0.
7995 	 *
7996 	 * We will check arg_type_is_release reg has id when storing
7997 	 * meta->release_regno.
7998 	 */
7999 	if (arg_type_is_release(arg_type)) {
8000 		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
8001 		 * may not directly point to the object being released, but to
8002 		 * dynptr pointing to such object, which might be at some offset
8003 		 * on the stack. In that case, we simply to fallback to the
8004 		 * default handling.
8005 		 */
8006 		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
8007 			return 0;
8008 
8009 		/* Doing check_ptr_off_reg check for the offset will catch this
8010 		 * because fixed_off_ok is false, but checking here allows us
8011 		 * to give the user a better error message.
8012 		 */
8013 		if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) {
8014 			verbose(env, "%s must have zero offset when passed to release func or trusted arg to kfunc\n",
8015 				reg_arg_name(env, argno));
8016 			return -EINVAL;
8017 		}
8018 	}
8019 
8020 	switch (type) {
8021 	/* Pointer types where both fixed and variable offset is explicitly allowed: */
8022 	case PTR_TO_STACK:
8023 	case PTR_TO_PACKET:
8024 	case PTR_TO_PACKET_META:
8025 	case PTR_TO_MAP_KEY:
8026 	case PTR_TO_MAP_VALUE:
8027 	case PTR_TO_MEM:
8028 	case PTR_TO_MEM | MEM_RDONLY:
8029 	case PTR_TO_MEM | MEM_RINGBUF:
8030 	case PTR_TO_BUF:
8031 	case PTR_TO_BUF | MEM_RDONLY:
8032 	case PTR_TO_ARENA:
8033 	case SCALAR_VALUE:
8034 		return 0;
8035 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
8036 	 * fixed offset.
8037 	 */
8038 	case PTR_TO_BTF_ID:
8039 	case PTR_TO_BTF_ID | MEM_ALLOC:
8040 	case PTR_TO_BTF_ID | PTR_TRUSTED:
8041 	case PTR_TO_BTF_ID | MEM_RCU:
8042 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
8043 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
8044 		/* When referenced PTR_TO_BTF_ID is passed to release function,
8045 		 * its fixed offset must be 0. In the other cases, fixed offset
8046 		 * can be non-zero. This was already checked above. So pass
8047 		 * fixed_off_ok as true to allow fixed offset for all other
8048 		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
8049 		 * still need to do checks instead of returning.
8050 		 */
8051 		return __check_ptr_off_reg(env, reg, argno, true);
8052 	case PTR_TO_CTX:
8053 		/*
8054 		 * Allow fixed and variable offsets for syscall context, but
8055 		 * only when the argument is passed as memory, not ctx,
8056 		 * otherwise we may get modified ctx in tail called programs and
8057 		 * global subprogs (that may act as extension prog hooks).
8058 		 */
8059 		if (arg_type != ARG_PTR_TO_CTX && is_var_ctx_off_allowed(env->prog))
8060 			return 0;
8061 		fallthrough;
8062 	default:
8063 		return __check_ptr_off_reg(env, reg, argno, false);
8064 	}
8065 }
8066 
8067 static int check_arg_const_str(struct bpf_verifier_env *env,
8068 			       struct bpf_reg_state *reg, argno_t argno)
8069 {
8070 	struct bpf_map *map = reg->map_ptr;
8071 	int err;
8072 	int map_off;
8073 	u64 map_addr;
8074 	char *str_ptr;
8075 
8076 	if (reg->type != PTR_TO_MAP_VALUE)
8077 		return -EINVAL;
8078 
8079 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
8080 		verbose(env, "%s points to insn_array map which cannot be used as const string\n",
8081 			reg_arg_name(env, argno));
8082 		return -EACCES;
8083 	}
8084 
8085 	if (!bpf_map_is_rdonly(map)) {
8086 		verbose(env, "%s does not point to a readonly map'\n", reg_arg_name(env, argno));
8087 		return -EACCES;
8088 	}
8089 
8090 	if (!tnum_is_const(reg->var_off)) {
8091 		verbose(env, "%s is not a constant address'\n", reg_arg_name(env, argno));
8092 		return -EACCES;
8093 	}
8094 
8095 	if (!map->ops->map_direct_value_addr) {
8096 		verbose(env, "no direct value access support for this map type\n");
8097 		return -EACCES;
8098 	}
8099 
8100 	err = check_map_access(env, reg, argno, 0,
8101 			       map->value_size - reg->var_off.value, false,
8102 			       ACCESS_HELPER);
8103 	if (err)
8104 		return err;
8105 
8106 	map_off = reg->var_off.value;
8107 	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8108 	if (err) {
8109 		verbose(env, "direct value access on string failed\n");
8110 		return err;
8111 	}
8112 
8113 	str_ptr = (char *)(long)(map_addr);
8114 	if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8115 		verbose(env, "string is not zero-terminated\n");
8116 		return -EINVAL;
8117 	}
8118 	return 0;
8119 }
8120 
8121 /* Returns constant key value in `value` if possible, else negative error */
8122 static int get_constant_map_key(struct bpf_verifier_env *env,
8123 				struct bpf_reg_state *key,
8124 				u32 key_size,
8125 				s64 *value)
8126 {
8127 	struct bpf_func_state *state = bpf_func(env, key);
8128 	struct bpf_reg_state *reg;
8129 	int slot, spi, off;
8130 	int spill_size = 0;
8131 	int zero_size = 0;
8132 	int stack_off;
8133 	int i, err;
8134 	u8 *stype;
8135 
8136 	if (!env->bpf_capable)
8137 		return -EOPNOTSUPP;
8138 	if (key->type != PTR_TO_STACK)
8139 		return -EOPNOTSUPP;
8140 	if (!tnum_is_const(key->var_off))
8141 		return -EOPNOTSUPP;
8142 
8143 	stack_off = key->var_off.value;
8144 	slot = -stack_off - 1;
8145 	spi = slot / BPF_REG_SIZE;
8146 	off = slot % BPF_REG_SIZE;
8147 	stype = state->stack[spi].slot_type;
8148 
8149 	/* First handle precisely tracked STACK_ZERO */
8150 	for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
8151 		zero_size++;
8152 	if (zero_size >= key_size) {
8153 		*value = 0;
8154 		return 0;
8155 	}
8156 
8157 	/* Check that stack contains a scalar spill of expected size */
8158 	if (!bpf_is_spilled_scalar_reg(&state->stack[spi]))
8159 		return -EOPNOTSUPP;
8160 	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
8161 		spill_size++;
8162 	if (spill_size != key_size)
8163 		return -EOPNOTSUPP;
8164 
8165 	reg = &state->stack[spi].spilled_ptr;
8166 	if (!tnum_is_const(reg->var_off))
8167 		/* Stack value not statically known */
8168 		return -EOPNOTSUPP;
8169 
8170 	/* We are relying on a constant value. So mark as precise
8171 	 * to prevent pruning on it.
8172 	 */
8173 	bpf_bt_set_frame_slot(&env->bt, key->frameno, spi);
8174 	err = mark_chain_precision_batch(env, env->cur_state);
8175 	if (err < 0)
8176 		return err;
8177 
8178 	*value = reg->var_off.value;
8179 	return 0;
8180 }
8181 
8182 static bool can_elide_value_nullness(const struct bpf_map *map);
8183 
8184 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8185 			  struct bpf_call_arg_meta *meta,
8186 			  const struct bpf_func_proto *fn,
8187 			  int insn_idx)
8188 {
8189 	u32 regno = BPF_REG_1 + arg;
8190 	struct bpf_reg_state *reg = reg_state(env, regno);
8191 	enum bpf_arg_type arg_type = fn->arg_type[arg];
8192 	argno_t argno = argno_from_arg(arg + 1);
8193 	enum bpf_reg_type type = reg->type;
8194 	u32 *arg_btf_id = NULL;
8195 	u32 key_size;
8196 	int err = 0;
8197 
8198 	if (arg_type == ARG_DONTCARE)
8199 		return 0;
8200 
8201 	err = check_reg_arg(env, regno, SRC_OP);
8202 	if (err)
8203 		return err;
8204 
8205 	if (arg_type == ARG_ANYTHING) {
8206 		if (is_pointer_value(env, regno)) {
8207 			verbose(env, "R%d leaks addr into helper function\n",
8208 				regno);
8209 			return -EACCES;
8210 		}
8211 		return 0;
8212 	}
8213 
8214 	if (type_is_pkt_pointer(type) &&
8215 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8216 		verbose(env, "helper access to the packet is not allowed\n");
8217 		return -EACCES;
8218 	}
8219 
8220 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8221 		err = resolve_map_arg_type(env, meta, &arg_type);
8222 		if (err)
8223 			return err;
8224 	}
8225 
8226 	if (bpf_register_is_null(reg) && type_may_be_null(arg_type))
8227 		/* A NULL register has a SCALAR_VALUE type, so skip
8228 		 * type checking.
8229 		 */
8230 		goto skip_type_check;
8231 
8232 	/* arg_btf_id and arg_size are in a union. */
8233 	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8234 	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8235 		arg_btf_id = fn->arg_btf_id[arg];
8236 
8237 	err = check_reg_type(env, reg, argno_from_reg(regno), arg_type, arg_btf_id, meta);
8238 	if (err)
8239 		return err;
8240 
8241 	err = check_func_arg_reg_off(env, reg, argno_from_reg(regno), arg_type);
8242 	if (err)
8243 		return err;
8244 
8245 skip_type_check:
8246 	if (arg_type_is_release(arg_type) && !arg_type_is_dynptr(arg_type) &&
8247 	    !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) {
8248 		verbose(env, "release helper %s expects referenced PTR_TO_BTF_ID passed to %s\n",
8249 			func_id_name(meta->func_id), reg_arg_name(env, argno));
8250 		return -EINVAL;
8251 	}
8252 
8253 	if (reg_is_referenced(env, reg))
8254 		update_ref_obj(&meta->ref_obj, reg);
8255 
8256 	switch (base_type(arg_type)) {
8257 	case ARG_CONST_MAP_PTR:
8258 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8259 		if (meta->map.ptr) {
8260 			/* Use map_uid (which is unique id of inner map) to reject:
8261 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8262 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8263 			 * if (inner_map1 && inner_map2) {
8264 			 *     timer = bpf_map_lookup_elem(inner_map1);
8265 			 *     if (timer)
8266 			 *         // mismatch would have been allowed
8267 			 *         bpf_timer_init(timer, inner_map2);
8268 			 * }
8269 			 *
8270 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
8271 			 */
8272 			if (meta->map.ptr != reg->map_ptr ||
8273 			    meta->map.uid != reg->map_uid) {
8274 				verbose(env,
8275 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8276 					meta->map.uid, reg->map_uid);
8277 				return -EINVAL;
8278 			}
8279 		}
8280 		meta->map.ptr = reg->map_ptr;
8281 		meta->map.uid = reg->map_uid;
8282 		break;
8283 	case ARG_PTR_TO_MAP_KEY:
8284 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
8285 		 * check that [key, key + map->key_size) are within
8286 		 * stack limits and initialized
8287 		 */
8288 		if (!meta->map.ptr) {
8289 			/* in function declaration map_ptr must come before
8290 			 * map_key, so that it's verified and known before
8291 			 * we have to check map_key here. Otherwise it means
8292 			 * that kernel subsystem misconfigured verifier
8293 			 */
8294 			verifier_bug(env, "invalid map_ptr to access map->key");
8295 			return -EFAULT;
8296 		}
8297 		key_size = meta->map.ptr->key_size;
8298 		err = check_helper_mem_access(env, reg, argno_from_reg(regno), key_size, BPF_READ, false, NULL);
8299 		if (err)
8300 			return err;
8301 		if (can_elide_value_nullness(meta->map.ptr)) {
8302 			err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
8303 			if (err < 0) {
8304 				meta->const_map_key = -1;
8305 				if (err == -EOPNOTSUPP)
8306 					err = 0;
8307 				else
8308 					return err;
8309 			}
8310 		}
8311 		break;
8312 	case ARG_PTR_TO_MAP_VALUE:
8313 		if (type_may_be_null(arg_type) && bpf_register_is_null(reg))
8314 			return 0;
8315 
8316 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
8317 		 * check [value, value + map->value_size) validity
8318 		 */
8319 		if (!meta->map.ptr) {
8320 			/* kernel subsystem misconfigured verifier */
8321 			verifier_bug(env, "invalid map_ptr to access map->value");
8322 			return -EFAULT;
8323 		}
8324 		meta->raw_mode = arg_type & MEM_UNINIT;
8325 		err = check_helper_mem_access(env, reg, argno_from_reg(regno), meta->map.ptr->value_size,
8326 					      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8327 					      false, meta);
8328 		break;
8329 	case ARG_PTR_TO_PERCPU_BTF_ID:
8330 		if (!reg->btf_id) {
8331 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8332 			return -EACCES;
8333 		}
8334 		meta->ret_btf = reg->btf;
8335 		meta->ret_btf_id = reg->btf_id;
8336 		break;
8337 	case ARG_PTR_TO_SPIN_LOCK:
8338 		if (in_rbtree_lock_required_cb(env)) {
8339 			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8340 			return -EACCES;
8341 		}
8342 		if (meta->func_id == BPF_FUNC_spin_lock) {
8343 			err = process_spin_lock(env, reg, argno_from_reg(regno), PROCESS_SPIN_LOCK);
8344 			if (err)
8345 				return err;
8346 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
8347 			err = process_spin_lock(env, reg, argno_from_reg(regno), 0);
8348 			if (err)
8349 				return err;
8350 		} else {
8351 			verifier_bug(env, "spin lock arg on unexpected helper");
8352 			return -EFAULT;
8353 		}
8354 		break;
8355 	case ARG_PTR_TO_TIMER:
8356 		err = process_timer_helper(env, reg, argno_from_reg(regno), meta);
8357 		if (err)
8358 			return err;
8359 		break;
8360 	case ARG_PTR_TO_FUNC:
8361 		meta->subprogno = reg->subprogno;
8362 		break;
8363 	case ARG_PTR_TO_MEM:
8364 		/* The access to this pointer is only checked when we hit the
8365 		 * next is_mem_size argument below.
8366 		 */
8367 		meta->raw_mode = arg_type & MEM_UNINIT;
8368 		if (arg_type & MEM_FIXED_SIZE) {
8369 			err = check_helper_mem_access(env, reg, argno_from_reg(regno), fn->arg_size[arg],
8370 						      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8371 						      false, meta);
8372 			if (err)
8373 				return err;
8374 			if (arg_type & MEM_ALIGNED)
8375 				err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
8376 		}
8377 		break;
8378 	case ARG_CONST_SIZE:
8379 		err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1),
8380 					 argno_from_reg(regno),
8381 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8382 					 BPF_WRITE : BPF_READ,
8383 					 false, meta);
8384 		break;
8385 	case ARG_CONST_SIZE_OR_ZERO:
8386 		err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1),
8387 					 argno_from_reg(regno),
8388 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8389 					 BPF_WRITE : BPF_READ,
8390 					 true, meta);
8391 		break;
8392 	case ARG_PTR_TO_DYNPTR:
8393 		err = process_dynptr_func(env, reg, argno_from_reg(regno), insn_idx, arg_type, &meta->ref_obj,
8394 					  &meta->dynptr);
8395 		if (err)
8396 			return err;
8397 		break;
8398 	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8399 		if (!tnum_is_const(reg->var_off)) {
8400 			verbose(env, "R%d is not a known constant'\n",
8401 				regno);
8402 			return -EACCES;
8403 		}
8404 		meta->mem_size = reg->var_off.value;
8405 		err = mark_chain_precision(env, regno);
8406 		if (err)
8407 			return err;
8408 		break;
8409 	case ARG_PTR_TO_CONST_STR:
8410 	{
8411 		err = check_arg_const_str(env, reg, argno_from_reg(regno));
8412 		if (err)
8413 			return err;
8414 		break;
8415 	}
8416 	case ARG_KPTR_XCHG_DEST:
8417 		err = process_kptr_func(env, regno, meta);
8418 		if (err)
8419 			return err;
8420 		break;
8421 	}
8422 
8423 	return err;
8424 }
8425 
8426 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8427 {
8428 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
8429 	enum bpf_prog_type type = resolve_prog_type(env->prog);
8430 
8431 	if (func_id != BPF_FUNC_map_update_elem &&
8432 	    func_id != BPF_FUNC_map_delete_elem)
8433 		return false;
8434 
8435 	/* It's not possible to get access to a locked struct sock in these
8436 	 * contexts, so updating is safe.
8437 	 */
8438 	switch (type) {
8439 	case BPF_PROG_TYPE_TRACING:
8440 		if (eatype == BPF_TRACE_ITER)
8441 			return true;
8442 		break;
8443 	case BPF_PROG_TYPE_SOCK_OPS:
8444 		/* map_update allowed only via dedicated helpers with event type checks */
8445 		if (func_id == BPF_FUNC_map_delete_elem)
8446 			return true;
8447 		break;
8448 	case BPF_PROG_TYPE_SOCKET_FILTER:
8449 	case BPF_PROG_TYPE_SCHED_CLS:
8450 	case BPF_PROG_TYPE_SCHED_ACT:
8451 	case BPF_PROG_TYPE_XDP:
8452 	case BPF_PROG_TYPE_SK_REUSEPORT:
8453 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
8454 	case BPF_PROG_TYPE_SK_LOOKUP:
8455 		return true;
8456 	default:
8457 		break;
8458 	}
8459 
8460 	verbose(env, "cannot update sockmap in this context\n");
8461 	return false;
8462 }
8463 
8464 bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8465 {
8466 	return env->prog->jit_requested &&
8467 	       bpf_jit_supports_subprog_tailcalls();
8468 }
8469 
8470 static int check_map_func_compatibility(struct bpf_verifier_env *env,
8471 					struct bpf_map *map, int func_id)
8472 {
8473 	if (!map)
8474 		return 0;
8475 
8476 	/* We need a two way check, first is from map perspective ... */
8477 	switch (map->map_type) {
8478 	case BPF_MAP_TYPE_PROG_ARRAY:
8479 		if (func_id != BPF_FUNC_tail_call)
8480 			goto error;
8481 		break;
8482 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8483 		if (func_id != BPF_FUNC_perf_event_read &&
8484 		    func_id != BPF_FUNC_perf_event_output &&
8485 		    func_id != BPF_FUNC_skb_output &&
8486 		    func_id != BPF_FUNC_perf_event_read_value &&
8487 		    func_id != BPF_FUNC_xdp_output)
8488 			goto error;
8489 		break;
8490 	case BPF_MAP_TYPE_RINGBUF:
8491 		if (func_id != BPF_FUNC_ringbuf_output &&
8492 		    func_id != BPF_FUNC_ringbuf_reserve &&
8493 		    func_id != BPF_FUNC_ringbuf_query &&
8494 		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8495 		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8496 		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
8497 			goto error;
8498 		break;
8499 	case BPF_MAP_TYPE_USER_RINGBUF:
8500 		if (func_id != BPF_FUNC_user_ringbuf_drain)
8501 			goto error;
8502 		break;
8503 	case BPF_MAP_TYPE_STACK_TRACE:
8504 		if (func_id != BPF_FUNC_get_stackid)
8505 			goto error;
8506 		break;
8507 	case BPF_MAP_TYPE_CGROUP_ARRAY:
8508 		if (func_id != BPF_FUNC_skb_under_cgroup &&
8509 		    func_id != BPF_FUNC_current_task_under_cgroup)
8510 			goto error;
8511 		break;
8512 	case BPF_MAP_TYPE_CGROUP_STORAGE:
8513 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
8514 		if (func_id != BPF_FUNC_get_local_storage)
8515 			goto error;
8516 		break;
8517 	case BPF_MAP_TYPE_DEVMAP:
8518 	case BPF_MAP_TYPE_DEVMAP_HASH:
8519 		if (func_id != BPF_FUNC_redirect_map &&
8520 		    func_id != BPF_FUNC_map_lookup_elem)
8521 			goto error;
8522 		break;
8523 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
8524 	 * appear.
8525 	 */
8526 	case BPF_MAP_TYPE_CPUMAP:
8527 		if (func_id != BPF_FUNC_redirect_map)
8528 			goto error;
8529 		break;
8530 	case BPF_MAP_TYPE_XSKMAP:
8531 		if (func_id != BPF_FUNC_redirect_map &&
8532 		    func_id != BPF_FUNC_map_lookup_elem)
8533 			goto error;
8534 		break;
8535 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
8536 	case BPF_MAP_TYPE_HASH_OF_MAPS:
8537 		if (func_id != BPF_FUNC_map_lookup_elem)
8538 			goto error;
8539 		break;
8540 	case BPF_MAP_TYPE_SOCKMAP:
8541 		if (func_id != BPF_FUNC_sk_redirect_map &&
8542 		    func_id != BPF_FUNC_sock_map_update &&
8543 		    func_id != BPF_FUNC_msg_redirect_map &&
8544 		    func_id != BPF_FUNC_sk_select_reuseport &&
8545 		    func_id != BPF_FUNC_map_lookup_elem &&
8546 		    !may_update_sockmap(env, func_id))
8547 			goto error;
8548 		break;
8549 	case BPF_MAP_TYPE_SOCKHASH:
8550 		if (func_id != BPF_FUNC_sk_redirect_hash &&
8551 		    func_id != BPF_FUNC_sock_hash_update &&
8552 		    func_id != BPF_FUNC_msg_redirect_hash &&
8553 		    func_id != BPF_FUNC_sk_select_reuseport &&
8554 		    func_id != BPF_FUNC_map_lookup_elem &&
8555 		    !may_update_sockmap(env, func_id))
8556 			goto error;
8557 		break;
8558 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
8559 		if (func_id != BPF_FUNC_sk_select_reuseport)
8560 			goto error;
8561 		break;
8562 	case BPF_MAP_TYPE_QUEUE:
8563 	case BPF_MAP_TYPE_STACK:
8564 		if (func_id != BPF_FUNC_map_peek_elem &&
8565 		    func_id != BPF_FUNC_map_pop_elem &&
8566 		    func_id != BPF_FUNC_map_push_elem)
8567 			goto error;
8568 		break;
8569 	case BPF_MAP_TYPE_SK_STORAGE:
8570 		if (func_id != BPF_FUNC_sk_storage_get &&
8571 		    func_id != BPF_FUNC_sk_storage_delete &&
8572 		    func_id != BPF_FUNC_kptr_xchg)
8573 			goto error;
8574 		break;
8575 	case BPF_MAP_TYPE_INODE_STORAGE:
8576 		if (func_id != BPF_FUNC_inode_storage_get &&
8577 		    func_id != BPF_FUNC_inode_storage_delete &&
8578 		    func_id != BPF_FUNC_kptr_xchg)
8579 			goto error;
8580 		break;
8581 	case BPF_MAP_TYPE_TASK_STORAGE:
8582 		if (func_id != BPF_FUNC_task_storage_get &&
8583 		    func_id != BPF_FUNC_task_storage_delete &&
8584 		    func_id != BPF_FUNC_kptr_xchg)
8585 			goto error;
8586 		break;
8587 	case BPF_MAP_TYPE_CGRP_STORAGE:
8588 		if (func_id != BPF_FUNC_cgrp_storage_get &&
8589 		    func_id != BPF_FUNC_cgrp_storage_delete &&
8590 		    func_id != BPF_FUNC_kptr_xchg)
8591 			goto error;
8592 		break;
8593 	case BPF_MAP_TYPE_BLOOM_FILTER:
8594 		if (func_id != BPF_FUNC_map_peek_elem &&
8595 		    func_id != BPF_FUNC_map_push_elem)
8596 			goto error;
8597 		break;
8598 	case BPF_MAP_TYPE_INSN_ARRAY:
8599 		goto error;
8600 	default:
8601 		break;
8602 	}
8603 
8604 	/* ... and second from the function itself. */
8605 	switch (func_id) {
8606 	case BPF_FUNC_tail_call:
8607 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
8608 			goto error;
8609 		if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) {
8610 			verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
8611 			return -EINVAL;
8612 		}
8613 		break;
8614 	case BPF_FUNC_perf_event_read:
8615 	case BPF_FUNC_perf_event_output:
8616 	case BPF_FUNC_perf_event_read_value:
8617 	case BPF_FUNC_skb_output:
8618 	case BPF_FUNC_xdp_output:
8619 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
8620 			goto error;
8621 		break;
8622 	case BPF_FUNC_ringbuf_output:
8623 	case BPF_FUNC_ringbuf_reserve:
8624 	case BPF_FUNC_ringbuf_query:
8625 	case BPF_FUNC_ringbuf_reserve_dynptr:
8626 	case BPF_FUNC_ringbuf_submit_dynptr:
8627 	case BPF_FUNC_ringbuf_discard_dynptr:
8628 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
8629 			goto error;
8630 		break;
8631 	case BPF_FUNC_user_ringbuf_drain:
8632 		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
8633 			goto error;
8634 		break;
8635 	case BPF_FUNC_get_stackid:
8636 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
8637 			goto error;
8638 		break;
8639 	case BPF_FUNC_current_task_under_cgroup:
8640 	case BPF_FUNC_skb_under_cgroup:
8641 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
8642 			goto error;
8643 		break;
8644 	case BPF_FUNC_redirect_map:
8645 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
8646 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
8647 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
8648 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
8649 			goto error;
8650 		break;
8651 	case BPF_FUNC_sk_redirect_map:
8652 	case BPF_FUNC_msg_redirect_map:
8653 	case BPF_FUNC_sock_map_update:
8654 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
8655 			goto error;
8656 		break;
8657 	case BPF_FUNC_sk_redirect_hash:
8658 	case BPF_FUNC_msg_redirect_hash:
8659 	case BPF_FUNC_sock_hash_update:
8660 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
8661 			goto error;
8662 		break;
8663 	case BPF_FUNC_get_local_storage:
8664 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
8665 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
8666 			goto error;
8667 		break;
8668 	case BPF_FUNC_sk_select_reuseport:
8669 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
8670 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
8671 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
8672 			goto error;
8673 		break;
8674 	case BPF_FUNC_map_pop_elem:
8675 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8676 		    map->map_type != BPF_MAP_TYPE_STACK)
8677 			goto error;
8678 		break;
8679 	case BPF_FUNC_map_peek_elem:
8680 	case BPF_FUNC_map_push_elem:
8681 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8682 		    map->map_type != BPF_MAP_TYPE_STACK &&
8683 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
8684 			goto error;
8685 		break;
8686 	case BPF_FUNC_map_lookup_percpu_elem:
8687 		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
8688 		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
8689 		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
8690 			goto error;
8691 		break;
8692 	case BPF_FUNC_sk_storage_get:
8693 	case BPF_FUNC_sk_storage_delete:
8694 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
8695 			goto error;
8696 		break;
8697 	case BPF_FUNC_inode_storage_get:
8698 	case BPF_FUNC_inode_storage_delete:
8699 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
8700 			goto error;
8701 		break;
8702 	case BPF_FUNC_task_storage_get:
8703 	case BPF_FUNC_task_storage_delete:
8704 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
8705 			goto error;
8706 		break;
8707 	case BPF_FUNC_cgrp_storage_get:
8708 	case BPF_FUNC_cgrp_storage_delete:
8709 		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
8710 			goto error;
8711 		break;
8712 	default:
8713 		break;
8714 	}
8715 
8716 	return 0;
8717 error:
8718 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
8719 		map->map_type, func_id_name(func_id), func_id);
8720 	return -EINVAL;
8721 }
8722 
8723 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
8724 {
8725 	int count = 0;
8726 
8727 	if (arg_type_is_raw_mem(fn->arg1_type))
8728 		count++;
8729 	if (arg_type_is_raw_mem(fn->arg2_type))
8730 		count++;
8731 	if (arg_type_is_raw_mem(fn->arg3_type))
8732 		count++;
8733 	if (arg_type_is_raw_mem(fn->arg4_type))
8734 		count++;
8735 	if (arg_type_is_raw_mem(fn->arg5_type))
8736 		count++;
8737 
8738 	/* We only support one arg being in raw mode at the moment,
8739 	 * which is sufficient for the helper functions we have
8740 	 * right now.
8741 	 */
8742 	return count <= 1;
8743 }
8744 
8745 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
8746 {
8747 	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
8748 	bool has_size = fn->arg_size[arg] != 0;
8749 	bool is_next_size = false;
8750 
8751 	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
8752 		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
8753 
8754 	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
8755 		return is_next_size;
8756 
8757 	return has_size == is_next_size || is_next_size == is_fixed;
8758 }
8759 
8760 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
8761 {
8762 	/* bpf_xxx(..., buf, len) call will access 'len'
8763 	 * bytes from memory 'buf'. Both arg types need
8764 	 * to be paired, so make sure there's no buggy
8765 	 * helper function specification.
8766 	 */
8767 	if (arg_type_is_mem_size(fn->arg1_type) ||
8768 	    check_args_pair_invalid(fn, 0) ||
8769 	    check_args_pair_invalid(fn, 1) ||
8770 	    check_args_pair_invalid(fn, 2) ||
8771 	    check_args_pair_invalid(fn, 3) ||
8772 	    check_args_pair_invalid(fn, 4))
8773 		return false;
8774 
8775 	return true;
8776 }
8777 
8778 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
8779 {
8780 	int i;
8781 
8782 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8783 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
8784 			return !!fn->arg_btf_id[i];
8785 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
8786 			return fn->arg_btf_id[i] == BPF_PTR_POISON;
8787 		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
8788 		    /* arg_btf_id and arg_size are in a union. */
8789 		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
8790 		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
8791 			return false;
8792 	}
8793 
8794 	return true;
8795 }
8796 
8797 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn)
8798 {
8799 	int i;
8800 
8801 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8802 		enum bpf_arg_type arg_type = fn->arg_type[i];
8803 
8804 		if (base_type(arg_type) != ARG_PTR_TO_MEM)
8805 			continue;
8806 		if (!(arg_type & (MEM_WRITE | MEM_RDONLY)))
8807 			return false;
8808 	}
8809 
8810 	return true;
8811 }
8812 
8813 static bool check_proto_release_reg(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta)
8814 {
8815 	int i;
8816 
8817 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8818 		enum bpf_arg_type arg_type = fn->arg_type[i];
8819 
8820 		if (arg_type_is_release(arg_type)) {
8821 			if (meta->release_regno)
8822 				return false;
8823 			meta->release_regno = i + 1;
8824 		}
8825 	}
8826 
8827 	return true;
8828 }
8829 
8830 static int check_func_proto(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta)
8831 {
8832 	return check_raw_mode_ok(fn) &&
8833 	       check_arg_pair_ok(fn) &&
8834 	       check_mem_arg_rw_flag_ok(fn) &&
8835 	       check_proto_release_reg(fn, meta) &&
8836 	       check_btf_id_ok(fn) ? 0 : -EINVAL;
8837 }
8838 
8839 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
8840  * are now invalid, so turn them into unknown SCALAR_VALUE.
8841  *
8842  * This also applies to dynptr slices belonging to skb and xdp dynptrs,
8843  * since these slices point to packet data.
8844  */
8845 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
8846 {
8847 	struct bpf_func_state *state;
8848 	struct bpf_reg_state *reg;
8849 
8850 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8851 		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
8852 			mark_reg_invalid(env, reg);
8853 	}));
8854 }
8855 
8856 enum {
8857 	AT_PKT_END = -1,
8858 	BEYOND_PKT_END = -2,
8859 };
8860 
8861 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
8862 {
8863 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8864 	struct bpf_reg_state *reg = &state->regs[regn];
8865 
8866 	if (reg->type != PTR_TO_PACKET)
8867 		/* PTR_TO_PACKET_META is not supported yet */
8868 		return;
8869 
8870 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
8871 	 * How far beyond pkt_end it goes is unknown.
8872 	 * if (!range_open) it's the case of pkt >= pkt_end
8873 	 * if (range_open) it's the case of pkt > pkt_end
8874 	 * hence this pointer is at least 1 byte bigger than pkt_end
8875 	 */
8876 	if (range_open)
8877 		reg->range = BEYOND_PKT_END;
8878 	else
8879 		reg->range = AT_PKT_END;
8880 }
8881 
8882 static int release_reference_nomark(struct bpf_verifier_state *state, int id)
8883 {
8884 	int i;
8885 
8886 	for (i = 0; i < state->acquired_refs; i++) {
8887 		if (state->refs[i].type != REF_TYPE_PTR)
8888 			continue;
8889 		if (state->refs[i].id == id) {
8890 			release_reference_state(state, i);
8891 			return 0;
8892 		}
8893 	}
8894 	return -EINVAL;
8895 }
8896 
8897 static int idstack_push(struct bpf_idmap *idmap, u32 id)
8898 {
8899 	int i;
8900 
8901 	if (!id)
8902 		return 0;
8903 
8904 	for (i = 0; i < idmap->cnt; i++)
8905 		if (idmap->map[i].old == id)
8906 			return 0;
8907 
8908 	if (WARN_ON_ONCE(idmap->cnt >= BPF_ID_MAP_SIZE))
8909 		return -EFAULT;
8910 
8911 	idmap->map[idmap->cnt++].old = id;
8912 	return 0;
8913 }
8914 
8915 static int idstack_pop(struct bpf_idmap *idmap)
8916 {
8917 	if (!idmap->cnt)
8918 		return 0;
8919 
8920 	return idmap->map[--idmap->cnt].old;
8921 }
8922 
8923 /* Release id and objects derived from it iteratively in a DFS manner */
8924 static int release_reference(struct bpf_verifier_env *env, int id)
8925 {
8926 	u32 mask = (1 << STACK_SPILL) | (1 << STACK_DYNPTR);
8927 	struct bpf_verifier_state *vstate = env->cur_state;
8928 	struct bpf_idmap *idstack = &env->idmap_scratch;
8929 	struct bpf_stack_state *stack;
8930 	struct bpf_func_state *state;
8931 	struct bpf_reg_state *reg;
8932 	int i, err;
8933 
8934 	idstack->cnt = 0;
8935 	err = idstack_push(idstack, id);
8936 	if (err)
8937 		return err;
8938 
8939 	if (find_reference_state(vstate, id))
8940 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
8941 
8942 	while ((id = idstack_pop(idstack))) {
8943 		/*
8944 		 * Child references are inaccessible after parent is released,
8945 		 * any child references that exist at this point are a leak.
8946 		 */
8947 		for (i = 0; i < vstate->acquired_refs; i++) {
8948 			if (vstate->refs[i].type != REF_TYPE_PTR)
8949 				continue;
8950 			if (vstate->refs[i].parent_id != id)
8951 				continue;
8952 			verbose(env, "Leaking reference id=%d alloc_insn=%d. Release it first.\n",
8953 				vstate->refs[i].id, vstate->refs[i].insn_idx);
8954 			return -EINVAL;
8955 		}
8956 
8957 		bpf_for_each_reg_in_vstate_mask(vstate, state, reg, stack, mask, ({
8958 			if (reg->id != id && reg->parent_id != id)
8959 				continue;
8960 
8961 			/* Free objects derived from the current object */
8962 			if (reg->parent_id == id) {
8963 				err = idstack_push(idstack, reg->id);
8964 				if (err)
8965 					return err;
8966 			}
8967 
8968 			if (!stack || stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL)
8969 				mark_reg_invalid(env, reg);
8970 			else if (stack->slot_type[BPF_REG_SIZE - 1] == STACK_DYNPTR)
8971 				invalidate_dynptr(env, stack);
8972 		}));
8973 	}
8974 
8975 	return 0;
8976 }
8977 
8978 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
8979 {
8980 	struct bpf_func_state *unused;
8981 	struct bpf_reg_state *reg;
8982 
8983 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
8984 		if (type_is_non_owning_ref(reg->type))
8985 			mark_reg_invalid(env, reg);
8986 	}));
8987 }
8988 
8989 static void invalidate_rcu_protected_refs(struct bpf_verifier_env *env)
8990 {
8991 	struct bpf_stack_state *stack;
8992 	struct bpf_func_state *state;
8993 	struct bpf_reg_state *reg;
8994 	u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
8995 
8996 	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, clear_mask, ({
8997 		if (reg->type & MEM_RCU) {
8998 			reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
8999 			reg->type |= PTR_UNTRUSTED;
9000 		}
9001 	}));
9002 }
9003 
9004 static int ref_convert_alloc_rcu_protected(struct bpf_verifier_env *env, u32 id)
9005 {
9006 	struct bpf_func_state *state;
9007 	struct bpf_reg_state *reg;
9008 	int err;
9009 
9010 	err = release_reference_nomark(env->cur_state, id);
9011 
9012 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9013 		if (reg->id != id)
9014 			continue;
9015 		if ((reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
9016 			reg->id = 0;
9017 			reg->type &= ~MEM_ALLOC;
9018 			reg->type |= MEM_RCU;
9019 		}
9020 	}));
9021 
9022 	return err;
9023 }
9024 
9025 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9026 				    struct bpf_reg_state *regs)
9027 {
9028 	int i;
9029 
9030 	/* after the call registers r0 - r5 were scratched */
9031 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9032 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
9033 		__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9034 	}
9035 }
9036 
9037 static void invalidate_outgoing_stack_args(const struct bpf_verifier_env *env,
9038 					   struct bpf_func_state *state)
9039 {
9040 	int i, nslots = state->out_stack_arg_cnt;
9041 
9042 	for (i = 0; i < nslots; i++)
9043 		bpf_mark_reg_not_init(env, &state->stack_arg_regs[i]);
9044 }
9045 
/* Callback that fills in a newly created callee frame from the caller's
 * frame for the call at @insn_idx. setup_func_entry() invokes it right
 * after initializing the callee frame and treats a non-zero return as an
 * error.
 */
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee,
				   int insn_idx);

/* Forward declaration; matches the set_callee_state_fn signature. */
static int set_callee_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *caller,
			    struct bpf_func_state *callee,
			    int insn_idx);
9054 
9055 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
9056 			    set_callee_state_fn set_callee_state_cb,
9057 			    struct bpf_verifier_state *state)
9058 {
9059 	struct bpf_func_state *caller, *callee;
9060 	int err;
9061 
9062 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9063 		verbose(env, "the call stack of %d frames is too deep\n",
9064 			state->curframe + 2);
9065 		return -E2BIG;
9066 	}
9067 
9068 	if (state->frame[state->curframe + 1]) {
9069 		verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
9070 		return -EFAULT;
9071 	}
9072 
9073 	caller = state->frame[state->curframe];
9074 	callee = kzalloc_obj(*callee, GFP_KERNEL_ACCOUNT);
9075 	if (!callee)
9076 		return -ENOMEM;
9077 	state->frame[state->curframe + 1] = callee;
9078 
9079 	/* callee cannot access r0, r6 - r9 for reading and has to write
9080 	 * into its own stack before reading from it.
9081 	 * callee can read/write into caller's stack
9082 	 */
9083 	init_func_state(env, callee,
9084 			/* remember the callsite, it will be used by bpf_exit */
9085 			callsite,
9086 			state->curframe + 1 /* frameno within this callchain */,
9087 			subprog /* subprog number within this prog */);
9088 	err = set_callee_state_cb(env, caller, callee, callsite);
9089 	if (err)
9090 		goto err_out;
9091 
9092 	/* only increment it after check_reg_arg() finished */
9093 	state->curframe++;
9094 
9095 	return 0;
9096 
9097 err_out:
9098 	free_func_state(callee);
9099 	state->frame[state->curframe + 1] = NULL;
9100 	return err;
9101 }
9102 
9103 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
9104 				    const struct btf *btf,
9105 				    struct bpf_reg_state *regs)
9106 {
9107 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
9108 	struct bpf_func_state *caller = cur_func(env);
9109 	struct bpf_verifier_log *log = &env->log;
9110 	struct ref_obj_desc ref_obj = {};
9111 	u32 i;
9112 	int ret, err;
9113 
9114 	ret = btf_prepare_func_args(env, subprog);
9115 	if (ret) {
9116 		if (bpf_in_stack_arg_cnt(sub) > 0) {
9117 			err = check_outgoing_stack_args(env, caller, sub->arg_cnt);
9118 			if (err)
9119 				return err;
9120 		}
9121 		return ret;
9122 	}
9123 
9124 	ret = check_outgoing_stack_args(env, caller, sub->arg_cnt);
9125 	if (ret)
9126 		return ret;
9127 
9128 	/* check that BTF function arguments match actual types that the
9129 	 * verifier sees.
9130 	 */
9131 	for (i = 0; i < sub->arg_cnt; i++) {
9132 		argno_t argno = argno_from_arg(i + 1);
9133 		struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i);
9134 		struct bpf_subprog_arg_info *arg = &sub->args[i];
9135 
9136 		if (arg->arg_type == ARG_ANYTHING) {
9137 			if (reg->type != SCALAR_VALUE) {
9138 				bpf_log(log, "%s is not a scalar\n", reg_arg_name(env, argno));
9139 				return -EINVAL;
9140 			}
9141 		} else if (arg->arg_type & PTR_UNTRUSTED) {
9142 			/*
9143 			 * Anything is allowed for untrusted arguments, as these are
9144 			 * read-only and probe read instructions would protect against
9145 			 * invalid memory access.
9146 			 */
9147 		} else if (arg->arg_type == ARG_PTR_TO_CTX) {
9148 			ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_CTX);
9149 			if (ret < 0)
9150 				return ret;
9151 			/* If function expects ctx type in BTF check that caller
9152 			 * is passing PTR_TO_CTX.
9153 			 */
9154 			if (reg->type != PTR_TO_CTX) {
9155 				bpf_log(log, "%s expects pointer to ctx\n",
9156 					reg_arg_name(env, argno));
9157 				return -EINVAL;
9158 			}
9159 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
9160 			ret = check_func_arg_reg_off(env, reg, argno, ARG_DONTCARE);
9161 			if (ret < 0)
9162 				return ret;
9163 			if (check_mem_reg(env, reg, argno, arg->mem_size))
9164 				return -EINVAL;
9165 			if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
9166 				bpf_log(log, "%s is expected to be non-NULL\n",
9167 					reg_arg_name(env, argno));
9168 				return -EINVAL;
9169 			}
9170 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
9171 			/*
9172 			 * Can pass any value and the kernel won't crash, but
9173 			 * only PTR_TO_ARENA or SCALAR make sense. Everything
9174 			 * else is a bug in the bpf program. Point it out to
9175 			 * the user at the verification time instead of
9176 			 * run-time debug nightmare.
9177 			 */
9178 			if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
9179 				bpf_log(log, "%s is not a pointer to arena or scalar.\n",
9180 					reg_arg_name(env, argno));
9181 				return -EINVAL;
9182 			}
9183 		} else if (arg->arg_type == ARG_PTR_TO_DYNPTR) {
9184 			ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_DYNPTR);
9185 			if (ret)
9186 				return ret;
9187 
9188 			ret = process_dynptr_func(env, reg, argno, -1, arg->arg_type, &ref_obj, NULL);
9189 			if (ret)
9190 				return ret;
9191 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
9192 			struct bpf_call_arg_meta meta;
9193 			int err;
9194 
9195 			if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type))
9196 				continue;
9197 
9198 			memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
9199 			err = check_reg_type(env, reg, argno, arg->arg_type, &arg->btf_id, &meta);
9200 			err = err ?: check_func_arg_reg_off(env, reg, argno, arg->arg_type);
9201 			if (err)
9202 				return err;
9203 		} else {
9204 			verifier_bug(env, "unrecognized %s type %d",
9205 				     reg_arg_name(env, argno), arg->arg_type);
9206 			return -EFAULT;
9207 		}
9208 	}
9209 
9210 	return 0;
9211 }
9212 
9213 /* Compare BTF of a function call with given bpf_reg_state.
9214  * Returns:
9215  * EFAULT - there is a verifier bug. Abort verification.
9216  * EINVAL - there is a type mismatch or BTF is not available.
9217  * 0 - BTF matches with what bpf_reg_state expects.
9218  * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
9219  */
9220 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
9221 				  struct bpf_reg_state *regs)
9222 {
9223 	struct bpf_prog *prog = env->prog;
9224 	struct btf *btf = prog->aux->btf;
9225 	u32 btf_id;
9226 	int err;
9227 
9228 	if (!prog->aux->func_info)
9229 		return -EINVAL;
9230 
9231 	btf_id = prog->aux->func_info[subprog].type_id;
9232 	if (!btf_id)
9233 		return -EFAULT;
9234 
9235 	if (prog->aux->func_info_aux[subprog].unreliable)
9236 		return -EINVAL;
9237 
9238 	err = btf_check_func_arg_match(env, subprog, btf, regs);
9239 	/* Compiler optimizations can remove arguments from static functions
9240 	 * or mismatched type can be passed into a global function.
9241 	 * In such cases mark the function as unreliable from BTF point of view.
9242 	 */
9243 	if (err)
9244 		prog->aux->func_info_aux[subprog].unreliable = true;
9245 	return err;
9246 }
9247 
9248 static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9249 			      int insn_idx, int subprog,
9250 			      set_callee_state_fn set_callee_state_cb)
9251 {
9252 	struct bpf_verifier_state *state = env->cur_state, *callback_state;
9253 	struct bpf_func_state *caller, *callee;
9254 	int err;
9255 
9256 	caller = state->frame[state->curframe];
9257 	err = btf_check_subprog_call(env, subprog, caller->regs);
9258 	if (err == -EFAULT)
9259 		return err;
9260 
9261 	/* set_callee_state is used for direct subprog calls, but we are
9262 	 * interested in validating only BPF helpers that can call subprogs as
9263 	 * callbacks
9264 	 */
9265 	env->subprog_info[subprog].is_cb = true;
9266 	if (bpf_pseudo_kfunc_call(insn) &&
9267 	    !is_callback_calling_kfunc(insn->imm)) {
9268 		verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
9269 			     func_id_name(insn->imm), insn->imm);
9270 		return -EFAULT;
9271 	} else if (!bpf_pseudo_kfunc_call(insn) &&
9272 		   !is_callback_calling_function(insn->imm)) { /* helper */
9273 		verifier_bug(env, "helper %s#%d not marked as callback-calling",
9274 			     func_id_name(insn->imm), insn->imm);
9275 		return -EFAULT;
9276 	}
9277 
9278 	if (bpf_is_async_callback_calling_insn(insn)) {
9279 		struct bpf_verifier_state *async_cb;
9280 
9281 		/* there is no real recursion here. timer and workqueue callbacks are async */
9282 		env->subprog_info[subprog].is_async_cb = true;
9283 		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
9284 					 insn_idx, subprog,
9285 					 is_async_cb_sleepable(env, insn));
9286 		if (IS_ERR(async_cb))
9287 			return PTR_ERR(async_cb);
9288 		callee = async_cb->frame[0];
9289 		callee->async_entry_cnt = caller->async_entry_cnt + 1;
9290 
9291 		/* Convert bpf_timer_set_callback() args into timer callback args */
9292 		err = set_callee_state_cb(env, caller, callee, insn_idx);
9293 		if (err)
9294 			return err;
9295 
9296 		return 0;
9297 	}
9298 
9299 	/* for callback functions enqueue entry to callback and
9300 	 * proceed with next instruction within current frame.
9301 	 */
9302 	callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
9303 	if (IS_ERR(callback_state))
9304 		return PTR_ERR(callback_state);
9305 
9306 	err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
9307 			       callback_state);
9308 	if (err)
9309 		return err;
9310 
9311 	callback_state->callback_unroll_depth++;
9312 	callback_state->frame[callback_state->curframe - 1]->callback_depth++;
9313 	caller->callback_depth = 0;
9314 	return 0;
9315 }
9316 
9317 static int process_bpf_exit_full(struct bpf_verifier_env *env,
9318 				 bool *do_print_state, bool exception_exit);
9319 
9320 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9321 			   int *insn_idx)
9322 {
9323 	struct bpf_verifier_state *state = env->cur_state;
9324 	struct bpf_subprog_info *caller_info;
9325 	u16 callee_incoming, stack_arg_cnt;
9326 	struct bpf_func_state *caller;
9327 	int err, subprog, target_insn;
9328 
9329 	target_insn = *insn_idx + insn->imm + 1;
9330 	subprog = bpf_find_subprog(env, target_insn);
9331 	if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
9332 			    target_insn))
9333 		return -EFAULT;
9334 
9335 	caller = state->frame[state->curframe];
9336 	err = btf_check_subprog_call(env, subprog, caller->regs);
9337 	if (err == -EFAULT)
9338 		return err;
9339 	if (bpf_subprog_is_global(env, subprog)) {
9340 		const char *sub_name = subprog_name(env, subprog);
9341 
9342 		if (env->cur_state->active_locks) {
9343 			verbose(env, "global function calls are not allowed while holding a lock,\n"
9344 				     "use static function instead\n");
9345 			return -EINVAL;
9346 		}
9347 
9348 		if (env->subprog_info[subprog].might_sleep && !in_sleepable_context(env)) {
9349 			verbose(env, "sleepable global function %s() called in %s\n",
9350 				sub_name, non_sleepable_context_description(env));
9351 			return -EINVAL;
9352 		}
9353 
9354 		if (err) {
9355 			verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
9356 				subprog, sub_name);
9357 			return err;
9358 		}
9359 
9360 		if (env->log.level & BPF_LOG_LEVEL)
9361 			verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
9362 				subprog, sub_name);
9363 		if (env->subprog_info[subprog].changes_pkt_data)
9364 			clear_all_pkt_pointers(env);
9365 		/* mark global subprog for verifying after main prog */
9366 		subprog_aux(env, subprog)->called = true;
9367 		clear_caller_saved_regs(env, caller->regs);
9368 		invalidate_outgoing_stack_args(env, cur_func(env));
9369 
9370 		/* All non-void global functions return a 64-bit SCALAR_VALUE. */
9371 		if (!subprog_returns_void(env, subprog)) {
9372 			mark_reg_unknown(env, caller->regs, BPF_REG_0);
9373 			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9374 		}
9375 
9376 		if (env->subprog_info[subprog].might_throw) {
9377 			struct bpf_verifier_state *branch;
9378 
9379 			branch = push_stack(env, *insn_idx + 1, *insn_idx, false);
9380 			if (IS_ERR(branch)) {
9381 				verbose(env, "failed to push state for global subprog exception path\n");
9382 				return PTR_ERR(branch);
9383 			}
9384 			return process_bpf_exit_full(env, NULL, true);
9385 		}
9386 
9387 		/* continue with next insn after call */
9388 		return 0;
9389 	}
9390 
9391 	/*
9392 	 * Track caller's total stack arg count (incoming + max outgoing).
9393 	 * This is needed so the JIT knows how much stack arg space to allocate.
9394 	 */
9395 	caller_info = &env->subprog_info[caller->subprogno];
9396 	callee_incoming = bpf_in_stack_arg_cnt(&env->subprog_info[subprog]);
9397 	stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + callee_incoming;
9398 	if (stack_arg_cnt > caller_info->stack_arg_cnt)
9399 		caller_info->stack_arg_cnt = stack_arg_cnt;
9400 
9401 	/* for regular function entry setup new frame and continue
9402 	 * from that frame.
9403 	 */
9404 	err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
9405 	if (err)
9406 		return err;
9407 
9408 	clear_caller_saved_regs(env, caller->regs);
9409 
9410 	/* and go analyze first insn of the callee */
9411 	*insn_idx = env->subprog_info[subprog].start - 1;
9412 
9413 	if (env->log.level & BPF_LOG_LEVEL) {
9414 		verbose(env, "caller:\n");
9415 		print_verifier_state(env, state, caller->frameno, true);
9416 		verbose(env, "callee:\n");
9417 		print_verifier_state(env, state, state->curframe, true);
9418 	}
9419 
9420 	return 0;
9421 }
9422 
9423 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9424 				   struct bpf_func_state *caller,
9425 				   struct bpf_func_state *callee)
9426 {
9427 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9428 	 *      void *callback_ctx, u64 flags);
9429 	 * callback_fn(struct bpf_map *map, void *key, void *value,
9430 	 *      void *callback_ctx);
9431 	 */
9432 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9433 
9434 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9435 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9436 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9437 
9438 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9439 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9440 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9441 
9442 	/* pointer to stack or null */
9443 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9444 
9445 	/* unused */
9446 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9447 	return 0;
9448 }
9449 
9450 static int set_callee_state(struct bpf_verifier_env *env,
9451 			    struct bpf_func_state *caller,
9452 			    struct bpf_func_state *callee, int insn_idx)
9453 {
9454 	int i;
9455 
9456 	/* copy r1 - r5 args that callee can access.  The copy includes parent
9457 	 * pointers, which connects us up to the liveness chain
9458 	 */
9459 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9460 		callee->regs[i] = caller->regs[i];
9461 	return 0;
9462 }
9463 
9464 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9465 				       struct bpf_func_state *caller,
9466 				       struct bpf_func_state *callee,
9467 				       int insn_idx)
9468 {
9469 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9470 	struct bpf_map *map;
9471 	int err;
9472 
9473 	/* valid map_ptr and poison value does not matter */
9474 	map = insn_aux->map_ptr_state.map_ptr;
9475 	if (!map->ops->map_set_for_each_callback_args ||
9476 	    !map->ops->map_for_each_callback) {
9477 		verbose(env, "callback function not allowed for map\n");
9478 		return -ENOTSUPP;
9479 	}
9480 
9481 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9482 	if (err)
9483 		return err;
9484 
9485 	callee->in_callback_fn = true;
9486 	callee->callback_ret_range = retval_range(0, 1);
9487 	return 0;
9488 }
9489 
9490 static int set_loop_callback_state(struct bpf_verifier_env *env,
9491 				   struct bpf_func_state *caller,
9492 				   struct bpf_func_state *callee,
9493 				   int insn_idx)
9494 {
9495 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9496 	 *	    u64 flags);
9497 	 * callback_fn(u64 index, void *callback_ctx);
9498 	 */
9499 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9500 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9501 
9502 	/* unused */
9503 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9504 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9505 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9506 
9507 	callee->in_callback_fn = true;
9508 	callee->callback_ret_range = retval_range(0, 1);
9509 	return 0;
9510 }
9511 
9512 static int set_timer_callback_state(struct bpf_verifier_env *env,
9513 				    struct bpf_func_state *caller,
9514 				    struct bpf_func_state *callee,
9515 				    int insn_idx)
9516 {
9517 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9518 
9519 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9520 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9521 	 */
9522 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9523 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9524 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9525 
9526 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9527 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9528 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9529 
9530 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9531 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9532 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9533 
9534 	/* unused */
9535 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9536 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9537 	callee->in_async_callback_fn = true;
9538 	callee->callback_ret_range = retval_range(0, 0);
9539 	return 0;
9540 }
9541 
9542 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9543 				       struct bpf_func_state *caller,
9544 				       struct bpf_func_state *callee,
9545 				       int insn_idx)
9546 {
9547 	/* bpf_find_vma(struct task_struct *task, u64 addr,
9548 	 *               void *callback_fn, void *callback_ctx, u64 flags)
9549 	 * (callback_fn)(struct task_struct *task,
9550 	 *               struct vm_area_struct *vma, void *callback_ctx);
9551 	 */
9552 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9553 
9554 	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9555 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9556 	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
9557 	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
9558 
9559 	/* pointer to stack or null */
9560 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9561 
9562 	/* unused */
9563 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9564 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9565 	callee->in_callback_fn = true;
9566 	callee->callback_ret_range = retval_range(0, 1);
9567 	return 0;
9568 }
9569 
9570 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9571 					   struct bpf_func_state *caller,
9572 					   struct bpf_func_state *callee,
9573 					   int insn_idx)
9574 {
9575 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
9576 	 *			  callback_ctx, u64 flags);
9577 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9578 	 */
9579 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9580 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9581 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9582 
9583 	/* unused */
9584 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9585 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9586 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9587 
9588 	callee->in_callback_fn = true;
9589 	callee->callback_ret_range = retval_range(0, 1);
9590 	return 0;
9591 }
9592 
9593 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9594 					 struct bpf_func_state *caller,
9595 					 struct bpf_func_state *callee,
9596 					 int insn_idx)
9597 {
9598 	/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9599 	 *                     bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9600 	 *
9601 	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9602 	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9603 	 * by this point, so look at 'root'
9604 	 */
9605 	struct btf_field *field;
9606 
9607 	field = reg_find_field_offset(&caller->regs[BPF_REG_1],
9608 				      caller->regs[BPF_REG_1].var_off.value,
9609 				      BPF_RB_ROOT);
9610 	if (!field || !field->graph_root.value_btf_id)
9611 		return -EFAULT;
9612 
9613 	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
9614 	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
9615 	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
9616 	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
9617 
9618 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9619 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9620 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9621 	callee->in_callback_fn = true;
9622 	callee->callback_ret_range = retval_range(0, 1);
9623 	return 0;
9624 }
9625 
9626 static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
9627 						 struct bpf_func_state *caller,
9628 						 struct bpf_func_state *callee,
9629 						 int insn_idx)
9630 {
9631 	struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
9632 
9633 	/*
9634 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9635 	 */
9636 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9637 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9638 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9639 
9640 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9641 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9642 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9643 
9644 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9645 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9646 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9647 
9648 	/* unused */
9649 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9650 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9651 	callee->in_async_callback_fn = true;
9652 	callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
9653 	return 0;
9654 }
9655 
9656 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9657 
9658 /* Are we currently verifying the callback for a rbtree helper that must
9659  * be called with lock held? If so, no need to complain about unreleased
9660  * lock
9661  */
9662 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9663 {
9664 	struct bpf_verifier_state *state = env->cur_state;
9665 	struct bpf_insn *insn = env->prog->insnsi;
9666 	struct bpf_func_state *callee;
9667 	int kfunc_btf_id;
9668 
9669 	if (!state->curframe)
9670 		return false;
9671 
9672 	callee = state->frame[state->curframe];
9673 
9674 	if (!callee->in_callback_fn)
9675 		return false;
9676 
9677 	kfunc_btf_id = insn[callee->callsite].imm;
9678 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9679 }
9680 
9681 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
9682 {
9683 	if (range.return_32bit)
9684 		return range.minval <= reg_s32_min(reg) && reg_s32_max(reg) <= range.maxval;
9685 	else
9686 		return range.minval <= reg_smin(reg) && reg_smax(reg) <= range.maxval;
9687 }
9688 
9689 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
9690 {
9691 	struct bpf_verifier_state *state = env->cur_state, *prev_st;
9692 	struct bpf_func_state *caller, *callee;
9693 	struct bpf_reg_state *r0;
9694 	bool in_callback_fn;
9695 	int err;
9696 
9697 	callee = state->frame[state->curframe];
9698 	r0 = &callee->regs[BPF_REG_0];
9699 	if (r0->type == PTR_TO_STACK) {
9700 		/* technically it's ok to return caller's stack pointer
9701 		 * (or caller's caller's pointer) back to the caller,
9702 		 * since these pointers are valid. Only current stack
9703 		 * pointer will be invalid as soon as function exits,
9704 		 * but let's be conservative
9705 		 */
9706 		verbose(env, "cannot return stack pointer to the caller\n");
9707 		return -EINVAL;
9708 	}
9709 
9710 	caller = state->frame[state->curframe - 1];
9711 	if (callee->in_callback_fn) {
9712 		if (r0->type != SCALAR_VALUE) {
9713 			verbose(env, "R0 not a scalar value\n");
9714 			return -EACCES;
9715 		}
9716 
9717 		/* we are going to rely on register's precise value */
9718 		err = mark_chain_precision(env, BPF_REG_0);
9719 		if (err)
9720 			return err;
9721 
9722 		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
9723 		if (!retval_range_within(callee->callback_ret_range, r0)) {
9724 			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
9725 					       "At callback return", "R0");
9726 			return -EINVAL;
9727 		}
9728 		if (!bpf_calls_callback(env, callee->callsite)) {
9729 			verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
9730 				     *insn_idx, callee->callsite);
9731 			return -EFAULT;
9732 		}
9733 	} else {
9734 		/* return to the caller whatever r0 had in the callee */
9735 		caller->regs[BPF_REG_0] = *r0;
9736 	}
9737 
9738 	/* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
9739 	 * there function call logic would reschedule callback visit. If iteration
9740 	 * converges is_state_visited() would prune that visit eventually.
9741 	 */
9742 	in_callback_fn = callee->in_callback_fn;
9743 	if (in_callback_fn)
9744 		*insn_idx = callee->callsite;
9745 	else
9746 		*insn_idx = callee->callsite + 1;
9747 
9748 	if (env->log.level & BPF_LOG_LEVEL) {
9749 		verbose(env, "returning from callee:\n");
9750 		print_verifier_state(env, state, callee->frameno, true);
9751 		verbose(env, "to caller at %d:\n", *insn_idx);
9752 		print_verifier_state(env, state, caller->frameno, true);
9753 	}
9754 	/* clear everything in the callee. In case of exceptional exits using
9755 	 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
9756 	free_func_state(callee);
9757 	state->frame[state->curframe--] = NULL;
9758 	invalidate_outgoing_stack_args(env, caller);
9759 
9760 	/* for callbacks widen imprecise scalars to make programs like below verify:
9761 	 *
9762 	 *   struct ctx { int i; }
9763 	 *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
9764 	 *   ...
9765 	 *   struct ctx = { .i = 0; }
9766 	 *   bpf_loop(100, cb, &ctx, 0);
9767 	 *
9768 	 * This is similar to what is done in process_iter_next_call() for open
9769 	 * coded iterators.
9770 	 */
9771 	prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
9772 	if (prev_st) {
9773 		err = widen_imprecise_scalars(env, prev_st, state);
9774 		if (err)
9775 			return err;
9776 	}
9777 	return 0;
9778 }
9779 
9780 static int do_refine_retval_range(struct bpf_verifier_env *env,
9781 				  struct bpf_reg_state *regs, int ret_type,
9782 				  int func_id,
9783 				  struct bpf_call_arg_meta *meta)
9784 {
9785 	struct bpf_retval_range range;
9786 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9787 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9788 
9789 	if (ret_type != RET_INTEGER)
9790 		return 0;
9791 
9792 	switch (func_id) {
9793 	case BPF_FUNC_get_stack:
9794 	case BPF_FUNC_get_task_stack:
9795 	case BPF_FUNC_probe_read_str:
9796 	case BPF_FUNC_probe_read_kernel_str:
9797 	case BPF_FUNC_probe_read_user_str:
9798 		reg_set_srange64(ret_reg, -MAX_ERRNO, meta->msize_max_value);
9799 		reg_set_srange32(ret_reg, -MAX_ERRNO, meta->msize_max_value);
9800 		reg_bounds_sync(ret_reg);
9801 		break;
9802 	case BPF_FUNC_get_smp_processor_id:
9803 		reg_set_urange64(ret_reg, 0, nr_cpu_ids - 1);
9804 		reg_set_urange32(ret_reg, 0, nr_cpu_ids - 1);
9805 		reg_bounds_sync(ret_reg);
9806 		break;
9807 	case BPF_FUNC_get_retval:
9808 		/*
9809 		 * bpf_get_retval may see arbitrary value passed by bpf_prog_run_array_cg for
9810 		 * CGROUP_GETSOCKOPT type.
9811 		 */
9812 		if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT &&
9813 		    env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT)
9814 			break;
9815 
9816 		if (prog_type == BPF_PROG_TYPE_LSM &&
9817 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
9818 			if (!env->prog->aux->attach_func_proto->type)
9819 				break;
9820 			bpf_lsm_get_retval_range(env->prog, &range);
9821 		} else {
9822 			range.minval = -MAX_ERRNO;
9823 			range.maxval = 0;
9824 		}
9825 
9826 		reg_set_srange64(ret_reg, range.minval, range.maxval);
9827 		reg_set_srange32(ret_reg, range.minval, range.maxval);
9828 		reg_bounds_sync(ret_reg);
9829 		break;
9830 	}
9831 
9832 	return reg_bounds_sanity_check(env, ret_reg, "retval");
9833 }
9834 
9835 static int
9836 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9837 		int func_id, int insn_idx)
9838 {
9839 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9840 	struct bpf_map *map = meta->map.ptr;
9841 
9842 	if (func_id != BPF_FUNC_tail_call &&
9843 	    func_id != BPF_FUNC_map_lookup_elem &&
9844 	    func_id != BPF_FUNC_map_update_elem &&
9845 	    func_id != BPF_FUNC_map_delete_elem &&
9846 	    func_id != BPF_FUNC_map_push_elem &&
9847 	    func_id != BPF_FUNC_map_pop_elem &&
9848 	    func_id != BPF_FUNC_map_peek_elem &&
9849 	    func_id != BPF_FUNC_for_each_map_elem &&
9850 	    func_id != BPF_FUNC_redirect_map &&
9851 	    func_id != BPF_FUNC_map_lookup_percpu_elem)
9852 		return 0;
9853 
9854 	if (map == NULL) {
9855 		verifier_bug(env, "expected map for helper call");
9856 		return -EFAULT;
9857 	}
9858 
9859 	/* In case of read-only, some additional restrictions
9860 	 * need to be applied in order to prevent altering the
9861 	 * state of the map from program side.
9862 	 */
9863 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
9864 	    (func_id == BPF_FUNC_map_delete_elem ||
9865 	     func_id == BPF_FUNC_map_update_elem ||
9866 	     func_id == BPF_FUNC_map_push_elem ||
9867 	     func_id == BPF_FUNC_map_pop_elem)) {
9868 		verbose(env, "write into map forbidden\n");
9869 		return -EACCES;
9870 	}
9871 
9872 	if (!aux->map_ptr_state.map_ptr)
9873 		bpf_map_ptr_store(aux, meta->map.ptr,
9874 				  !meta->map.ptr->bypass_spec_v1, false);
9875 	else if (aux->map_ptr_state.map_ptr != meta->map.ptr)
9876 		bpf_map_ptr_store(aux, meta->map.ptr,
9877 				  !meta->map.ptr->bypass_spec_v1, true);
9878 	return 0;
9879 }
9880 
9881 static int
9882 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9883 		int func_id, int insn_idx)
9884 {
9885 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9886 	struct bpf_reg_state *reg;
9887 	struct bpf_map *map = meta->map.ptr;
9888 	u64 val, max;
9889 	int err;
9890 
9891 	if (func_id != BPF_FUNC_tail_call)
9892 		return 0;
9893 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
9894 		verbose(env, "expected prog array map for tail call");
9895 		return -EINVAL;
9896 	}
9897 
9898 	reg = reg_state(env, BPF_REG_3);
9899 	val = reg->var_off.value;
9900 	max = map->max_entries;
9901 
9902 	if (!(is_reg_const(reg, false) && val < max)) {
9903 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9904 		return 0;
9905 	}
9906 
9907 	err = mark_chain_precision(env, BPF_REG_3);
9908 	if (err)
9909 		return err;
9910 	if (bpf_map_key_unseen(aux))
9911 		bpf_map_key_store(aux, val);
9912 	else if (!bpf_map_key_poisoned(aux) &&
9913 		  bpf_map_key_immediate(aux) != val)
9914 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9915 	return 0;
9916 }
9917 
9918 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
9919 {
9920 	struct bpf_verifier_state *state = env->cur_state;
9921 	enum bpf_prog_type type = resolve_prog_type(env->prog);
9922 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
9923 	bool refs_lingering = false;
9924 	int i;
9925 
9926 	if (!exception_exit && cur_func(env)->frameno)
9927 		return 0;
9928 
9929 	for (i = 0; i < state->acquired_refs; i++) {
9930 		if (state->refs[i].type != REF_TYPE_PTR)
9931 			continue;
9932 		/* Allow struct_ops programs to return a referenced kptr back to
9933 		 * kernel. Type checks are performed later in check_return_code.
9934 		 */
9935 		if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
9936 		    reg->id == state->refs[i].id)
9937 			continue;
9938 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
9939 			state->refs[i].id, state->refs[i].insn_idx);
9940 		refs_lingering = true;
9941 	}
9942 	return refs_lingering ? -EINVAL : 0;
9943 }
9944 
9945 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
9946 {
9947 	int err;
9948 
9949 	if (check_lock && env->cur_state->active_locks) {
9950 		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
9951 		return -EINVAL;
9952 	}
9953 
9954 	err = check_reference_leak(env, exception_exit);
9955 	if (err) {
9956 		verbose(env, "%s would lead to reference leak\n", prefix);
9957 		return err;
9958 	}
9959 
9960 	if (check_lock && env->cur_state->active_irq_id) {
9961 		verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
9962 		return -EINVAL;
9963 	}
9964 
9965 	if (check_lock && env->cur_state->active_rcu_locks) {
9966 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
9967 		return -EINVAL;
9968 	}
9969 
9970 	if (check_lock && env->cur_state->active_preempt_locks) {
9971 		verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
9972 		return -EINVAL;
9973 	}
9974 
9975 	return 0;
9976 }
9977 
9978 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
9979 				   struct bpf_reg_state *regs)
9980 {
9981 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
9982 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
9983 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
9984 	struct bpf_bprintf_data data = {};
9985 	int err, fmt_map_off, num_args;
9986 	u64 fmt_addr;
9987 	char *fmt;
9988 
9989 	/* data must be an array of u64 */
9990 	if (data_len_reg->var_off.value % 8)
9991 		return -EINVAL;
9992 	num_args = data_len_reg->var_off.value / 8;
9993 
9994 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
9995 	 * and map_direct_value_addr is set.
9996 	 */
9997 	fmt_map_off = fmt_reg->var_off.value;
9998 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
9999 						  fmt_map_off);
10000 	if (err) {
10001 		verbose(env, "failed to retrieve map value address\n");
10002 		return -EFAULT;
10003 	}
10004 	fmt = (char *)(long)fmt_addr + fmt_map_off;
10005 
10006 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
10007 	 * can focus on validating the format specifiers.
10008 	 */
10009 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
10010 	if (err < 0)
10011 		verbose(env, "Invalid format string\n");
10012 
10013 	return err;
10014 }
10015 
10016 static int check_get_func_ip(struct bpf_verifier_env *env)
10017 {
10018 	enum bpf_prog_type type = resolve_prog_type(env->prog);
10019 	int func_id = BPF_FUNC_get_func_ip;
10020 
10021 	if (type == BPF_PROG_TYPE_TRACING) {
10022 		if (!bpf_prog_has_trampoline(env->prog)) {
10023 			verbose(env, "func %s#%d supported only for fentry/fexit/fsession/fmod_ret programs\n",
10024 				func_id_name(func_id), func_id);
10025 			return -ENOTSUPP;
10026 		}
10027 		return 0;
10028 	} else if (type == BPF_PROG_TYPE_KPROBE) {
10029 		return 0;
10030 	}
10031 
10032 	verbose(env, "func %s#%d not supported for program type %d\n",
10033 		func_id_name(func_id), func_id, type);
10034 	return -ENOTSUPP;
10035 }
10036 
10037 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
10038 {
10039 	return &env->insn_aux_data[env->insn_idx];
10040 }
10041 
10042 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
10043 {
10044 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
10045 	bool reg_is_null = bpf_register_is_null(reg);
10046 
10047 	if (reg_is_null)
10048 		mark_chain_precision(env, BPF_REG_4);
10049 
10050 	return reg_is_null;
10051 }
10052 
10053 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
10054 {
10055 	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10056 
10057 	if (!state->initialized) {
10058 		state->initialized = 1;
10059 		state->fit_for_inline = loop_flag_is_zero(env);
10060 		state->callback_subprogno = subprogno;
10061 		return;
10062 	}
10063 
10064 	if (!state->fit_for_inline)
10065 		return;
10066 
10067 	state->fit_for_inline = (loop_flag_is_zero(env) &&
10068 				 state->callback_subprogno == subprogno);
10069 }
10070 
10071 /* Returns whether or not the given map can potentially elide
10072  * lookup return value nullness check. This is possible if the key
10073  * is statically known.
10074  */
10075 static bool can_elide_value_nullness(const struct bpf_map *map)
10076 {
10077 	if (map->map_flags & BPF_F_INNER_MAP)
10078 		return false;
10079 
10080 	switch (map->map_type) {
10081 	case BPF_MAP_TYPE_ARRAY:
10082 	case BPF_MAP_TYPE_PERCPU_ARRAY:
10083 		return true;
10084 	default:
10085 		return false;
10086 	}
10087 }
10088 
10089 int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
10090 			 const struct bpf_func_proto **ptr)
10091 {
10092 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
10093 		return -ERANGE;
10094 
10095 	if (!env->ops->get_func_proto)
10096 		return -EINVAL;
10097 
10098 	*ptr = env->ops->get_func_proto(func_id, env->prog);
10099 	return *ptr && (*ptr)->func ? 0 : -EINVAL;
10100 }
10101 
10102 /* Check if we're in a sleepable context. */
10103 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
10104 {
10105 	return !env->cur_state->active_rcu_locks &&
10106 	       !env->cur_state->active_preempt_locks &&
10107 	       !env->cur_state->active_locks &&
10108 	       !env->cur_state->active_irq_id &&
10109 	       in_sleepable(env);
10110 }
10111 
10112 static const char *non_sleepable_context_description(struct bpf_verifier_env *env)
10113 {
10114 	if (env->cur_state->active_rcu_locks)
10115 		return "rcu_read_lock region";
10116 	if (env->cur_state->active_preempt_locks)
10117 		return "non-preemptible region";
10118 	if (env->cur_state->active_irq_id)
10119 		return "IRQ-disabled region";
10120 	if (env->cur_state->active_locks)
10121 		return "lock region";
10122 	return "non-sleepable prog";
10123 }
10124 
10125 static int release_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
10126 		       bool convert_rcu, bool release_dynptr)
10127 {
10128 	int err = -EINVAL;
10129 
10130 	if (bpf_register_is_null(reg))
10131 		return 0;
10132 
10133 	if (release_dynptr)
10134 		err = unmark_stack_slots_dynptr(env, reg);
10135 	else if (convert_rcu)
10136 		err = ref_convert_alloc_rcu_protected(env, reg->id);
10137 	else if (reg_is_referenced(env, reg))
10138 		err = release_reference(env, reg->id);
10139 
10140 	return err;
10141 }
10142 
/* Verify one call to a BPF helper and update the verifier state for it.
 *
 * @env:        verifier environment
 * @insn:       the call instruction; insn->imm holds the helper id
 * @insn_idx_p: points at the index of @insn; it is only read here
 *
 * Steps performed, in order:
 *  - look up the helper prototype and reject helpers that are unknown,
 *    unavailable to this program type, GPL-only for a non-GPL program,
 *    refused by fn->allowed(), or sleepable in a non-sleepable context;
 *  - check every argument register with check_func_arg();
 *  - record map / key usage, mark raw-mode memory as written and release
 *    the register named by meta.release_regno, if any;
 *  - run helper-specific checks (callbacks, bpf_loop, set_retval, ...);
 *  - reset the caller-saved registers and set up R0 from fn->ret_type;
 *  - handle pointer-cast and acquire helpers, return-value refinement,
 *    map compatibility, callchain buffers and bpf_tail_call();
 *  - invalidate packet pointers when the helper may change packet data.
 *
 * Returns 0 on success or a negative errno.
 */
10143 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10144 			     int *insn_idx_p)
10145 {
10146 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
10147 	bool returns_cpu_specific_alloc_ptr = false;
10148 	const struct bpf_func_proto *fn = NULL;
10149 	enum bpf_return_type ret_type;
10150 	enum bpf_type_flag ret_flag;
10151 	struct bpf_reg_state *regs;
10152 	struct bpf_call_arg_meta meta;
10153 	int insn_idx = *insn_idx_p;
10154 	bool changes_data;
10155 	int i, err, func_id;
10156 
10157 	/* find function prototype */
10158 	func_id = insn->imm;
10159 	err = bpf_get_helper_proto(env, insn->imm, &fn);
10160 	if (err == -ERANGE) {
10161 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
10162 		return -EINVAL;
10163 	}
10164 
10165 	if (err) {
10166 		verbose(env, "program of this type cannot use helper %s#%d\n",
10167 			func_id_name(func_id), func_id);
10168 		return err;
10169 	}
10170 
10171 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
10172 	if (!env->prog->gpl_compatible && fn->gpl_only) {
10173 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
10174 		return -EINVAL;
10175 	}
10176 
10177 	if (fn->allowed && !fn->allowed(env->prog)) {
10178 		verbose(env, "helper call is not allowed in probe\n");
10179 		return -EINVAL;
10180 	}
10181 
10182 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
10183 	changes_data = bpf_helper_changes_pkt_data(func_id);
10184 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
10185 		verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id);
10186 		return -EFAULT;
10187 	}
10188 
10189 	memset(&meta, 0, sizeof(meta));
10190 	meta.pkt_access = fn->pkt_access;
10191 
10192 	err = check_func_proto(fn, &meta);
10193 	if (err) {
10194 		verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
10195 		return err;
10196 	}
10197 
10198 	if (fn->might_sleep && !in_sleepable_context(env)) {
10199 		verbose(env, "sleepable helper %s#%d in %s\n", func_id_name(func_id), func_id,
10200 			non_sleepable_context_description(env));
10201 		return -EINVAL;
10202 	}
10203 
10204 	/* Track non-sleepable context for helpers. */
10205 	if (!in_sleepable_context(env))
10206 		env->insn_aux_data[insn_idx].non_sleepable = true;
10207 
10208 	meta.func_id = func_id;
10209 	/* check args */
10210 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
10211 		err = check_func_arg(env, i, &meta, fn, insn_idx);
10212 		if (err)
10213 			return err;
10214 	}
10215 
10216 	err = record_func_map(env, &meta, func_id, insn_idx);
10217 	if (err)
10218 		return err;
10219 
10220 	err = record_func_key(env, &meta, func_id, insn_idx);
10221 	if (err)
10222 		return err;
10223 
10224 	regs = cur_regs(env);
10225 
10226 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
10227 	 * is inferred from register state.
10228 	 */
10229 	for (i = 0; i < meta.access_size; i++) {
10230 		err = check_mem_access(env, insn_idx, regs + meta.regno, argno_from_reg(meta.regno), i, BPF_B,
10231 				       BPF_WRITE, -1, false, false);
10232 		if (err)
10233 			return err;
10234 	}
10235 
	/* Argument checking named a register to release. convert_rcu is only
	 * true for bpf_kptr_xchg() inside an RCU critical section when that
	 * register is a per-CPU allocated object (MEM_ALLOC | MEM_PERCPU);
	 * what it changes is decided inside release_reg().
	 */
10236 	if (meta.release_regno) {
10237 		struct bpf_reg_state *reg = &regs[meta.release_regno];
10238 		bool convert_rcu = (func_id == BPF_FUNC_kptr_xchg) && in_rcu_cs(env) &&
10239 				   (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU);
10240 
10241 		err = release_reg(env, reg, convert_rcu, !!meta.dynptr.id);
10242 		if (err)
10243 			return err;
10244 	}
10245 
	/* Helper-specific checks. err is 0 on entry to the switch because every
	 * earlier failure returned. Cases either return directly or leave
	 * their result in err for the common check after the switch.
	 */
10246 	switch (func_id) {
10247 	case BPF_FUNC_tail_call:
10248 		err = check_resource_leak(env, false, true, "tail_call");
10249 		if (err)
10250 			return err;
10251 		break;
10252 	case BPF_FUNC_get_local_storage:
10253 		/* check that flags argument in get_local_storage(map, flags) is 0,
10254 		 * this is required because get_local_storage() can't return an error.
10255 		 */
10256 		if (!bpf_register_is_null(&regs[BPF_REG_2])) {
10257 			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
10258 			return -EINVAL;
10259 		}
10260 		break;
10261 	case BPF_FUNC_for_each_map_elem:
10262 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10263 					 set_map_elem_callback_state);
10264 		break;
10265 	case BPF_FUNC_timer_set_callback:
10266 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10267 					 set_timer_callback_state);
10268 		break;
10269 	case BPF_FUNC_find_vma:
10270 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10271 					 set_find_vma_callback_state);
10272 		break;
10273 	case BPF_FUNC_snprintf:
10274 		err = check_bpf_snprintf_call(env, regs);
10275 		break;
10276 	case BPF_FUNC_loop:
10277 		update_loop_inline_state(env, meta.subprogno);
10278 		/* Verifier relies on R1 value to determine if bpf_loop() iteration
10279 		 * is finished, thus mark it precise.
10280 		 */
10281 		err = mark_chain_precision(env, BPF_REG_1);
10282 		if (err)
10283 			return err;
10284 		if (cur_func(env)->callback_depth < reg_umax(&regs[BPF_REG_1])) {
10285 			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10286 						 set_loop_callback_state);
10287 		} else {
10288 			cur_func(env)->callback_depth = 0;
10289 			if (env->log.level & BPF_LOG_LEVEL2)
10290 				verbose(env, "frame%d bpf_loop iteration limit reached\n",
10291 					env->cur_state->curframe);
10292 		}
10293 		break;
10294 	case BPF_FUNC_dynptr_from_mem:
10295 		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
10296 			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
10297 				reg_type_str(env, regs[BPF_REG_1].type));
10298 			return -EACCES;
10299 		}
10300 		break;
10301 	case BPF_FUNC_set_retval:
10302 	{
		/* Default allowed range for R1 is [-MAX_ERRNO, 0]; for
		 * BPF_LSM_CGROUP programs it is overridden below by
		 * bpf_lsm_get_retval_range().
		 */
10303 		struct bpf_retval_range range = {
10304 			.minval = -MAX_ERRNO,
10305 			.maxval = 0,
10306 			.return_32bit = true
10307 		};
10308 		struct bpf_reg_state *r1 = &regs[BPF_REG_1];
10309 
10310 		if (r1->type != SCALAR_VALUE) {
10311 			verbose(env, "R1 is not a scalar\n");
10312 			return -EINVAL;
10313 		}
10314 
10315 		/* CGROUP_GETSOCKOPT is allowed to return arbitrary value */
10316 		if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT &&
10317 		    env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT)
10318 			break;
10319 
10320 		if (prog_type == BPF_PROG_TYPE_LSM &&
10321 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
10322 			if (!env->prog->aux->attach_func_proto->type) {
10323 				/* Make sure programs that attach to void
10324 				 * hooks don't try to modify return value.
10325 				 */
10326 				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
10327 				return -EINVAL;
10328 			}
10329 			bpf_lsm_get_retval_range(env->prog, &range);
10330 		}
10331 
10332 		err = mark_chain_precision(env, BPF_REG_1);
10333 		if (err)
10334 			return err;
10335 
10336 		if (!retval_range_within(range, r1)) {
10337 			verbose_invalid_scalar(env, r1, range, "At bpf_set_retval", "R1");
10338 			return -EINVAL;
10339 		}
10340 
10341 		break;
10342 	}
10343 	case BPF_FUNC_dynptr_write:
10344 	{
10345 		enum bpf_dynptr_type dynptr_type = meta.dynptr.type;
10346 
10347 		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
10348 			return -EFAULT;
10349 
10350 		if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
10351 		    dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
10352 			/* this will trigger clear_all_pkt_pointers(), which will
10353 			 * invalidate all dynptr slices associated with the skb
10354 			 */
10355 			changes_data = true;
10356 
10357 		break;
10358 	}
10359 	case BPF_FUNC_per_cpu_ptr:
10360 	case BPF_FUNC_this_cpu_ptr:
10361 	{
10362 		struct bpf_reg_state *reg = &regs[BPF_REG_1];
10363 		const struct btf_type *type;
10364 
		/* An R1 carrying MEM_RCU must point to a struct type. The flag
		 * set here selects PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU for R0 in
		 * the RET_PTR_TO_MEM_OR_BTF_ID handling further down.
		 */
10365 		if (reg->type & MEM_RCU) {
10366 			type = btf_type_by_id(reg->btf, reg->btf_id);
10367 			if (!type || !btf_type_is_struct(type)) {
10368 				verbose(env, "Helper has invalid btf/btf_id in R1\n");
10369 				return -EFAULT;
10370 			}
10371 			returns_cpu_specific_alloc_ptr = true;
10372 			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
10373 		}
10374 		break;
10375 	}
10376 	case BPF_FUNC_user_ringbuf_drain:
10377 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10378 					 set_user_ringbuf_callback_state);
10379 		break;
10380 	}
10381 
	/* Catches failures from the cases above that assign err and break. */
10382 	if (err)
10383 		return err;
10384 
10385 	/* reset caller saved regs */
10386 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
10387 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
10388 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
10389 	}
10390 	invalidate_outgoing_stack_args(env, cur_func(env));
10391 
10392 	/* helper call returns 64-bit value. */
10393 	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10394 
10395 	/* update return register (already marked as written above) */
10396 	ret_type = fn->ret_type;
10397 	ret_flag = type_flag(ret_type);
10398 
10399 	switch (base_type(ret_type)) {
10400 	case RET_INTEGER:
10401 		/* sets type to SCALAR_VALUE */
10402 		mark_reg_unknown(env, regs, BPF_REG_0);
10403 		break;
10404 	case RET_VOID:
10405 		regs[BPF_REG_0].type = NOT_INIT;
10406 		break;
10407 	case RET_PTR_TO_MAP_VALUE:
10408 		/* There is no offset yet applied, variable or fixed */
10409 		mark_reg_known_zero(env, regs, BPF_REG_0);
10410 		/* remember map_ptr, so that check_map_access()
10411 		 * can check 'value_size' boundary of memory access
10412 		 * to map element returned from bpf_map_lookup_elem()
10413 		 */
10414 		if (meta.map.ptr == NULL) {
10415 			verifier_bug(env, "unexpected null map_ptr");
10416 			return -EFAULT;
10417 		}
10418 
		/* Drop PTR_MAYBE_NULL for bpf_map_lookup_elem() when the map
		 * passes can_elide_value_nullness() and the constant key is
		 * within [0, max_entries).
		 */
10419 		if (func_id == BPF_FUNC_map_lookup_elem &&
10420 		    can_elide_value_nullness(meta.map.ptr) &&
10421 		    meta.const_map_key >= 0 &&
10422 		    meta.const_map_key < meta.map.ptr->max_entries)
10423 			ret_flag &= ~PTR_MAYBE_NULL;
10424 
10425 		regs[BPF_REG_0].map_ptr = meta.map.ptr;
10426 		regs[BPF_REG_0].map_uid = meta.map.uid;
10427 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
10428 		if (!type_may_be_null(ret_flag) &&
10429 		    btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
10430 			regs[BPF_REG_0].id = ++env->id_gen;
10431 		}
10432 		break;
10433 	case RET_PTR_TO_SOCKET:
10434 		mark_reg_known_zero(env, regs, BPF_REG_0);
10435 		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
10436 		break;
10437 	case RET_PTR_TO_SOCK_COMMON:
10438 		mark_reg_known_zero(env, regs, BPF_REG_0);
10439 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
10440 		break;
10441 	case RET_PTR_TO_TCP_SOCK:
10442 		mark_reg_known_zero(env, regs, BPF_REG_0);
10443 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
10444 		break;
10445 	case RET_PTR_TO_MEM:
10446 		mark_reg_known_zero(env, regs, BPF_REG_0);
10447 		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10448 		regs[BPF_REG_0].mem_size = meta.mem_size;
10449 		break;
10450 	case RET_PTR_TO_MEM_OR_BTF_ID:
10451 	{
10452 		const struct btf_type *t;
10453 
		/* Non-struct types become PTR_TO_MEM sized by
		 * btf_resolve_size(); struct types become PTR_TO_BTF_ID.
		 */
10454 		mark_reg_known_zero(env, regs, BPF_REG_0);
10455 		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
10456 		if (!btf_type_is_struct(t)) {
10457 			u32 tsize;
10458 			const struct btf_type *ret;
10459 			const char *tname;
10460 
10461 			/* resolve the type size of ksym. */
10462 			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
10463 			if (IS_ERR(ret)) {
10464 				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
10465 				verbose(env, "unable to resolve the size of type '%s': %ld\n",
10466 					tname, PTR_ERR(ret));
10467 				return -EINVAL;
10468 			}
10469 			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10470 			regs[BPF_REG_0].mem_size = tsize;
10471 		} else {
10472 			if (returns_cpu_specific_alloc_ptr) {
10473 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
10474 			} else {
10475 				/* MEM_RDONLY may be carried from ret_flag, but it
10476 				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
10477 				 * it will confuse the check of PTR_TO_BTF_ID in
10478 				 * check_mem_access().
10479 				 */
10480 				ret_flag &= ~MEM_RDONLY;
10481 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
10482 			}
10483 
10484 			regs[BPF_REG_0].btf = meta.ret_btf;
10485 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
10486 		}
10487 		break;
10488 	}
10489 	case RET_PTR_TO_BTF_ID:
10490 	{
10491 		struct btf *ret_btf;
10492 		int ret_btf_id;
10493 
10494 		mark_reg_known_zero(env, regs, BPF_REG_0);
10495 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
		/* bpf_kptr_xchg() takes its return type from the kptr field
		 * recorded in meta; every other helper takes it from
		 * fn->ret_btf_id in btf_vmlinux.
		 */
10496 		if (func_id == BPF_FUNC_kptr_xchg) {
10497 			ret_btf = meta.kptr_field->kptr.btf;
10498 			ret_btf_id = meta.kptr_field->kptr.btf_id;
10499 			if (!btf_is_kernel(ret_btf)) {
10500 				regs[BPF_REG_0].type |= MEM_ALLOC;
10501 				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
10502 					regs[BPF_REG_0].type |= MEM_PERCPU;
10503 			}
10504 		} else {
10505 			if (fn->ret_btf_id == BPF_PTR_POISON) {
10506 				verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type",
10507 					     func_id_name(func_id));
10508 				return -EFAULT;
10509 			}
10510 			ret_btf = btf_vmlinux;
10511 			ret_btf_id = *fn->ret_btf_id;
10512 		}
10513 		if (ret_btf_id == 0) {
10514 			verbose(env, "invalid return type %u of func %s#%d\n",
10515 				base_type(ret_type), func_id_name(func_id),
10516 				func_id);
10517 			return -EINVAL;
10518 		}
10519 		regs[BPF_REG_0].btf = ret_btf;
10520 		regs[BPF_REG_0].btf_id = ret_btf_id;
10521 		break;
10522 	}
10523 	default:
10524 		verbose(env, "unknown return type %u of func %s#%d\n",
10525 			base_type(ret_type), func_id_name(func_id), func_id);
10526 		return -EINVAL;
10527 	}
10528 
	/* A possibly-NULL return value gets a fresh id. */
10529 	if (type_may_be_null(regs[BPF_REG_0].type))
10530 		regs[BPF_REG_0].id = ++env->id_gen;
10531 
10532 	if (is_ptr_cast_function(func_id) &&
10533 	    find_reference_state(env->cur_state, meta.ref_obj.id)) {
10534 		struct bpf_verifier_state *branch;
10535 		struct bpf_reg_state *r0;
10536 
10537 		err = validate_ref_obj(env, &meta.ref_obj);
10538 		if (err)
10539 			return err;
10540 
10541 		/*
10542 		 * In order for a release of any of the original or cast pointers
10543 		 * to invalidate all other pointers, reuse the same reference id for
10544 		 * the cast result.
10545 		 * This reference id can't be used for nullness propagation,
10546 		 * as cast might return NULL for a non-NULL input.
10547 		 * Hence, explore the NULL case as a separate branch.
10548 		 */
10549 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
10550 		if (IS_ERR(branch))
10551 			return PTR_ERR(branch);
10552 
		/* In the pushed branch R0 is the known scalar zero (NULL result). */
10553 		r0 = &branch->frame[branch->curframe]->regs[BPF_REG_0];
10554 		__mark_reg_known_zero(r0);
10555 		r0->type = SCALAR_VALUE;
10556 
10557 		regs[BPF_REG_0].type &= ~PTR_MAYBE_NULL;
10558 		regs[BPF_REG_0].id = meta.ref_obj.id;
10559 	} else if (is_acquire_function(func_id, meta.map.ptr)) {
10560 		int id = acquire_reference(env, insn_idx, 0);
10561 
10562 		if (id < 0)
10563 			return id;
10564 
10565 		regs[BPF_REG_0].id = id;
10566 	}
10567 
10568 	if (func_id == BPF_FUNC_dynptr_data)
10569 		regs[BPF_REG_0].parent_id = meta.dynptr.id;
10570 
10571 	err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
10572 	if (err)
10573 		return err;
10574 
10575 	err = check_map_func_compatibility(env, meta.map.ptr, func_id);
10576 	if (err)
10577 		return err;
10578 
	/* Callchain buffers are requested once per program; has_callchain_buf
	 * remembers that this has already been done.
	 */
10579 	if ((func_id == BPF_FUNC_get_stack ||
10580 	     func_id == BPF_FUNC_get_task_stack) &&
10581 	    !env->prog->has_callchain_buf) {
10582 		const char *err_str;
10583 
10584 #ifdef CONFIG_PERF_EVENTS
10585 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
10586 		err_str = "cannot get callchain buffer for func %s#%d\n";
10587 #else
10588 		err = -ENOTSUPP;
10589 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
10590 #endif
10591 		if (err) {
10592 			verbose(env, err_str, func_id_name(func_id), func_id);
10593 			return err;
10594 		}
10595 
10596 		env->prog->has_callchain_buf = true;
10597 	}
10598 
10599 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
10600 		env->prog->call_get_stack = true;
10601 
10602 	if (func_id == BPF_FUNC_get_func_ip) {
10603 		if (check_get_func_ip(env))
10604 			return -ENOTSUPP;
10605 		env->prog->call_get_func_ip = true;
10606 	}
10607 
	/* bpf_tail_call(): inside a subprogram frame (curframe != 0) a branch
	 * is pushed for the instruction after the call, and the current state
	 * goes through prepare_func_exit() after packet pointers are cleared.
	 * In the main frame changes_data is cleared instead, so the final
	 * clear_all_pkt_pointers() below is skipped.
	 */
10608 	if (func_id == BPF_FUNC_tail_call) {
10609 		if (env->cur_state->curframe) {
10610 			struct bpf_verifier_state *branch;
10611 
10612 			mark_reg_scratched(env, BPF_REG_0);
10613 			branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
10614 			if (IS_ERR(branch))
10615 				return PTR_ERR(branch);
10616 			clear_all_pkt_pointers(env);
10617 			mark_reg_unknown(env, regs, BPF_REG_0);
10618 			err = prepare_func_exit(env, &env->insn_idx);
10619 			if (err)
10620 				return err;
10621 			env->insn_idx--;
10622 		} else {
10623 			changes_data = false;
10624 		}
10625 	}
10626 
10627 	if (changes_data)
10628 		clear_all_pkt_pointers(env);
10629 	return 0;
10630 }
10631 
10632 /* mark_btf_func_reg_size() is used when the reg size is determined by
10633  * the BTF func_proto's return value size and argument.
10634  */
10635 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs,
10636 				     u32 regno, size_t reg_size)
10637 {
10638 	struct bpf_reg_state *reg = &regs[regno];
10639 
10640 	if (regno == BPF_REG_0) {
10641 		/* Function return value */
10642 		reg->subreg_def = reg_size == sizeof(u64) ?
10643 			DEF_NOT_SUBREG : env->insn_idx + 1;
10644 	} else if (reg_size == sizeof(u64)) {
10645 		/* Function argument */
10646 		mark_insn_zext(env, reg);
10647 	}
10648 }
10649 
10650 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
10651 				   size_t reg_size)
10652 {
10653 	return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size);
10654 }
10655 
10656 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10657 {
10658 	return meta->kfunc_flags & KF_ACQUIRE;
10659 }
10660 
10661 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10662 {
10663 	return meta->kfunc_flags & KF_RELEASE;
10664 }
10665 
10666 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10667 {
10668 	return meta->kfunc_flags & KF_DESTRUCTIVE;
10669 }
10670 
10671 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10672 {
10673 	return meta->kfunc_flags & KF_RCU;
10674 }
10675 
10676 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10677 {
10678 	return meta->kfunc_flags & KF_RCU_PROTECTED;
10679 }
10680 
10681 static bool is_kfunc_arg_mem_size(const struct btf *btf,
10682 				  const struct btf_param *arg,
10683 				  const struct bpf_reg_state *reg)
10684 {
10685 	const struct btf_type *t;
10686 
10687 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10688 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10689 		return false;
10690 
10691 	return btf_param_match_suffix(btf, arg, "__sz");
10692 }
10693 
10694 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10695 					const struct btf_param *arg,
10696 					const struct bpf_reg_state *reg)
10697 {
10698 	const struct btf_type *t;
10699 
10700 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10701 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10702 		return false;
10703 
10704 	return btf_param_match_suffix(btf, arg, "__szk");
10705 }
10706 
/* True when btf_param_match_suffix() matches "__k" on parameter @arg. */
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__k");

	return tagged;
}
10711 
/* True when btf_param_match_suffix() matches "__ign" on parameter @arg. */
static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__ign");

	return tagged;
}
10716 
/* True when btf_param_match_suffix() matches "__map" on parameter @arg. */
static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__map");

	return tagged;
}
10721 
/* True when btf_param_match_suffix() matches "__alloc" on parameter @arg. */
static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__alloc");

	return tagged;
}
10726 
/* True when btf_param_match_suffix() matches "__uninit" on parameter @arg. */
static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__uninit");

	return tagged;
}
10731 
/* True when btf_param_match_suffix() matches "__refcounted_kptr" on
 * parameter @arg.
 */
static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__refcounted_kptr");

	return tagged;
}
10736 
/* True when btf_param_match_suffix() matches "__nullable" on parameter @arg. */
static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__nullable");

	return tagged;
}
10741 
/* True when btf_param_match_suffix() matches "__nonown_allowed" on
 * parameter @arg.
 */
static bool is_kfunc_arg_nonown_allowed(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__nonown_allowed");

	return tagged;
}
10746 
/* True when btf_param_match_suffix() matches "__str" on parameter @arg. */
static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__str");

	return tagged;
}
10751 
/* True when btf_param_match_suffix() matches "__irq_flag" on parameter @arg. */
static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
{
	bool tagged = btf_param_match_suffix(btf, arg, "__irq_flag");

	return tagged;
}
10756 
10757 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10758 					  const struct btf_param *arg,
10759 					  const char *name)
10760 {
10761 	int len, target_len = strlen(name);
10762 	const char *param_name;
10763 
10764 	param_name = btf_name_by_offset(btf, arg->name_off);
10765 	if (str_is_empty(param_name))
10766 		return false;
10767 	len = strlen(param_name);
10768 	if (len != target_len)
10769 		return false;
10770 	if (strcmp(param_name, name))
10771 		return false;
10772 
10773 	return true;
10774 }
10775 
/* Indices into kf_arg_btf_ids[]. The order of the enumerators must match
 * the order of the BTF_ID() entries in that list, because
 * __is_kfunc_ptr_arg_type() looks a struct up as kf_arg_btf_ids[<id>].
 */
10776 enum {
10777 	KF_ARG_DYNPTR_ID,
10778 	KF_ARG_LIST_HEAD_ID,
10779 	KF_ARG_LIST_NODE_ID,
10780 	KF_ARG_RB_ROOT_ID,
10781 	KF_ARG_RB_NODE_ID,
10782 	KF_ARG_WORKQUEUE_ID,
10783 	KF_ARG_RES_SPIN_LOCK_ID,
10784 	KF_ARG_TASK_WORK_ID,
10785 	KF_ARG_PROG_AUX_ID,
10786 	KF_ARG_TIMER_ID
10787 };
10788 
/* BTF ids of the structs that kfunc pointer arguments are matched against.
 * Entries are positional: entry N is selected by the KF_ARG_*_ID
 * enumerator with value N, so keep both lists in the same order.
 */
10789 BTF_ID_LIST(kf_arg_btf_ids)
10790 BTF_ID(struct, bpf_dynptr)
10791 BTF_ID(struct, bpf_list_head)
10792 BTF_ID(struct, bpf_list_node)
10793 BTF_ID(struct, bpf_rb_root)
10794 BTF_ID(struct, bpf_rb_node)
10795 BTF_ID(struct, bpf_wq)
10796 BTF_ID(struct, bpf_res_spin_lock)
10797 BTF_ID(struct, bpf_task_work)
10798 BTF_ID(struct, bpf_prog_aux)
10799 BTF_ID(struct, bpf_timer)
10800 
10801 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10802 				    const struct btf_param *arg, int type)
10803 {
10804 	const struct btf_type *t;
10805 	u32 res_id;
10806 
10807 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10808 	if (!t)
10809 		return false;
10810 	if (!btf_type_is_ptr(t))
10811 		return false;
10812 	t = btf_type_skip_modifiers(btf, t->type, &res_id);
10813 	if (!t)
10814 		return false;
10815 	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
10816 }
10817 
10818 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10819 {
10820 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
10821 }
10822 
10823 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
10824 {
10825 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
10826 }
10827 
10828 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
10829 {
10830 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
10831 }
10832 
10833 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
10834 {
10835 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
10836 }
10837 
10838 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
10839 {
10840 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
10841 }
10842 
10843 static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
10844 {
10845 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
10846 }
10847 
10848 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
10849 {
10850 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
10851 }
10852 
10853 static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
10854 {
10855 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
10856 }
10857 
10858 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
10859 {
10860 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
10861 }
10862 
10863 static bool is_rbtree_node_type(const struct btf_type *t)
10864 {
10865 	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
10866 }
10867 
10868 static bool is_list_node_type(const struct btf_type *t)
10869 {
10870 	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
10871 }
10872 
10873 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
10874 				  const struct btf_param *arg)
10875 {
10876 	const struct btf_type *t;
10877 
10878 	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
10879 	if (!t)
10880 		return false;
10881 
10882 	return true;
10883 }
10884 
10885 static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
10886 {
10887 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
10888 }
10889 
10890 /*
10891  * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF:
10892  *   - the _impl prototype with full arg list (meta->func_proto)
10893  *   - the BPF API prototype w/o implicit args (func->type in BTF)
10894  * To determine whether an argument is implicit, we compare its position
10895  * against the number of arguments in the prototype w/o implicit args.
10896  */
10897 static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx)
10898 {
10899 	const struct btf_type *func, *func_proto;
10900 	u32 argn;
10901 
10902 	if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS))
10903 		return false;
10904 
10905 	func = btf_type_by_id(meta->btf, meta->func_id);
10906 	func_proto = btf_type_by_id(meta->btf, func->type);
10907 	argn = btf_type_vlen(func_proto);
10908 
10909 	return argn <= arg_idx;
10910 }
10911 
10912 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
10913 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
10914 					const struct btf *btf,
10915 					const struct btf_type *t, int rec)
10916 {
10917 	const struct btf_type *member_type;
10918 	const struct btf_member *member;
10919 	u32 i;
10920 
10921 	if (!btf_type_is_struct(t))
10922 		return false;
10923 
10924 	for_each_member(i, t, member) {
10925 		const struct btf_array *array;
10926 
10927 		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
10928 		if (btf_type_is_struct(member_type)) {
10929 			if (rec >= 3) {
10930 				verbose(env, "max struct nesting depth exceeded\n");
10931 				return false;
10932 			}
10933 			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
10934 				return false;
10935 			continue;
10936 		}
10937 		if (btf_type_is_array(member_type)) {
10938 			array = btf_array(member_type);
10939 			if (!array->nelems)
10940 				return false;
10941 			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
10942 			if (!btf_type_is_scalar(member_type))
10943 				return false;
10944 			continue;
10945 		}
10946 		if (!btf_type_is_scalar(member_type))
10947 			return false;
10948 	}
10949 	return true;
10950 }
10951 
/* Kinds of pointer argument a kfunc can take. The code that assigns and
 * consumes these values is outside this part of the file; the per-entry
 * notes below are the only semantics documented here.
 */
10952 enum kfunc_ptr_arg_type {
10953 	KF_ARG_PTR_TO_CTX,
10954 	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
10955 	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
10956 	KF_ARG_PTR_TO_DYNPTR,
10957 	KF_ARG_PTR_TO_ITER,
10958 	KF_ARG_PTR_TO_LIST_HEAD,
10959 	KF_ARG_PTR_TO_LIST_NODE,
10960 	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
10961 	KF_ARG_PTR_TO_MEM,
10962 	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
10963 	KF_ARG_PTR_TO_CALLBACK,
10964 	KF_ARG_PTR_TO_RB_ROOT,
10965 	KF_ARG_PTR_TO_RB_NODE,
10966 	KF_ARG_PTR_TO_NULL,
10967 	KF_ARG_PTR_TO_CONST_STR,
10968 	KF_ARG_PTR_TO_MAP,
10969 	KF_ARG_PTR_TO_TIMER,
10970 	KF_ARG_PTR_TO_WORKQUEUE,
10971 	KF_ARG_PTR_TO_IRQ_FLAG,
10972 	KF_ARG_PTR_TO_RES_SPIN_LOCK,
10973 	KF_ARG_PTR_TO_TASK_WORK,
10974 };
10975 
/* Indices into special_kfunc_list[]. Enumerator KF_<name> selects the BTF
 * id of kfunc <name>, e.g. special_kfunc_list[KF_bpf_obj_new]. The order
 * here must match the order of the entries in special_kfunc_list exactly;
 * add new kfuncs at the same position in both.
 */
10976 enum special_kfunc_type {
10977 	KF_bpf_obj_new_impl,
10978 	KF_bpf_obj_new,
10979 	KF_bpf_obj_drop_impl,
10980 	KF_bpf_obj_drop,
10981 	KF_bpf_refcount_acquire_impl,
10982 	KF_bpf_refcount_acquire,
10983 	KF_bpf_list_push_front_impl,
10984 	KF_bpf_list_push_front,
10985 	KF_bpf_list_push_back_impl,
10986 	KF_bpf_list_push_back,
10987 	KF_bpf_list_add,
10988 	KF_bpf_list_pop_front,
10989 	KF_bpf_list_pop_back,
10990 	KF_bpf_list_del,
10991 	KF_bpf_list_front,
10992 	KF_bpf_list_back,
10993 	KF_bpf_list_is_first,
10994 	KF_bpf_list_is_last,
10995 	KF_bpf_list_empty,
10996 	KF_bpf_cast_to_kern_ctx,
10997 	KF_bpf_rdonly_cast,
10998 	KF_bpf_rcu_read_lock,
10999 	KF_bpf_rcu_read_unlock,
11000 	KF_bpf_rbtree_remove,
11001 	KF_bpf_rbtree_add_impl,
11002 	KF_bpf_rbtree_add,
11003 	KF_bpf_rbtree_first,
11004 	KF_bpf_rbtree_root,
11005 	KF_bpf_rbtree_left,
11006 	KF_bpf_rbtree_right,
11007 	KF_bpf_dynptr_from_skb,
11008 	KF_bpf_dynptr_from_xdp,
11009 	KF_bpf_dynptr_from_skb_meta,
11010 	KF_bpf_xdp_pull_data,
11011 	KF_bpf_dynptr_slice,
11012 	KF_bpf_dynptr_slice_rdwr,
11013 	KF_bpf_dynptr_clone,
11014 	KF_bpf_percpu_obj_new_impl,
11015 	KF_bpf_percpu_obj_new,
11016 	KF_bpf_percpu_obj_drop_impl,
11017 	KF_bpf_percpu_obj_drop,
11018 	KF_bpf_throw,
11019 	KF_bpf_wq_set_callback,
11020 	KF_bpf_preempt_disable,
11021 	KF_bpf_preempt_enable,
11022 	KF_bpf_iter_css_task_new,
11023 	KF_bpf_session_cookie,
11024 	KF_bpf_get_kmem_cache,
11025 	KF_bpf_local_irq_save,
11026 	KF_bpf_local_irq_restore,
11027 	KF_bpf_iter_num_new,
11028 	KF_bpf_iter_num_next,
11029 	KF_bpf_iter_num_destroy,
11030 	KF_bpf_set_dentry_xattr,
11031 	KF_bpf_remove_dentry_xattr,
11032 	KF_bpf_res_spin_lock,
11033 	KF_bpf_res_spin_unlock,
11034 	KF_bpf_res_spin_lock_irqsave,
11035 	KF_bpf_res_spin_unlock_irqrestore,
11036 	KF_bpf_dynptr_from_file,
11037 	KF_bpf_dynptr_file_discard,
11038 	KF___bpf_trap,
11039 	KF_bpf_task_work_schedule_signal,
11040 	KF_bpf_task_work_schedule_resume,
11041 	KF_bpf_arena_alloc_pages,
11042 	KF_bpf_arena_free_pages,
11043 	KF_bpf_arena_reserve_pages,
11044 	KF_bpf_session_is_return,
11045 	KF_bpf_stream_vprintk,
11046 	KF_bpf_stream_print_stack,
11047 };
11048 
/* BTF ids of the kfuncs that get special treatment, indexed by
 * enum special_kfunc_type. Entries are positional and must stay in the
 * same order as that enum.
 *
 * Where a kfunc only exists under a config option, the #else branch emits
 * one BTF_ID_UNUSED per entry in the #ifdef branch, so the entries that
 * follow keep the same index whether or not the option is enabled.
 */
11049 BTF_ID_LIST(special_kfunc_list)
11050 BTF_ID(func, bpf_obj_new_impl)
11051 BTF_ID(func, bpf_obj_new)
11052 BTF_ID(func, bpf_obj_drop_impl)
11053 BTF_ID(func, bpf_obj_drop)
11054 BTF_ID(func, bpf_refcount_acquire_impl)
11055 BTF_ID(func, bpf_refcount_acquire)
11056 BTF_ID(func, bpf_list_push_front_impl)
11057 BTF_ID(func, bpf_list_push_front)
11058 BTF_ID(func, bpf_list_push_back_impl)
11059 BTF_ID(func, bpf_list_push_back)
11060 BTF_ID(func, bpf_list_add)
11061 BTF_ID(func, bpf_list_pop_front)
11062 BTF_ID(func, bpf_list_pop_back)
11063 BTF_ID(func, bpf_list_del)
11064 BTF_ID(func, bpf_list_front)
11065 BTF_ID(func, bpf_list_back)
11066 BTF_ID(func, bpf_list_is_first)
11067 BTF_ID(func, bpf_list_is_last)
11068 BTF_ID(func, bpf_list_empty)
11069 BTF_ID(func, bpf_cast_to_kern_ctx)
11070 BTF_ID(func, bpf_rdonly_cast)
11071 BTF_ID(func, bpf_rcu_read_lock)
11072 BTF_ID(func, bpf_rcu_read_unlock)
11073 BTF_ID(func, bpf_rbtree_remove)
11074 BTF_ID(func, bpf_rbtree_add_impl)
11075 BTF_ID(func, bpf_rbtree_add)
11076 BTF_ID(func, bpf_rbtree_first)
11077 BTF_ID(func, bpf_rbtree_root)
11078 BTF_ID(func, bpf_rbtree_left)
11079 BTF_ID(func, bpf_rbtree_right)
/* Four CONFIG_NET-only kfuncs, four placeholders otherwise. */
11080 #ifdef CONFIG_NET
11081 BTF_ID(func, bpf_dynptr_from_skb)
11082 BTF_ID(func, bpf_dynptr_from_xdp)
11083 BTF_ID(func, bpf_dynptr_from_skb_meta)
11084 BTF_ID(func, bpf_xdp_pull_data)
11085 #else
11086 BTF_ID_UNUSED
11087 BTF_ID_UNUSED
11088 BTF_ID_UNUSED
11089 BTF_ID_UNUSED
11090 #endif
11091 BTF_ID(func, bpf_dynptr_slice)
11092 BTF_ID(func, bpf_dynptr_slice_rdwr)
11093 BTF_ID(func, bpf_dynptr_clone)
11094 BTF_ID(func, bpf_percpu_obj_new_impl)
11095 BTF_ID(func, bpf_percpu_obj_new)
11096 BTF_ID(func, bpf_percpu_obj_drop_impl)
11097 BTF_ID(func, bpf_percpu_obj_drop)
11098 BTF_ID(func, bpf_throw)
11099 BTF_ID(func, bpf_wq_set_callback)
11100 BTF_ID(func, bpf_preempt_disable)
11101 BTF_ID(func, bpf_preempt_enable)
11102 #ifdef CONFIG_CGROUPS
11103 BTF_ID(func, bpf_iter_css_task_new)
11104 #else
11105 BTF_ID_UNUSED
11106 #endif
11107 #ifdef CONFIG_BPF_EVENTS
11108 BTF_ID(func, bpf_session_cookie)
11109 #else
11110 BTF_ID_UNUSED
11111 #endif
11112 BTF_ID(func, bpf_get_kmem_cache)
11113 BTF_ID(func, bpf_local_irq_save)
11114 BTF_ID(func, bpf_local_irq_restore)
11115 BTF_ID(func, bpf_iter_num_new)
11116 BTF_ID(func, bpf_iter_num_next)
11117 BTF_ID(func, bpf_iter_num_destroy)
/* Two CONFIG_BPF_LSM-only kfuncs, two placeholders otherwise. */
11118 #ifdef CONFIG_BPF_LSM
11119 BTF_ID(func, bpf_set_dentry_xattr)
11120 BTF_ID(func, bpf_remove_dentry_xattr)
11121 #else
11122 BTF_ID_UNUSED
11123 BTF_ID_UNUSED
11124 #endif
11125 BTF_ID(func, bpf_res_spin_lock)
11126 BTF_ID(func, bpf_res_spin_unlock)
11127 BTF_ID(func, bpf_res_spin_lock_irqsave)
11128 BTF_ID(func, bpf_res_spin_unlock_irqrestore)
11129 BTF_ID(func, bpf_dynptr_from_file)
11130 BTF_ID(func, bpf_dynptr_file_discard)
11131 BTF_ID(func, __bpf_trap)
11132 BTF_ID(func, bpf_task_work_schedule_signal)
11133 BTF_ID(func, bpf_task_work_schedule_resume)
11134 BTF_ID(func, bpf_arena_alloc_pages)
11135 BTF_ID(func, bpf_arena_free_pages)
11136 BTF_ID(func, bpf_arena_reserve_pages)
11137 #ifdef CONFIG_BPF_EVENTS
11138 BTF_ID(func, bpf_session_is_return)
11139 #else
11140 BTF_ID_UNUSED
11141 #endif
11142 BTF_ID(func, bpf_stream_vprintk)
11143 BTF_ID(func, bpf_stream_print_stack)
11144 
11145 static bool is_bpf_obj_new_kfunc(u32 func_id)
11146 {
11147 	return func_id == special_kfunc_list[KF_bpf_obj_new] ||
11148 	       func_id == special_kfunc_list[KF_bpf_obj_new_impl];
11149 }
11150 
11151 static bool is_bpf_percpu_obj_new_kfunc(u32 func_id)
11152 {
11153 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] ||
11154 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl];
11155 }
11156 
11157 static bool is_bpf_obj_drop_kfunc(u32 func_id)
11158 {
11159 	return func_id == special_kfunc_list[KF_bpf_obj_drop] ||
11160 	       func_id == special_kfunc_list[KF_bpf_obj_drop_impl];
11161 }
11162 
11163 static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id)
11164 {
11165 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] ||
11166 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl];
11167 }
11168 
11169 static bool is_bpf_refcount_acquire_kfunc(u32 func_id)
11170 {
11171 	return func_id == special_kfunc_list[KF_bpf_refcount_acquire] ||
11172 	       func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11173 }
11174 
11175 static bool is_bpf_list_push_kfunc(u32 func_id)
11176 {
11177 	return func_id == special_kfunc_list[KF_bpf_list_push_front] ||
11178 	       func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11179 	       func_id == special_kfunc_list[KF_bpf_list_push_back] ||
11180 	       func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11181 	       func_id == special_kfunc_list[KF_bpf_list_add];
11182 }
11183 
11184 static bool is_bpf_rbtree_add_kfunc(u32 func_id)
11185 {
11186 	return func_id == special_kfunc_list[KF_bpf_rbtree_add] ||
11187 	       func_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11188 }
11189 
11190 static bool is_task_work_add_kfunc(u32 func_id)
11191 {
11192 	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
11193 	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
11194 }
11195 
11196 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11197 {
11198 	if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref)
11199 		return false;
11200 
11201 	return meta->kfunc_flags & KF_RET_NULL;
11202 }
11203 
11204 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
11205 {
11206 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
11207 }
11208 
11209 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
11210 {
11211 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
11212 }
11213 
11214 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
11215 {
11216 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
11217 }
11218 
11219 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
11220 {
11221 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
11222 }
11223 
11224 bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
11225 {
11226 	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
11227 }
11228 
11229 static enum kfunc_ptr_arg_type
11230 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_func_state *caller,
11231 		       struct bpf_reg_state *regs, struct bpf_kfunc_call_arg_meta *meta,
11232 		       const struct btf_type *t, const struct btf_type *ref_t,
11233 		       const char *ref_tname, const struct btf_param *args,
11234 		       int arg, int nargs, argno_t argno, struct bpf_reg_state *reg)
11235 {
11236 	bool arg_mem_size = false;
11237 
11238 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
11239 	    meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
11240 	    meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
11241 		return KF_ARG_PTR_TO_CTX;
11242 
11243 	if (arg + 1 < nargs &&
11244 	    (is_kfunc_arg_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1)) ||
11245 	     is_kfunc_arg_const_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1))))
11246 		arg_mem_size = true;
11247 
11248 	/* In this function, we verify the kfunc's BTF as per the argument type,
11249 	 * leaving the rest of the verification with respect to the register
11250 	 * type to our caller. When a set of conditions hold in the BTF type of
11251 	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
11252 	 */
11253 	if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), arg))
11254 		return KF_ARG_PTR_TO_CTX;
11255 
11256 	if (is_kfunc_arg_nullable(meta->btf, &args[arg]) && bpf_register_is_null(reg) &&
11257 	    !arg_mem_size)
11258 		return KF_ARG_PTR_TO_NULL;
11259 
11260 	if (is_kfunc_arg_alloc_obj(meta->btf, &args[arg]))
11261 		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
11262 
11263 	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[arg]))
11264 		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
11265 
11266 	if (is_kfunc_arg_dynptr(meta->btf, &args[arg]))
11267 		return KF_ARG_PTR_TO_DYNPTR;
11268 
11269 	if (is_kfunc_arg_iter(meta, arg, &args[arg]))
11270 		return KF_ARG_PTR_TO_ITER;
11271 
11272 	if (is_kfunc_arg_list_head(meta->btf, &args[arg]))
11273 		return KF_ARG_PTR_TO_LIST_HEAD;
11274 
11275 	if (is_kfunc_arg_list_node(meta->btf, &args[arg]))
11276 		return KF_ARG_PTR_TO_LIST_NODE;
11277 
11278 	if (is_kfunc_arg_rbtree_root(meta->btf, &args[arg]))
11279 		return KF_ARG_PTR_TO_RB_ROOT;
11280 
11281 	if (is_kfunc_arg_rbtree_node(meta->btf, &args[arg]))
11282 		return KF_ARG_PTR_TO_RB_NODE;
11283 
11284 	if (is_kfunc_arg_const_str(meta->btf, &args[arg]))
11285 		return KF_ARG_PTR_TO_CONST_STR;
11286 
11287 	if (is_kfunc_arg_map(meta->btf, &args[arg]))
11288 		return KF_ARG_PTR_TO_MAP;
11289 
11290 	if (is_kfunc_arg_wq(meta->btf, &args[arg]))
11291 		return KF_ARG_PTR_TO_WORKQUEUE;
11292 
11293 	if (is_kfunc_arg_timer(meta->btf, &args[arg]))
11294 		return KF_ARG_PTR_TO_TIMER;
11295 
11296 	if (is_kfunc_arg_task_work(meta->btf, &args[arg]))
11297 		return KF_ARG_PTR_TO_TASK_WORK;
11298 
11299 	if (is_kfunc_arg_irq_flag(meta->btf, &args[arg]))
11300 		return KF_ARG_PTR_TO_IRQ_FLAG;
11301 
11302 	if (is_kfunc_arg_res_spin_lock(meta->btf, &args[arg]))
11303 		return KF_ARG_PTR_TO_RES_SPIN_LOCK;
11304 
11305 	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
11306 		if (!btf_type_is_struct(ref_t)) {
11307 			verbose(env, "kernel function %s %s pointer type %s %s is not supported\n",
11308 				meta->func_name, reg_arg_name(env, argno),
11309 				btf_type_str(ref_t), ref_tname);
11310 			return -EINVAL;
11311 		}
11312 		return KF_ARG_PTR_TO_BTF_ID;
11313 	}
11314 
11315 	if (is_kfunc_arg_callback(env, meta->btf, &args[arg]))
11316 		return KF_ARG_PTR_TO_CALLBACK;
11317 
11318 	/* This is the catch all argument type of register types supported by
11319 	 * check_helper_mem_access. However, we only allow when argument type is
11320 	 * pointer to scalar, or struct composed (recursively) of scalars. When
11321 	 * arg_mem_size is true, the pointer can be void *.
11322 	 */
11323 	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
11324 	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
11325 		verbose(env, "%s pointer type %s %s must point to %sscalar, or struct with scalar\n",
11326 			reg_arg_name(env, argno),
11327 			btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
11328 		return -EINVAL;
11329 	}
11330 	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
11331 }
11332 
11333 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
11334 					struct bpf_reg_state *reg,
11335 					const struct btf_type *ref_t,
11336 					const char *ref_tname, u32 ref_id,
11337 					struct bpf_kfunc_call_arg_meta *meta,
11338 					int arg, argno_t argno)
11339 {
11340 	const struct btf_type *reg_ref_t;
11341 	bool strict_type_match = false;
11342 	const struct btf *reg_btf;
11343 	const char *reg_ref_tname;
11344 	bool taking_projection;
11345 	bool struct_same;
11346 	u32 reg_ref_id;
11347 
11348 	if (base_type(reg->type) == PTR_TO_BTF_ID) {
11349 		reg_btf = reg->btf;
11350 		reg_ref_id = reg->btf_id;
11351 	} else {
11352 		reg_btf = btf_vmlinux;
11353 		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
11354 	}
11355 
11356 	/* Enforce strict type matching for calls to kfuncs that are acquiring
11357 	 * or releasing a reference, or are no-cast aliases. We do _not_
11358 	 * enforce strict matching for kfuncs by default,
11359 	 * as we want to enable BPF programs to pass types that are bitwise
11360 	 * equivalent without forcing them to explicitly cast with something
11361 	 * like bpf_cast_to_kern_ctx().
11362 	 *
11363 	 * For example, say we had a type like the following:
11364 	 *
11365 	 * struct bpf_cpumask {
11366 	 *	cpumask_t cpumask;
11367 	 *	refcount_t usage;
11368 	 * };
11369 	 *
11370 	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
11371 	 * to a struct cpumask, so it would be safe to pass a struct
11372 	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
11373 	 *
11374 	 * The philosophy here is similar to how we allow scalars of different
11375 	 * types to be passed to kfuncs as long as the size is the same. The
11376 	 * only difference here is that we're simply allowing
11377 	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
11378 	 * resolve types.
11379 	 */
11380 	if ((is_kfunc_release(meta) && reg_is_referenced(env, reg)) ||
11381 	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
11382 		strict_type_match = true;
11383 
11384 	WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off));
11385 
11386 	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
11387 	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
11388 	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value,
11389 					   meta->btf, ref_id, strict_type_match);
11390 	/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
11391 	 * actually use it -- it must cast to the underlying type. So we allow
11392 	 * caller to pass in the underlying type.
11393 	 */
11394 	taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
11395 	if (!taking_projection && !struct_same) {
11396 		verbose(env, "kernel function %s %s expected pointer to %s %s but %s has a pointer to %s %s\n",
11397 			meta->func_name, reg_arg_name(env, argno),
11398 			btf_type_str(ref_t), ref_tname, reg_arg_name(env, argno),
11399 			btf_type_str(reg_ref_t), reg_ref_tname);
11400 		return -EINVAL;
11401 	}
11402 	return 0;
11403 }
11404 
11405 static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
11406 			     struct bpf_kfunc_call_arg_meta *meta)
11407 {
11408 	int err, spi, kfunc_class = IRQ_NATIVE_KFUNC;
11409 	bool irq_save;
11410 
11411 	if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] ||
11412 	    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) {
11413 		irq_save = true;
11414 		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
11415 			kfunc_class = IRQ_LOCK_KFUNC;
11416 	} else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] ||
11417 		   meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) {
11418 		irq_save = false;
11419 		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
11420 			kfunc_class = IRQ_LOCK_KFUNC;
11421 	} else {
11422 		verifier_bug(env, "unknown irq flags kfunc");
11423 		return -EFAULT;
11424 	}
11425 
11426 	if (irq_save) {
11427 		if (!is_irq_flag_reg_valid_uninit(env, reg)) {
11428 			verbose(env, "expected uninitialized irq flag as %s\n",
11429 				reg_arg_name(env, argno));
11430 			return -EINVAL;
11431 		}
11432 
11433 		err = check_mem_access(env, env->insn_idx, reg, argno, 0, BPF_DW,
11434 				       BPF_WRITE, -1, false, false);
11435 		if (err)
11436 			return err;
11437 
11438 		err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class);
11439 		if (err)
11440 			return err;
11441 	} else {
11442 		err = is_irq_flag_reg_valid_init(env, reg);
11443 		if (err) {
11444 			verbose(env, "expected an initialized irq flag as %s\n",
11445 				reg_arg_name(env, argno));
11446 			return err;
11447 		}
11448 
11449 		spi = irq_flag_get_spi(env, reg);
11450 		if (spi < 0)
11451 			return spi;
11452 
11453 		mark_stack_slots_scratched(env, spi, 1);
11454 
11455 		err = unmark_stack_slot_irq_flag(env, reg, kfunc_class);
11456 		if (err)
11457 			return err;
11458 	}
11459 	return 0;
11460 }
11461 
11462 
11463 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11464 {
11465 	struct btf_record *rec = reg_btf_record(reg);
11466 
11467 	if (!env->cur_state->active_locks) {
11468 		verifier_bug(env, "%s w/o active lock", __func__);
11469 		return -EFAULT;
11470 	}
11471 
11472 	if (type_flag(reg->type) & NON_OWN_REF) {
11473 		verifier_bug(env, "NON_OWN_REF already set");
11474 		return -EFAULT;
11475 	}
11476 
11477 	reg->type |= NON_OWN_REF;
11478 	if (rec->refcount_off >= 0)
11479 		reg->type |= MEM_RCU;
11480 
11481 	return 0;
11482 }
11483 
11484 static void ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 id)
11485 {
11486 	struct bpf_func_state *unused;
11487 	struct bpf_reg_state *reg;
11488 
11489 	WARN_ON_ONCE(release_reference_nomark(env->cur_state, id));
11490 
11491 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
11492 		if (reg->id == id) {
11493 			reg->id = 0;
11494 			ref_set_non_owning(env, reg);
11495 		}
11496 	}));
11497 
11498 	return;
11499 }
11500 
11501 /* Implementation details:
11502  *
11503  * Each register points to some region of memory, which we define as an
11504  * allocation. Each allocation may embed a bpf_spin_lock which protects any
11505  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
11506  * allocation. The lock and the data it protects are colocated in the same
11507  * memory region.
11508  *
11509  * Hence, everytime a register holds a pointer value pointing to such
11510  * allocation, the verifier preserves a unique reg->id for it.
11511  *
11512  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
11513  * bpf_spin_lock is called.
11514  *
11515  * To enable this, lock state in the verifier captures two values:
11516  *	active_lock.ptr = Register's type specific pointer
11517  *	active_lock.id  = A unique ID for each register pointer value
11518  *
11519  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
11520  * supported register types.
11521  *
11522  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
11523  * allocated objects is the reg->btf pointer.
11524  *
11525  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
11526  * can establish the provenance of the map value statically for each distinct
11527  * lookup into such maps. They always contain a single map value hence unique
11528  * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
11529  *
11530  * So, in case of global variables, they use array maps with max_entries = 1,
11531  * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
11532  * into the same map value as max_entries is 1, as described above).
11533  *
11534  * In case of inner map lookups, the inner map pointer has same map_ptr as the
11535  * outer map pointer (in verifier context), but each lookup into an inner map
11536  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
11537  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
11538  * will get different reg->id assigned to each lookup, hence different
11539  * active_lock.id.
11540  *
11541  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
11542  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
11543  * returned from bpf_obj_new. Each allocation receives a new reg->id.
11544  */
11545 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11546 {
11547 	struct bpf_reference_state *s;
11548 	void *ptr;
11549 	u32 id;
11550 
11551 	switch ((int)reg->type) {
11552 	case PTR_TO_MAP_VALUE:
11553 		ptr = reg->map_ptr;
11554 		break;
11555 	case PTR_TO_BTF_ID | MEM_ALLOC:
11556 		ptr = reg->btf;
11557 		break;
11558 	default:
11559 		verifier_bug(env, "unknown reg type for lock check");
11560 		return -EFAULT;
11561 	}
11562 	id = reg->id;
11563 
11564 	if (!env->cur_state->active_locks)
11565 		return -EINVAL;
11566 	s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr);
11567 	if (!s) {
11568 		verbose(env, "held lock and object are not in the same allocation\n");
11569 		return -EINVAL;
11570 	}
11571 	return 0;
11572 }
11573 
11574 static bool is_bpf_list_api_kfunc(u32 btf_id)
11575 {
11576 	return is_bpf_list_push_kfunc(btf_id) ||
11577 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11578 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
11579 	       btf_id == special_kfunc_list[KF_bpf_list_del] ||
11580 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
11581 	       btf_id == special_kfunc_list[KF_bpf_list_back] ||
11582 	       btf_id == special_kfunc_list[KF_bpf_list_is_first] ||
11583 	       btf_id == special_kfunc_list[KF_bpf_list_is_last] ||
11584 	       btf_id == special_kfunc_list[KF_bpf_list_empty];
11585 }
11586 
11587 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11588 {
11589 	return is_bpf_rbtree_add_kfunc(btf_id) ||
11590 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11591 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
11592 	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
11593 	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11594 	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
11595 }
11596 
11597 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
11598 {
11599 	return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
11600 	       btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
11601 	       btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
11602 }
11603 
11604 static bool is_bpf_graph_api_kfunc(u32 btf_id)
11605 {
11606 	return is_bpf_list_api_kfunc(btf_id) ||
11607 	       is_bpf_rbtree_api_kfunc(btf_id) ||
11608 	       is_bpf_refcount_acquire_kfunc(btf_id);
11609 }
11610 
11611 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
11612 {
11613 	return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
11614 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
11615 	       btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
11616 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
11617 }
11618 
11619 static bool is_bpf_arena_kfunc(u32 btf_id)
11620 {
11621 	return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] ||
11622 	       btf_id == special_kfunc_list[KF_bpf_arena_free_pages] ||
11623 	       btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages];
11624 }
11625 
11626 static bool is_bpf_stream_kfunc(u32 btf_id)
11627 {
11628 	return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] ||
11629 	       btf_id == special_kfunc_list[KF_bpf_stream_print_stack];
11630 }
11631 
11632 static bool kfunc_spin_allowed(u32 btf_id)
11633 {
11634 	return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
11635 	       is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) ||
11636 	       is_bpf_stream_kfunc(btf_id);
11637 }
11638 
11639 static bool is_sync_callback_calling_kfunc(u32 btf_id)
11640 {
11641 	return is_bpf_rbtree_add_kfunc(btf_id);
11642 }
11643 
11644 static bool is_async_callback_calling_kfunc(u32 btf_id)
11645 {
11646 	return is_bpf_wq_set_callback_kfunc(btf_id) ||
11647 	       is_task_work_add_kfunc(btf_id);
11648 }
11649 
11650 bool bpf_is_throw_kfunc(struct bpf_insn *insn)
11651 {
11652 	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
11653 	       insn->imm == special_kfunc_list[KF_bpf_throw];
11654 }
11655 
11656 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id)
11657 {
11658 	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback];
11659 }
11660 
11661 static bool is_callback_calling_kfunc(u32 btf_id)
11662 {
11663 	return is_sync_callback_calling_kfunc(btf_id) ||
11664 	       is_async_callback_calling_kfunc(btf_id);
11665 }
11666 
11667 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
11668 {
11669 	return is_bpf_rbtree_api_kfunc(btf_id);
11670 }
11671 
11672 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
11673 					  enum btf_field_type head_field_type,
11674 					  u32 kfunc_btf_id)
11675 {
11676 	bool ret;
11677 
11678 	switch (head_field_type) {
11679 	case BPF_LIST_HEAD:
11680 		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
11681 		break;
11682 	case BPF_RB_ROOT:
11683 		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
11684 		break;
11685 	default:
11686 		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
11687 			btf_field_type_name(head_field_type));
11688 		return false;
11689 	}
11690 
11691 	if (!ret)
11692 		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
11693 			btf_field_type_name(head_field_type));
11694 	return ret;
11695 }
11696 
11697 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
11698 					  enum btf_field_type node_field_type,
11699 					  u32 kfunc_btf_id)
11700 {
11701 	bool ret;
11702 
11703 	switch (node_field_type) {
11704 	case BPF_LIST_NODE:
11705 		ret = is_bpf_list_push_kfunc(kfunc_btf_id) ||
11706 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
11707 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_first] ||
11708 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_last];
11709 		break;
11710 	case BPF_RB_NODE:
11711 		ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) ||
11712 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11713 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11714 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
11715 		break;
11716 	default:
11717 		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
11718 			btf_field_type_name(node_field_type));
11719 		return false;
11720 	}
11721 
11722 	if (!ret)
11723 		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
11724 			btf_field_type_name(node_field_type));
11725 	return ret;
11726 }
11727 
11728 static int
11729 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
11730 				   struct bpf_reg_state *reg, argno_t argno,
11731 				   struct bpf_kfunc_call_arg_meta *meta,
11732 				   enum btf_field_type head_field_type,
11733 				   struct btf_field **head_field)
11734 {
11735 	const char *head_type_name;
11736 	struct btf_field *field;
11737 	struct btf_record *rec;
11738 	u32 head_off;
11739 
11740 	if (meta->btf != btf_vmlinux) {
11741 		verifier_bug(env, "unexpected btf mismatch in kfunc call");
11742 		return -EFAULT;
11743 	}
11744 
11745 	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
11746 		return -EFAULT;
11747 
11748 	head_type_name = btf_field_type_name(head_field_type);
11749 	if (!tnum_is_const(reg->var_off)) {
11750 		verbose(env,
11751 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
11752 			reg_arg_name(env, argno), head_type_name);
11753 		return -EINVAL;
11754 	}
11755 
11756 	rec = reg_btf_record(reg);
11757 	head_off = reg->var_off.value;
11758 	field = btf_record_find(rec, head_off, head_field_type);
11759 	if (!field) {
11760 		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
11761 		return -EINVAL;
11762 	}
11763 
11764 	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
11765 	if (check_reg_allocation_locked(env, reg)) {
11766 		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
11767 			rec->spin_lock_off, head_type_name);
11768 		return -EINVAL;
11769 	}
11770 
11771 	if (*head_field) {
11772 		verifier_bug(env, "repeating %s arg", head_type_name);
11773 		return -EFAULT;
11774 	}
11775 	*head_field = field;
11776 	return 0;
11777 }
11778 
11779 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11780 					   struct bpf_reg_state *reg, argno_t argno,
11781 					   struct bpf_kfunc_call_arg_meta *meta)
11782 {
11783 	return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_LIST_HEAD,
11784 							  &meta->arg_list_head.field);
11785 }
11786 
11787 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11788 					     struct bpf_reg_state *reg, argno_t argno,
11789 					     struct bpf_kfunc_call_arg_meta *meta)
11790 {
11791 	return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_RB_ROOT,
11792 							  &meta->arg_rbtree_root.field);
11793 }
11794 
11795 static int
11796 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
11797 				   struct bpf_reg_state *reg, argno_t argno,
11798 				   struct bpf_kfunc_call_arg_meta *meta,
11799 				   enum btf_field_type head_field_type,
11800 				   enum btf_field_type node_field_type,
11801 				   struct btf_field **node_field)
11802 {
11803 	const char *node_type_name;
11804 	const struct btf_type *et, *t;
11805 	struct btf_field *field;
11806 	u32 node_off;
11807 
11808 	if (meta->btf != btf_vmlinux) {
11809 		verifier_bug(env, "unexpected btf mismatch in kfunc call");
11810 		return -EFAULT;
11811 	}
11812 
11813 	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
11814 		return -EFAULT;
11815 
11816 	node_type_name = btf_field_type_name(node_field_type);
11817 	if (!tnum_is_const(reg->var_off)) {
11818 		verbose(env,
11819 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
11820 			reg_arg_name(env, argno), node_type_name);
11821 		return -EINVAL;
11822 	}
11823 
11824 	node_off = reg->var_off.value;
11825 	field = reg_find_field_offset(reg, node_off, node_field_type);
11826 	if (!field) {
11827 		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
11828 		return -EINVAL;
11829 	}
11830 
11831 	field = *node_field;
11832 
11833 	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
11834 	t = btf_type_by_id(reg->btf, reg->btf_id);
11835 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
11836 				  field->graph_root.value_btf_id, true)) {
11837 		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
11838 			"in struct %s, but arg is at offset=%d in struct %s\n",
11839 			btf_field_type_name(head_field_type),
11840 			btf_field_type_name(node_field_type),
11841 			field->graph_root.node_offset,
11842 			btf_name_by_offset(field->graph_root.btf, et->name_off),
11843 			node_off, btf_name_by_offset(reg->btf, t->name_off));
11844 		return -EINVAL;
11845 	}
11846 	meta->arg_btf = reg->btf;
11847 	meta->arg_btf_id = reg->btf_id;
11848 
11849 	if (node_off != field->graph_root.node_offset) {
11850 		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
11851 			node_off, btf_field_type_name(node_field_type),
11852 			field->graph_root.node_offset,
11853 			btf_name_by_offset(field->graph_root.btf, et->name_off));
11854 		return -EINVAL;
11855 	}
11856 
11857 	return 0;
11858 }
11859 
11860 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11861 					   struct bpf_reg_state *reg, argno_t argno,
11862 					   struct bpf_kfunc_call_arg_meta *meta)
11863 {
11864 	return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta,
11865 						  BPF_LIST_HEAD, BPF_LIST_NODE,
11866 						  &meta->arg_list_head.field);
11867 }
11868 
11869 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11870 					     struct bpf_reg_state *reg, argno_t argno,
11871 					     struct bpf_kfunc_call_arg_meta *meta)
11872 {
11873 	return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta,
11874 						  BPF_RB_ROOT, BPF_RB_NODE,
11875 						  &meta->arg_rbtree_root.field);
11876 }
11877 
11878 /*
11879  * css_task iter allowlist is needed to avoid dead locking on css_set_lock.
11880  * LSM hooks and iters (both sleepable and non-sleepable) are safe.
11881  * Any sleepable progs are also safe since bpf_check_attach_target() enforce
11882  * them can only be attached to some specific hook points.
11883  */
11884 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
11885 {
11886 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
11887 
11888 	switch (prog_type) {
11889 	case BPF_PROG_TYPE_LSM:
11890 		return true;
11891 	case BPF_PROG_TYPE_TRACING:
11892 		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
11893 			return true;
11894 		fallthrough;
11895 	default:
11896 		return in_sleepable(env);
11897 	}
11898 }
11899 
11900 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
11901 			    int insn_idx)
11902 {
11903 	const char *func_name = meta->func_name, *ref_tname;
11904 	struct bpf_func_state *caller = cur_func(env);
11905 	struct bpf_reg_state *regs = cur_regs(env);
11906 	const struct btf *btf = meta->btf;
11907 	const struct btf_param *args;
11908 	struct btf_record *rec;
11909 	u32 i, nargs;
11910 	int ret;
11911 
11912 	args = (const struct btf_param *)(meta->func_proto + 1);
11913 	nargs = btf_type_vlen(meta->func_proto);
11914 	if (nargs > MAX_BPF_FUNC_ARGS) {
11915 		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
11916 			MAX_BPF_FUNC_ARGS);
11917 		return -EINVAL;
11918 	}
11919 	if (nargs > MAX_BPF_FUNC_REG_ARGS && !bpf_jit_supports_stack_args()) {
11920 		verbose(env, "JIT does not support kfunc %s() with %d args\n",
11921 			func_name, nargs);
11922 		return -ENOTSUPP;
11923 	}
11924 
11925 	ret = check_outgoing_stack_args(env, caller, nargs);
11926 	if (ret)
11927 		return ret;
11928 
11929 	/* Check that BTF function arguments match actual types that the
11930 	 * verifier sees.
11931 	 */
11932 	for (i = 0; i < nargs; i++) {
11933 		struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i);
11934 		const struct btf_type *t, *ref_t, *resolve_ret;
11935 		enum bpf_arg_type arg_type = ARG_DONTCARE;
11936 		argno_t argno = argno_from_arg(i + 1);
11937 		int regno = reg_from_argno(argno);
11938 		u32 ref_id, type_size;
11939 		bool is_ret_buf_sz = false;
11940 		int kf_arg_type;
11941 
11942 		if (is_kfunc_arg_prog_aux(btf, &args[i])) {
11943 			/* Reject repeated use bpf_prog_aux */
11944 			if (meta->arg_prog) {
11945 				verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
11946 				return -EFAULT;
11947 			}
11948 			if (regno < 0) {
11949 				verbose(env, "%s prog->aux cannot be a stack argument\n",
11950 					reg_arg_name(env, argno));
11951 				return -EINVAL;
11952 			}
11953 			meta->arg_prog = true;
11954 			cur_aux(env)->arg_prog = regno;
11955 			continue;
11956 		}
11957 
11958 		if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i))
11959 			continue;
11960 
11961 		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
11962 
11963 		if (btf_type_is_scalar(t)) {
11964 			if (reg->type != SCALAR_VALUE) {
11965 				verbose(env, "%s is not a scalar\n", reg_arg_name(env, argno));
11966 				return -EINVAL;
11967 			}
11968 
11969 			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
11970 				if (meta->arg_constant.found) {
11971 					verifier_bug(env, "only one constant argument permitted");
11972 					return -EFAULT;
11973 				}
11974 				if (!tnum_is_const(reg->var_off)) {
11975 					verbose(env, "%s must be a known constant\n",
11976 						reg_arg_name(env, argno));
11977 					return -EINVAL;
11978 				}
11979 				if (regno >= 0)
11980 					ret = mark_chain_precision(env, regno);
11981 				else
11982 					ret = mark_stack_arg_precision(env, i);
11983 				if (ret < 0)
11984 					return ret;
11985 				meta->arg_constant.found = true;
11986 				meta->arg_constant.value = reg->var_off.value;
11987 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
11988 				meta->r0_rdonly = true;
11989 				is_ret_buf_sz = true;
11990 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
11991 				is_ret_buf_sz = true;
11992 			}
11993 
11994 			if (is_ret_buf_sz) {
11995 				if (meta->r0_size) {
11996 					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
11997 					return -EINVAL;
11998 				}
11999 
12000 				if (!tnum_is_const(reg->var_off)) {
12001 					verbose(env, "%s is not a const\n",
12002 						reg_arg_name(env, argno));
12003 					return -EINVAL;
12004 				}
12005 
12006 				meta->r0_size = reg->var_off.value;
12007 				if (regno >= 0)
12008 					ret = mark_chain_precision(env, regno);
12009 				else
12010 					ret = mark_stack_arg_precision(env, i);
12011 				if (ret)
12012 					return ret;
12013 			}
12014 			continue;
12015 		}
12016 
12017 		if (!btf_type_is_ptr(t)) {
12018 			verbose(env, "Unrecognized %s type %s\n",
12019 				reg_arg_name(env, argno), btf_type_str(t));
12020 			return -EINVAL;
12021 		}
12022 
12023 		if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) &&
12024 		    !is_kfunc_arg_nullable(meta->btf, &args[i])) {
12025 			verbose(env, "Possibly NULL pointer passed to trusted %s\n",
12026 				reg_arg_name(env, argno));
12027 			return -EACCES;
12028 		}
12029 
12030 		if (regno == meta->release_regno && !is_kfunc_arg_dynptr(meta->btf, &args[i]) &&
12031 		    !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) {
12032 			verbose(env, "release kfunc %s expects referenced PTR_TO_BTF_ID passed to %s\n",
12033 				func_name, reg_arg_name(env, argno));
12034 			return -EINVAL;
12035 		}
12036 
12037 		if (reg_is_referenced(env, reg))
12038 			update_ref_obj(&meta->ref_obj, reg);
12039 
12040 		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
12041 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12042 
12043 		kf_arg_type = get_kfunc_ptr_arg_type(env, caller, regs, meta, t, ref_t, ref_tname,
12044 						     args, i, nargs, argno, reg);
12045 		if (kf_arg_type < 0)
12046 			return kf_arg_type;
12047 
12048 		switch (kf_arg_type) {
12049 		case KF_ARG_PTR_TO_NULL:
12050 			continue;
12051 		case KF_ARG_PTR_TO_MAP:
12052 			if (!reg->map_ptr) {
12053 				verbose(env, "pointer in %s isn't map pointer\n",
12054 					reg_arg_name(env, argno));
12055 				return -EINVAL;
12056 			}
12057 			if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
12058 					      reg->map_ptr->record->task_work_off >= 0)) {
12059 				/* Use map_uid (which is unique id of inner map) to reject:
12060 				 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
12061 				 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
12062 				 * if (inner_map1 && inner_map2) {
12063 				 *     wq = bpf_map_lookup_elem(inner_map1);
12064 				 *     if (wq)
12065 				 *         // mismatch would have been allowed
12066 				 *         bpf_wq_init(wq, inner_map2);
12067 				 * }
12068 				 *
12069 				 * Comparing map_ptr is enough to distinguish normal and outer maps.
12070 				 */
12071 				if (meta->map.ptr != reg->map_ptr ||
12072 				    meta->map.uid != reg->map_uid) {
12073 					if (reg->map_ptr->record->task_work_off >= 0) {
12074 						verbose(env,
12075 							"bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
12076 							meta->map.uid, reg->map_uid);
12077 						return -EINVAL;
12078 					}
12079 					verbose(env,
12080 						"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
12081 						meta->map.uid, reg->map_uid);
12082 					return -EINVAL;
12083 				}
12084 			}
12085 			meta->map.ptr = reg->map_ptr;
12086 			meta->map.uid = reg->map_uid;
12087 			fallthrough;
12088 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12089 		case KF_ARG_PTR_TO_BTF_ID:
12090 			if (!is_trusted_reg(env, reg)) {
12091 				if (!is_kfunc_rcu(meta)) {
12092 					verbose(env, "%s must be referenced or trusted\n",
12093 						reg_arg_name(env, argno));
12094 					return -EINVAL;
12095 				}
12096 				if (!is_rcu_reg(reg)) {
12097 					verbose(env, "%s must be a rcu pointer\n",
12098 						reg_arg_name(env, argno));
12099 					return -EINVAL;
12100 				}
12101 			}
12102 			fallthrough;
12103 		case KF_ARG_PTR_TO_ITER:
12104 		case KF_ARG_PTR_TO_LIST_HEAD:
12105 		case KF_ARG_PTR_TO_LIST_NODE:
12106 		case KF_ARG_PTR_TO_RB_ROOT:
12107 		case KF_ARG_PTR_TO_RB_NODE:
12108 		case KF_ARG_PTR_TO_MEM:
12109 		case KF_ARG_PTR_TO_MEM_SIZE:
12110 		case KF_ARG_PTR_TO_CALLBACK:
12111 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12112 		case KF_ARG_PTR_TO_CONST_STR:
12113 		case KF_ARG_PTR_TO_WORKQUEUE:
12114 		case KF_ARG_PTR_TO_TIMER:
12115 		case KF_ARG_PTR_TO_TASK_WORK:
12116 		case KF_ARG_PTR_TO_IRQ_FLAG:
12117 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
12118 			break;
12119 		case KF_ARG_PTR_TO_DYNPTR:
12120 			arg_type = ARG_PTR_TO_DYNPTR;
12121 			break;
12122 		case KF_ARG_PTR_TO_CTX:
12123 			arg_type = ARG_PTR_TO_CTX;
12124 			break;
12125 		default:
12126 			verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
12127 			return -EFAULT;
12128 		}
12129 
12130 		if (regno == meta->release_regno)
12131 			arg_type |= OBJ_RELEASE;
12132 		ret = check_func_arg_reg_off(env, reg, argno, arg_type);
12133 		if (ret < 0)
12134 			return ret;
12135 
12136 		switch (kf_arg_type) {
12137 		case KF_ARG_PTR_TO_CTX:
12138 			if (reg->type != PTR_TO_CTX) {
12139 				verbose(env, "%s expected pointer to ctx, but got %s\n",
12140 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
12141 				return -EINVAL;
12142 			}
12143 
12144 			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12145 				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
12146 				if (ret < 0)
12147 					return -EINVAL;
12148 				meta->ret_btf_id  = ret;
12149 			}
12150 			break;
12151 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12152 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
12153 				if (!is_bpf_obj_drop_kfunc(meta->func_id)) {
12154 					verbose(env, "%s expected for bpf_obj_drop()\n",
12155 						reg_arg_name(env, argno));
12156 					return -EINVAL;
12157 				}
12158 			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
12159 				if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) {
12160 					verbose(env, "%s expected for bpf_percpu_obj_drop()\n",
12161 						reg_arg_name(env, argno));
12162 					return -EINVAL;
12163 				}
12164 			} else {
12165 				verbose(env, "%s expected pointer to allocated object\n",
12166 					reg_arg_name(env, argno));
12167 				return -EINVAL;
12168 			}
12169 			if (!reg_is_referenced(env, reg)) {
12170 				verbose(env, "allocated object must be referenced\n");
12171 				return -EINVAL;
12172 			}
12173 			if (meta->btf == btf_vmlinux) {
12174 				meta->arg_btf = reg->btf;
12175 				meta->arg_btf_id = reg->btf_id;
12176 			}
12177 			break;
12178 		case KF_ARG_PTR_TO_DYNPTR:
12179 		{
12180 			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
12181 
12182 			if (is_kfunc_arg_uninit(btf, &args[i]))
12183 				dynptr_arg_type |= MEM_UNINIT;
12184 
12185 			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
12186 				dynptr_arg_type |= DYNPTR_TYPE_SKB;
12187 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
12188 				dynptr_arg_type |= DYNPTR_TYPE_XDP;
12189 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
12190 				dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
12191 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
12192 				dynptr_arg_type |= DYNPTR_TYPE_FILE;
12193 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) {
12194 				dynptr_arg_type |= DYNPTR_TYPE_FILE | OBJ_RELEASE;
12195 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
12196 				   (dynptr_arg_type & MEM_UNINIT)) {
12197 				enum bpf_dynptr_type parent_type = meta->dynptr.type;
12198 
12199 				if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
12200 					verifier_bug(env, "no dynptr type for parent of clone");
12201 					return -EFAULT;
12202 				}
12203 
12204 				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
12205 			}
12206 
12207 			ret = process_dynptr_func(env, reg, argno, insn_idx, dynptr_arg_type,
12208 						  &meta->ref_obj, &meta->dynptr);
12209 			if (ret < 0)
12210 				return ret;
12211 			break;
12212 		}
12213 		case KF_ARG_PTR_TO_ITER:
12214 			if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
12215 				if (!check_css_task_iter_allowlist(env)) {
12216 					verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
12217 					return -EINVAL;
12218 				}
12219 			}
12220 			ret = process_iter_arg(env, reg, argno, insn_idx, meta);
12221 			if (ret < 0)
12222 				return ret;
12223 			break;
12224 		case KF_ARG_PTR_TO_LIST_HEAD:
12225 			if (reg->type != PTR_TO_MAP_VALUE &&
12226 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12227 				verbose(env, "%s expected pointer to map value or allocated object\n",
12228 					reg_arg_name(env, argno));
12229 				return -EINVAL;
12230 			}
12231 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) &&
12232 			    !reg_is_referenced(env, reg)) {
12233 				verbose(env, "allocated object must be referenced\n");
12234 				return -EINVAL;
12235 			}
12236 			ret = process_kf_arg_ptr_to_list_head(env, reg, argno, meta);
12237 			if (ret < 0)
12238 				return ret;
12239 			break;
12240 		case KF_ARG_PTR_TO_RB_ROOT:
12241 			if (reg->type != PTR_TO_MAP_VALUE &&
12242 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12243 				verbose(env, "%s expected pointer to map value or allocated object\n",
12244 					reg_arg_name(env, argno));
12245 				return -EINVAL;
12246 			}
12247 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) &&
12248 			    !reg_is_referenced(env, reg)) {
12249 				verbose(env, "allocated object must be referenced\n");
12250 				return -EINVAL;
12251 			}
12252 			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, argno, meta);
12253 			if (ret < 0)
12254 				return ret;
12255 			break;
12256 		case KF_ARG_PTR_TO_LIST_NODE:
12257 			if (is_kfunc_arg_nonown_allowed(btf, &args[i]) &&
12258 			    type_is_non_owning_ref(reg->type) && !reg_is_referenced(env, reg)) {
12259 				/* Allow bpf_list_front/back return value for
12260 				 * __nonown_allowed list-node arguments.
12261 				 */
12262 				goto check_ok;
12263 			}
12264 			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12265 				verbose(env, "%s expected pointer to allocated object\n",
12266 					reg_arg_name(env, argno));
12267 				return -EINVAL;
12268 			}
12269 			if (!reg_is_referenced(env, reg)) {
12270 				verbose(env, "allocated object must be referenced\n");
12271 				return -EINVAL;
12272 			}
12273 check_ok:
12274 			ret = process_kf_arg_ptr_to_list_node(env, reg, argno, meta);
12275 			if (ret < 0)
12276 				return ret;
12277 			break;
12278 		case KF_ARG_PTR_TO_RB_NODE:
12279 			if (is_bpf_rbtree_add_kfunc(meta->func_id)) {
12280 				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12281 					verbose(env, "%s expected pointer to allocated object\n",
12282 						reg_arg_name(env, argno));
12283 					return -EINVAL;
12284 				}
12285 				if (!reg_is_referenced(env, reg)) {
12286 					verbose(env, "allocated object must be referenced\n");
12287 					return -EINVAL;
12288 				}
12289 			} else {
12290 				if (!type_is_non_owning_ref(reg->type) &&
12291 				    !reg_is_referenced(env, reg)) {
12292 					verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
12293 					return -EINVAL;
12294 				}
12295 				if (in_rbtree_lock_required_cb(env)) {
12296 					verbose(env, "%s not allowed in rbtree cb\n", func_name);
12297 					return -EINVAL;
12298 				}
12299 			}
12300 
12301 			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, argno, meta);
12302 			if (ret < 0)
12303 				return ret;
12304 			break;
12305 		case KF_ARG_PTR_TO_MAP:
12306 			/* If argument has '__map' suffix expect 'struct bpf_map *' */
12307 			ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
12308 			ref_t = btf_type_by_id(btf_vmlinux, ref_id);
12309 			ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12310 			fallthrough;
12311 		case KF_ARG_PTR_TO_BTF_ID:
12312 			/* Only base_type is checked, further checks are done here */
12313 			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
12314 			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
12315 			    !reg2btf_ids[base_type(reg->type)]) {
12316 				verbose(env, "%s is %s ", reg_arg_name(env, argno),
12317 					reg_type_str(env, reg->type));
12318 				verbose(env, "expected %s or socket\n",
12319 					reg_type_str(env, base_type(reg->type) |
12320 							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
12321 				return -EINVAL;
12322 			}
12323 			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i, argno);
12324 			if (ret < 0)
12325 				return ret;
12326 			break;
12327 		case KF_ARG_PTR_TO_MEM:
12328 			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
12329 			if (IS_ERR(resolve_ret)) {
12330 				verbose(env, "%s reference type('%s %s') size cannot be determined: %ld\n",
12331 					reg_arg_name(env, argno), btf_type_str(ref_t),
12332 					ref_tname, PTR_ERR(resolve_ret));
12333 				return -EINVAL;
12334 			}
12335 			ret = check_mem_reg(env, reg, argno, type_size);
12336 			if (ret < 0)
12337 				return ret;
12338 			break;
12339 		case KF_ARG_PTR_TO_MEM_SIZE:
12340 		{
12341 			struct bpf_reg_state *buff_reg = reg;
12342 			const struct btf_param *buff_arg = &args[i];
12343 			struct bpf_reg_state *size_reg = get_func_arg_reg(caller, regs, i + 1);
12344 			const struct btf_param *size_arg = &args[i + 1];
12345 			argno_t next_argno = argno_from_arg(i + 2);
12346 
12347 			if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
12348 				ret = check_kfunc_mem_size_reg(env, buff_reg, size_reg,
12349 							       argno, next_argno);
12350 				if (ret < 0) {
12351 					verbose(env, "%s and ", reg_arg_name(env, argno));
12352 					verbose(env, "%s memory, len pair leads to invalid memory access\n",
12353 						reg_arg_name(env, next_argno));
12354 					return ret;
12355 				}
12356 			}
12357 
12358 			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
12359 				if (meta->arg_constant.found) {
12360 					verifier_bug(env, "only one constant argument permitted");
12361 					return -EFAULT;
12362 				}
12363 				if (!tnum_is_const(size_reg->var_off)) {
12364 					verbose(env, "%s must be a known constant\n",
12365 						reg_arg_name(env, next_argno));
12366 					return -EINVAL;
12367 				}
12368 				meta->arg_constant.found = true;
12369 				meta->arg_constant.value = size_reg->var_off.value;
12370 			}
12371 
12372 			/* Skip next '__sz' or '__szk' argument */
12373 			i++;
12374 			break;
12375 		}
12376 		case KF_ARG_PTR_TO_CALLBACK:
12377 			if (reg->type != PTR_TO_FUNC) {
12378 				verbose(env, "%s expected pointer to func\n", reg_arg_name(env, argno));
12379 				return -EINVAL;
12380 			}
12381 			meta->subprogno = reg->subprogno;
12382 			break;
12383 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12384 			if (!type_is_ptr_alloc_obj(reg->type)) {
12385 				verbose(env, "%s is neither owning or non-owning ref\n",
12386 					reg_arg_name(env, argno));
12387 				return -EINVAL;
12388 			}
12389 			if (!type_is_non_owning_ref(reg->type))
12390 				meta->arg_owning_ref = true;
12391 
12392 			rec = reg_btf_record(reg);
12393 			if (!rec) {
12394 				verifier_bug(env, "Couldn't find btf_record");
12395 				return -EFAULT;
12396 			}
12397 
12398 			if (rec->refcount_off < 0) {
12399 				verbose(env, "%s doesn't point to a type with bpf_refcount field\n",
12400 					reg_arg_name(env, argno));
12401 				return -EINVAL;
12402 			}
12403 
12404 			meta->arg_btf = reg->btf;
12405 			meta->arg_btf_id = reg->btf_id;
12406 			break;
12407 		case KF_ARG_PTR_TO_CONST_STR:
12408 			if (reg->type != PTR_TO_MAP_VALUE) {
12409 				verbose(env, "%s doesn't point to a const string\n",
12410 					reg_arg_name(env, argno));
12411 				return -EINVAL;
12412 			}
12413 			ret = check_arg_const_str(env, reg, argno);
12414 			if (ret)
12415 				return ret;
12416 			break;
12417 		case KF_ARG_PTR_TO_WORKQUEUE:
12418 			if (reg->type != PTR_TO_MAP_VALUE) {
12419 				verbose(env, "%s doesn't point to a map value\n",
12420 					reg_arg_name(env, argno));
12421 				return -EINVAL;
12422 			}
12423 			ret = check_map_field_pointer(env, reg, argno, BPF_WORKQUEUE, &meta->map);
12424 			if (ret < 0)
12425 				return ret;
12426 			break;
12427 		case KF_ARG_PTR_TO_TIMER:
12428 			if (reg->type != PTR_TO_MAP_VALUE) {
12429 				verbose(env, "%s doesn't point to a map value\n",
12430 					reg_arg_name(env, argno));
12431 				return -EINVAL;
12432 			}
12433 			ret = process_timer_kfunc(env, reg, argno, meta);
12434 			if (ret < 0)
12435 				return ret;
12436 			break;
12437 		case KF_ARG_PTR_TO_TASK_WORK:
12438 			if (reg->type != PTR_TO_MAP_VALUE) {
12439 				verbose(env, "%s doesn't point to a map value\n",
12440 					reg_arg_name(env, argno));
12441 				return -EINVAL;
12442 			}
12443 			ret = check_map_field_pointer(env, reg, argno, BPF_TASK_WORK, &meta->map);
12444 			if (ret < 0)
12445 				return ret;
12446 			break;
12447 		case KF_ARG_PTR_TO_IRQ_FLAG:
12448 			if (reg->type != PTR_TO_STACK) {
12449 				verbose(env, "%s doesn't point to an irq flag on stack\n",
12450 					reg_arg_name(env, argno));
12451 				return -EINVAL;
12452 			}
12453 			ret = process_irq_flag(env, reg, argno, meta);
12454 			if (ret < 0)
12455 				return ret;
12456 			break;
12457 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
12458 		{
12459 			int flags = PROCESS_RES_LOCK;
12460 
12461 			if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12462 				verbose(env, "%s doesn't point to map value or allocated object\n",
12463 					reg_arg_name(env, argno));
12464 				return -EINVAL;
12465 			}
12466 
12467 			if (!is_bpf_res_spin_lock_kfunc(meta->func_id))
12468 				return -EFAULT;
12469 			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
12470 			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
12471 				flags |= PROCESS_SPIN_LOCK;
12472 			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
12473 			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
12474 				flags |= PROCESS_LOCK_IRQ;
12475 			ret = process_spin_lock(env, reg, argno, flags);
12476 			if (ret < 0)
12477 				return ret;
12478 			break;
12479 		}
12480 		}
12481 	}
12482 
12483 	return 0;
12484 }
12485 
12486 int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
12487 			     s32 func_id,
12488 			     s16 offset,
12489 			     struct bpf_kfunc_call_arg_meta *meta)
12490 {
12491 	struct bpf_kfunc_meta kfunc;
12492 	int err;
12493 
12494 	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
12495 	if (err)
12496 		return err;
12497 
12498 	memset(meta, 0, sizeof(*meta));
12499 	meta->btf = kfunc.btf;
12500 	meta->func_id = kfunc.id;
12501 	meta->func_proto = kfunc.proto;
12502 	meta->func_name = kfunc.name;
12503 
12504 	if (!kfunc.flags || !btf_kfunc_is_allowed(kfunc.btf, kfunc.id, env->prog))
12505 		return -EACCES;
12506 
12507 	meta->kfunc_flags = *kfunc.flags;
12508 
12509 	/* Only support release referenced argument passed by register */
12510 	if (is_kfunc_release(meta))
12511 		meta->release_regno = BPF_REG_1;
12512 
12513 	return 0;
12514 }
12515 
12516 /*
12517  * Determine how many bytes a helper accesses through a stack pointer at
12518  * argument position @arg (0-based, corresponding to R1-R5).
12519  *
12520  * Returns:
12521  *   > 0   known read access size in bytes
12522  *     0   doesn't read anything directly
12523  * S64_MIN unknown
12524  *   < 0   known write access of (-return) bytes
12525  */
12526 s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
12527 				  int arg, int insn_idx)
12528 {
12529 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
12530 	const struct bpf_func_proto *fn;
12531 	enum bpf_arg_type at;
12532 	s64 size;
12533 
12534 	if (bpf_get_helper_proto(env, insn->imm, &fn) < 0)
12535 		return S64_MIN;
12536 
12537 	at = fn->arg_type[arg];
12538 
12539 	switch (base_type(at)) {
12540 	case ARG_PTR_TO_MAP_KEY:
12541 	case ARG_PTR_TO_MAP_VALUE: {
12542 		bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY;
12543 		u64 val;
12544 		int i, map_reg;
12545 
12546 		for (i = 0; i < arg; i++) {
12547 			if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR)
12548 				break;
12549 		}
12550 		if (i >= arg)
12551 			goto scan_all_maps;
12552 
12553 		map_reg = BPF_REG_1 + i;
12554 
12555 		if (!(aux->const_reg_map_mask & BIT(map_reg)))
12556 			goto scan_all_maps;
12557 
12558 		i = aux->const_reg_vals[map_reg];
12559 		if (i < env->used_map_cnt) {
12560 			size = is_key ? env->used_maps[i]->key_size
12561 				      : env->used_maps[i]->value_size;
12562 			goto out;
12563 		}
12564 scan_all_maps:
12565 		/*
12566 		 * Map pointer is not known at this call site (e.g. different
12567 		 * maps on merged paths).  Conservatively return the largest
12568 		 * key_size or value_size across all maps used by the program.
12569 		 */
12570 		val = 0;
12571 		for (i = 0; i < env->used_map_cnt; i++) {
12572 			struct bpf_map *map = env->used_maps[i];
12573 			u32 sz = is_key ? map->key_size : map->value_size;
12574 
12575 			if (sz > val)
12576 				val = sz;
12577 			if (map->inner_map_meta) {
12578 				sz = is_key ? map->inner_map_meta->key_size
12579 					    : map->inner_map_meta->value_size;
12580 				if (sz > val)
12581 					val = sz;
12582 			}
12583 		}
12584 		if (!val)
12585 			return S64_MIN;
12586 		size = val;
12587 		goto out;
12588 	}
12589 	case ARG_PTR_TO_MEM:
12590 		if (at & MEM_FIXED_SIZE) {
12591 			size = fn->arg_size[arg];
12592 			goto out;
12593 		}
12594 		if (arg + 1 < ARRAY_SIZE(fn->arg_type) &&
12595 		    arg_type_is_mem_size(fn->arg_type[arg + 1])) {
12596 			int size_reg = BPF_REG_1 + arg + 1;
12597 
12598 			if (aux->const_reg_mask & BIT(size_reg)) {
12599 				size = (s64)aux->const_reg_vals[size_reg];
12600 				goto out;
12601 			}
12602 			/*
12603 			 * Size arg is const on each path but differs across merged
12604 			 * paths. MAX_BPF_STACK is a safe upper bound for reads.
12605 			 */
12606 			if (at & MEM_UNINIT)
12607 				return 0;
12608 			return MAX_BPF_STACK;
12609 		}
12610 		return S64_MIN;
12611 	case ARG_PTR_TO_DYNPTR:
12612 		size = BPF_DYNPTR_SIZE;
12613 		break;
12614 	case ARG_PTR_TO_STACK:
12615 		/*
12616 		 * Only used by bpf_calls_callback() helpers. The helper itself
12617 		 * doesn't access stack. The callback subprog does and it's
12618 		 * analyzed separately.
12619 		 */
12620 		return 0;
12621 	default:
12622 		return S64_MIN;
12623 	}
12624 out:
12625 	/*
12626 	 * MEM_UNINIT args are write-only: the helper initializes the
12627 	 * buffer without reading it.
12628 	 */
12629 	if (at & MEM_UNINIT)
12630 		return -size;
12631 	return size;
12632 }
12633 
12634 /*
12635  * Determine how many bytes a kfunc accesses through a stack pointer at
12636  * argument position @arg (0-based, corresponding to R1-R5).
12637  *
12638  * Returns:
12639  *   > 0      known read access size in bytes
12640  *     0      doesn't access memory through that argument (ex: not a pointer)
12641  *   S64_MIN  unknown
12642  *   < 0      known write access of (-return) bytes
12643  */
12644 s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
12645 				 int arg, int insn_idx)
12646 {
12647 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
12648 	struct bpf_kfunc_call_arg_meta meta;
12649 	const struct btf_param *args;
12650 	const struct btf_type *t, *ref_t;
12651 	const struct btf *btf;
12652 	u32 nargs, type_size;
12653 	s64 size;
12654 
12655 	if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
12656 		return S64_MIN;
12657 
12658 	btf = meta.btf;
12659 	args = btf_params(meta.func_proto);
12660 	nargs = btf_type_vlen(meta.func_proto);
12661 	if (arg >= nargs)
12662 		return 0;
12663 
12664 	t = btf_type_skip_modifiers(btf, args[arg].type, NULL);
12665 	if (!btf_type_is_ptr(t))
12666 		return 0;
12667 
12668 	/* dynptr: fixed 16-byte on-stack representation */
12669 	if (is_kfunc_arg_dynptr(btf, &args[arg])) {
12670 		size = BPF_DYNPTR_SIZE;
12671 		goto out;
12672 	}
12673 
12674 	/* ptr + __sz/__szk pair: size is in the next register */
12675 	if (arg + 1 < nargs &&
12676 	    (btf_param_match_suffix(btf, &args[arg + 1], "__sz") ||
12677 	     btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) {
12678 		int size_reg = BPF_REG_1 + arg + 1;
12679 
12680 		if (aux->const_reg_mask & BIT(size_reg)) {
12681 			size = (s64)aux->const_reg_vals[size_reg];
12682 			goto out;
12683 		}
12684 		return MAX_BPF_STACK;
12685 	}
12686 
12687 	/* fixed-size pointed-to type: resolve via BTF */
12688 	ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
12689 	if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) {
12690 		size = type_size;
12691 		goto out;
12692 	}
12693 
12694 	return S64_MIN;
12695 out:
12696 	/* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */
12697 	if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW)
12698 		return -size;
12699 	if (is_kfunc_arg_uninit(btf, &args[arg]))
12700 		return -size;
12701 	return size;
12702 }
12703 
12704 /* check special kfuncs and return:
12705  *  1  - not fall-through to 'else' branch, continue verification
12706  *  0  - fall-through to 'else' branch
12707  * < 0 - not fall-through to 'else' branch, return error
12708  */
12709 static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
12710 			       struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
12711 			       const struct btf_type *ptr_type, struct btf *desc_btf)
12712 {
12713 	const struct btf_type *ret_t;
12714 	int err = 0;
12715 
12716 	if (meta->btf != btf_vmlinux)
12717 		return 0;
12718 
12719 	if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12720 		struct btf_struct_meta *struct_meta;
12721 		struct btf *ret_btf;
12722 		u32 ret_btf_id;
12723 
12724 		if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set)
12725 			return -ENOMEM;
12726 
12727 		if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
12728 			verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
12729 			return -EINVAL;
12730 		}
12731 
12732 		ret_btf = env->prog->aux->btf;
12733 		ret_btf_id = meta->arg_constant.value;
12734 
12735 		/* This may be NULL due to user not supplying a BTF */
12736 		if (!ret_btf) {
12737 			verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
12738 			return -EINVAL;
12739 		}
12740 
12741 		ret_t = btf_type_by_id(ret_btf, ret_btf_id);
12742 		if (!ret_t || !__btf_type_is_struct(ret_t)) {
12743 			verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
12744 			return -EINVAL;
12745 		}
12746 
12747 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12748 			if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
12749 				verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
12750 					ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
12751 				return -EINVAL;
12752 			}
12753 
12754 			if (!bpf_global_percpu_ma_set) {
12755 				mutex_lock(&bpf_percpu_ma_lock);
12756 				if (!bpf_global_percpu_ma_set) {
12757 					/* Charge memory allocated with bpf_global_percpu_ma to
12758 					 * root memcg. The obj_cgroup for root memcg is NULL.
12759 					 */
12760 					err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
12761 					if (!err)
12762 						bpf_global_percpu_ma_set = true;
12763 				}
12764 				mutex_unlock(&bpf_percpu_ma_lock);
12765 				if (err)
12766 					return err;
12767 			}
12768 
12769 			mutex_lock(&bpf_percpu_ma_lock);
12770 			err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
12771 			mutex_unlock(&bpf_percpu_ma_lock);
12772 			if (err)
12773 				return err;
12774 		}
12775 
12776 		struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
12777 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12778 			if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
12779 				verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
12780 				return -EINVAL;
12781 			}
12782 
12783 			if (struct_meta) {
12784 				verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
12785 				return -EINVAL;
12786 			}
12787 		}
12788 
12789 		mark_reg_known_zero(env, regs, BPF_REG_0);
12790 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12791 		regs[BPF_REG_0].btf = ret_btf;
12792 		regs[BPF_REG_0].btf_id = ret_btf_id;
12793 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id))
12794 			regs[BPF_REG_0].type |= MEM_PERCPU;
12795 
12796 		insn_aux->obj_new_size = ret_t->size;
12797 		insn_aux->kptr_struct_meta = struct_meta;
12798 	} else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) {
12799 		mark_reg_known_zero(env, regs, BPF_REG_0);
12800 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12801 		regs[BPF_REG_0].btf = meta->arg_btf;
12802 		regs[BPF_REG_0].btf_id = meta->arg_btf_id;
12803 
12804 		insn_aux->kptr_struct_meta =
12805 			btf_find_struct_meta(meta->arg_btf,
12806 					     meta->arg_btf_id);
12807 	} else if (is_list_node_type(ptr_type)) {
12808 		struct btf_field *field = meta->arg_list_head.field;
12809 
12810 		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12811 	} else if (is_rbtree_node_type(ptr_type)) {
12812 		struct btf_field *field = meta->arg_rbtree_root.field;
12813 
12814 		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12815 	} else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12816 		mark_reg_known_zero(env, regs, BPF_REG_0);
12817 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
12818 		regs[BPF_REG_0].btf = desc_btf;
12819 		regs[BPF_REG_0].btf_id = meta->ret_btf_id;
12820 	} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
12821 		ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
12822 		if (!ret_t) {
12823 			verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
12824 				meta->arg_constant.value);
12825 			return -EINVAL;
12826 		} else if (btf_type_is_struct(ret_t)) {
12827 			mark_reg_known_zero(env, regs, BPF_REG_0);
12828 			regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
12829 			regs[BPF_REG_0].btf = desc_btf;
12830 			regs[BPF_REG_0].btf_id = meta->arg_constant.value;
12831 		} else if (btf_type_is_void(ret_t)) {
12832 			mark_reg_known_zero(env, regs, BPF_REG_0);
12833 			regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
12834 			regs[BPF_REG_0].mem_size = 0;
12835 		} else {
12836 			verbose(env,
12837 				"kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
12838 			return -EINVAL;
12839 		}
12840 	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
12841 		   meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
12842 		enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->dynptr.type);
12843 
12844 		mark_reg_known_zero(env, regs, BPF_REG_0);
12845 
12846 		if (!meta->arg_constant.found) {
12847 			verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size");
12848 			return -EFAULT;
12849 		}
12850 
12851 		regs[BPF_REG_0].mem_size = meta->arg_constant.value;
12852 
12853 		/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
12854 		regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
12855 
12856 		if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
12857 			regs[BPF_REG_0].type |= MEM_RDONLY;
12858 		} else {
12859 			/* this will set env->seen_direct_write to true */
12860 			if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
12861 				verbose(env, "the prog does not allow writes to packet data\n");
12862 				return -EINVAL;
12863 			}
12864 		}
12865 
12866 		if (!meta->dynptr.id) {
12867 			verifier_bug(env, "no dynptr id");
12868 			return -EFAULT;
12869 		}
12870 		regs[BPF_REG_0].parent_id = meta->dynptr.id;
12871 	} else {
12872 		return 0;
12873 	}
12874 
12875 	return 1;
12876 }
12877 
12878 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
12879 
/*
 * Verify a single kfunc call instruction and model its effect on the
 * current verifier state.
 *
 * @env:        verifier environment
 * @insn:       the call instruction; insn->imm and insn->off are handed to
 *              bpf_fetch_kfunc_arg_meta() to look the kfunc up
 * @insn_idx_p: pointer to the index of @insn; it is only read here
 *
 * In order, the function: fetches the call metadata, forks a second state
 * for the resilient spin lock kfuncs, rejects __bpf_trap(), applies the
 * capability and sleepable checks, checks the arguments, pushes callback
 * verification for the callback-taking kfuncs, updates the RCU / preempt
 * nesting counters, performs a pending release, marks the caller-saved
 * registers as not initialized, sets up R0 according to the BTF return
 * type, records the register sizes of the arguments and finally handles
 * iterator 'next' and bpf_throw specifics.
 *
 * Return: 0 on success (also when insn->imm is 0, which is deliberately
 * skipped here), a negative errno on failure. For a throw kfunc the result
 * of process_bpf_exit_full() is returned.
 */
12880 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
12881 			    int *insn_idx_p)
12882 {
12883 	bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
12884 	struct bpf_reg_state *regs = cur_regs(env);
12885 	const char *func_name, *ptr_type_name;
12886 	const struct btf_type *t, *ptr_type;
12887 	struct bpf_kfunc_call_arg_meta meta;
12888 	struct bpf_insn_aux_data *insn_aux;
12889 	int err, insn_idx = *insn_idx_p;
12890 	const struct btf_param *args;
12891 	u32 i, nargs, ptr_type_id;
12892 	struct btf *desc_btf;
12893 	int id;
12894 
12895 	/* skip for now, but return error when we find this in fixup_kfunc_call */
12896 	if (!insn->imm)
12897 		return 0;
12898 
12899 	err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
12900 	if (err == -EACCES && meta.func_name)
12901 		verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
12902 	if (err)
12903 		return err;
12904 	desc_btf = meta.btf;
12905 	func_name = meta.func_name;
12906 	insn_aux = &env->insn_aux_data[insn_idx];
12907 
	/* Remember on the instruction whether this is an iterator 'next' kfunc. */
12908 	insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta);
12909 
12910 	if (!insn->off &&
12911 	    (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
12912 	     insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) {
12913 		struct bpf_verifier_state *branch;
		/* Shadows the function-level 'regs': these are the forked state's registers. */
12914 		struct bpf_reg_state *regs;
12915 
		/*
		 * Fork a state that resumes at the next instruction and models
		 * the failed lock acquisition: its R0 is limited to the 32-bit
		 * range [-MAX_ERRNO, -1]. The current state gets R0 == 0 further
		 * down, where the scalar return value is handled.
		 */
12916 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
12917 		if (IS_ERR(branch)) {
12918 			verbose(env, "failed to push state for failed lock acquisition\n");
12919 			return PTR_ERR(branch);
12920 		}
12921 
12922 		regs = branch->frame[branch->curframe]->regs;
12923 
12924 		/* Clear r0-r5 registers in forked state */
12925 		for (i = 0; i < CALLER_SAVED_REGS; i++)
12926 			bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
12927 
12928 		mark_reg_unknown(env, regs, BPF_REG_0);
12929 		err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
12930 		if (err) {
12931 			verbose(env, "failed to mark s32 range for retval in forked state for lock\n");
12932 			return err;
12933 		}
12934 		__mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
12935 	} else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
12936 		verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
12937 		return -EFAULT;
12938 	}
12939 
12940 	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
12941 		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
12942 		return -EACCES;
12943 	}
12944 
12945 	sleepable = bpf_is_kfunc_sleepable(&meta);
12946 	if (sleepable && !in_sleepable(env)) {
12947 		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
12948 		return -EACCES;
12949 	}
12950 
12951 	/* Track non-sleepable context for kfuncs, same as for helpers. */
12952 	if (!in_sleepable_context(env))
12953 		insn_aux->non_sleepable = true;
12954 
12955 	/* Check the arguments */
12956 	err = check_kfunc_args(env, &meta, insn_idx);
12957 	if (err < 0)
12958 		return err;
12959 
	/*
	 * Kfuncs that take a callback: queue verification of the callback
	 * subprogram (meta.subprogno) with the matching state setup function.
	 */
12960 	if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
12961 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12962 					 set_rbtree_add_callback_state);
12963 		if (err) {
12964 			verbose(env, "kfunc %s#%d failed callback verification\n",
12965 				func_name, meta.func_id);
12966 			return err;
12967 		}
12968 	}
12969 
	/* bpf_session_cookie: R0 is treated as writable memory of sizeof(u64) bytes. */
12970 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
12971 		meta.r0_size = sizeof(u64);
12972 		meta.r0_rdonly = false;
12973 	}
12974 
12975 	if (is_bpf_wq_set_callback_kfunc(meta.func_id)) {
12976 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12977 					 set_timer_callback_state);
12978 		if (err) {
12979 			verbose(env, "kfunc %s#%d failed callback verification\n",
12980 				func_name, meta.func_id);
12981 			return err;
12982 		}
12983 	}
12984 
12985 	if (is_task_work_add_kfunc(meta.func_id)) {
12986 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12987 					 set_task_work_schedule_callback_state);
12988 		if (err) {
12989 			verbose(env, "kfunc %s#%d failed callback verification\n",
12990 				func_name, meta.func_id);
12991 			return err;
12992 		}
12993 	}
12994 
12995 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
12996 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
12997 
12998 	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
12999 	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
13000 
	/*
	 * Maintain the RCU and preempt nesting counters of the current state.
	 * An unlock/enable without a matching lock/disable is rejected; when
	 * the RCU counter drops to zero, invalidate_rcu_protected_refs() runs.
	 */
13001 	if (rcu_lock) {
13002 		env->cur_state->active_rcu_locks++;
13003 	} else if (rcu_unlock) {
13004 		if (env->cur_state->active_rcu_locks == 0) {
13005 			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
13006 			return -EINVAL;
13007 		}
13008 		if (--env->cur_state->active_rcu_locks == 0)
13009 			invalidate_rcu_protected_refs(env);
13010 	} else if (preempt_disable) {
13011 		env->cur_state->active_preempt_locks++;
13012 	} else if (preempt_enable) {
13013 		if (env->cur_state->active_preempt_locks == 0) {
13014 			verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
13015 			return -EINVAL;
13016 		}
13017 		env->cur_state->active_preempt_locks--;
13018 	}
13019 
	/* Evaluated after the counter updates above, so it sees their effect. */
13020 	if (sleepable && !in_sleepable_context(env)) {
13021 		verbose(env, "kernel func %s is sleepable within %s\n",
13022 			func_name, non_sleepable_context_description(env));
13023 		return -EACCES;
13024 	}
13025 
13026 	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
13027 		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
13028 		return -EACCES;
13029 	}
13030 
13031 	if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
13032 		verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
13033 		return -EACCES;
13034 	}
13035 
13036 	/* In case of release function, we get register number of refcounted
13037 	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
13038 	 */
13039 	if (meta.release_regno) {
13040 		err = release_reg(env, &regs[meta.release_regno], false, !!meta.dynptr.id);
13041 		if (err)
13042 			return err;
13043 	}
13044 
	/*
	 * List push / rbtree add: record the R2 constant offset and the struct
	 * meta for the instruction, then convert the reference identified by
	 * R2's id from owning to non-owning.
	 */
13045 	if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) {
13046 		id = regs[BPF_REG_2].id;
13047 		insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
13048 		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
13049 		ref_convert_owning_non_owning(env, id);
13050 	}
13051 
13052 	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
13053 		if (!bpf_jit_supports_exceptions()) {
13054 			verbose(env, "JIT does not support calling kfunc %s#%d\n",
13055 				func_name, meta.func_id);
13056 			return -ENOTSUPP;
13057 		}
13058 		env->seen_exception = true;
13059 
13060 		/* In the case of the default callback, the cookie value passed
13061 		 * to bpf_throw becomes the return value of the program.
13062 		 */
13063 		if (!env->exception_callback_subprog) {
13064 			err = check_return_code(env, BPF_REG_1, "R1");
13065 			if (err < 0)
13066 				return err;
13067 		}
13068 	}
13069 
	/* The call clobbers every caller-saved register. */
13070 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
13071 		u32 regno = caller_saved[i];
13072 
13073 		bpf_mark_reg_not_init(env, &regs[regno]);
13074 		regs[regno].subreg_def = DEF_NOT_SUBREG;
13075 	}
13076 	invalidate_outgoing_stack_args(env, cur_func(env));
13077 
13078 	/* Check return type */
13079 	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
13080 
	/*
	 * An acquire kfunc must return a pointer to a struct; the only
	 * exceptions are the vmlinux obj_new, percpu_obj_new and
	 * refcount_acquire kfuncs.
	 */
13081 	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
13082 		if (meta.btf != btf_vmlinux ||
13083 		    (!is_bpf_obj_new_kfunc(meta.func_id) &&
13084 		     !is_bpf_percpu_obj_new_kfunc(meta.func_id) &&
13085 		     !is_bpf_refcount_acquire_kfunc(meta.func_id))) {
13086 			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
13087 			return -EINVAL;
13088 		}
13089 	}
13090 
13091 	if (btf_type_is_scalar(t)) {
13092 		mark_reg_unknown(env, regs, BPF_REG_0);
		/* Success path of the resilient spin lock kfuncs: R0 is exactly 0. */
13093 		if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
13094 		    meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]))
13095 			__mark_reg_const_zero(env, &regs[BPF_REG_0]);
13096 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
13097 	} else if (btf_type_is_ptr(t)) {
13098 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
		/*
		 * check_special_kfunc(): negative is an error, positive means it
		 * already set up R0, zero means fall through to the generic
		 * handling of the pointee type below.
		 */
13099 		err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
13100 		if (err) {
13101 			if (err < 0)
13102 				return err;
13103 		} else if (btf_type_is_void(ptr_type)) {
13104 			/* kfunc returning 'void *' is equivalent to returning scalar */
13105 			mark_reg_unknown(env, regs, BPF_REG_0);
13106 		} else if (!__btf_type_is_struct(ptr_type)) {
			/*
			 * Pointer to a non-struct type: returned as PTR_TO_MEM. If no
			 * size was recorded in meta, use the BTF-resolved size of the
			 * pointee and make the memory read-only.
			 */
13107 			if (!meta.r0_size) {
13108 				__u32 sz;
13109 
13110 				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
13111 					meta.r0_size = sz;
13112 					meta.r0_rdonly = true;
13113 				}
13114 			}
13115 			if (!meta.r0_size) {
13116 				ptr_type_name = btf_name_by_offset(desc_btf,
13117 								   ptr_type->name_off);
13118 				verbose(env,
13119 					"kernel function %s returns pointer type %s %s is not supported\n",
13120 					func_name,
13121 					btf_type_str(ptr_type),
13122 					ptr_type_name);
13123 				return -EINVAL;
13124 			}
13125 
13126 			mark_reg_known_zero(env, regs, BPF_REG_0);
13127 			regs[BPF_REG_0].type = PTR_TO_MEM;
13128 			regs[BPF_REG_0].mem_size = meta.r0_size;
13129 
13130 			if (meta.r0_rdonly)
13131 				regs[BPF_REG_0].type |= MEM_RDONLY;
13132 
13133 			/* Ensures we don't access the memory after a release_reference() */
13134 			if (meta.ref_obj.id) {
13135 				err = validate_ref_obj(env, &meta.ref_obj);
13136 				if (err)
13137 					return err;
13138 				regs[BPF_REG_0].parent_id = meta.ref_obj.id;
13139 			}
13140 
13141 			if (is_kfunc_rcu_protected(&meta))
13142 				regs[BPF_REG_0].type |= MEM_RCU;
13143 		} else {
			/* Pointer to a struct: returned as PTR_TO_BTF_ID plus one trust flag. */
13144 			enum bpf_reg_type type = PTR_TO_BTF_ID;
13145 
13146 			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
13147 				type |= PTR_UNTRUSTED;
13148 			else if (is_kfunc_rcu_protected(&meta) ||
13149 				 (bpf_is_iter_next_kfunc(&meta) &&
13150 				  (get_iter_from_state(env->cur_state, &meta)
13151 					   ->type & MEM_RCU))) {
13152 				/*
13153 				 * If the iterator's constructor (the _new
13154 				 * function e.g., bpf_iter_task_new) has been
13155 				 * annotated with BPF kfunc flag
13156 				 * KF_RCU_PROTECTED and was called within a RCU
13157 				 * read-side critical section, also propagate
13158 				 * the MEM_RCU flag to the pointer returned from
13159 				 * the iterator's next function (e.g.,
13160 				 * bpf_iter_task_next).
13161 				 */
13162 				type |= MEM_RCU;
13163 			} else {
13164 				/*
13165 				 * Any PTR_TO_BTF_ID that is returned from a BPF
13166 				 * kfunc should by default be treated as
13167 				 * implicitly trusted.
13168 				 */
13169 				type |= PTR_TRUSTED;
13170 			}
13171 
13172 			mark_reg_known_zero(env, regs, BPF_REG_0);
13173 			regs[BPF_REG_0].btf = desc_btf;
13174 			regs[BPF_REG_0].type = type;
13175 			regs[BPF_REG_0].btf_id = ptr_type_id;
13176 		}
13177 
13178 		if (is_kfunc_ret_null(&meta)) {
13179 			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
13180 			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
13181 			regs[BPF_REG_0].id = ++env->id_gen;
13182 		}
13183 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
		/* An acquired reference replaces whatever id R0 was given above. */
13184 		if (is_kfunc_acquire(&meta)) {
13185 			id = acquire_reference(env, insn_idx, 0);
13186 			if (id < 0)
13187 				return id;
13188 			regs[BPF_REG_0].id = id;
13189 		} else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
13190 			ref_set_non_owning(env, &regs[BPF_REG_0]);
13191 		}
13192 
13193 		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
13194 			regs[BPF_REG_0].id = ++env->id_gen;
13195 	} else if (btf_type_is_void(t)) {
		/* vmlinux obj_drop / percpu_obj_drop: record the struct meta on the instruction. */
13196 		if (meta.btf == btf_vmlinux) {
13197 			if (is_bpf_obj_drop_kfunc(meta.func_id) ||
13198 			    is_bpf_percpu_obj_drop_kfunc(meta.func_id)) {
13199 				insn_aux->kptr_struct_meta =
13200 					btf_find_struct_meta(meta.arg_btf,
13201 							     meta.arg_btf_id);
13202 			}
13203 		}
13204 	}
13205 
13206 	if (bpf_is_kfunc_pkt_changing(&meta))
13207 		clear_all_pkt_pointers(env);
13208 
13209 	nargs = btf_type_vlen(meta.func_proto);
	/*
	 * More arguments than MAX_BPF_FUNC_REG_ARGS: raise the calling
	 * subprogram's stack_arg_cnt if this call needs more than was recorded
	 * so far.
	 */
13210 	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
13211 		struct bpf_func_state *caller = cur_func(env);
13212 		struct bpf_subprog_info *caller_info = &env->subprog_info[caller->subprogno];
13213 		u16 out_stack_arg_cnt = nargs - MAX_BPF_FUNC_REG_ARGS;
13214 		u16 stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + out_stack_arg_cnt;
13215 
13216 		if (stack_arg_cnt > caller_info->stack_arg_cnt)
13217 			caller_info->stack_arg_cnt = stack_arg_cnt;
13218 	}
13219 
	/* Argument i lives in register i + 1; record the size of each one. */
13220 	args = (const struct btf_param *)(meta.func_proto + 1);
13221 	for (i = 0; i < min_t(int, nargs, MAX_BPF_FUNC_REG_ARGS); i++) {
13222 		u32 regno = i + 1;
13223 
13224 		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
13225 		if (btf_type_is_ptr(t))
13226 			mark_btf_func_reg_size(env, regno, sizeof(void *));
13227 		else
13228 			/* scalar. ensured by check_kfunc_args() */
13229 			mark_btf_func_reg_size(env, regno, t->size);
13230 	}
13231 
13232 	if (bpf_is_iter_next_kfunc(&meta)) {
13233 		err = process_iter_next_call(env, insn_idx, &meta);
13234 		if (err)
13235 			return err;
13236 	}
13237 
13238 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
13239 		env->prog->call_session_cookie = true;
13240 
13241 	if (bpf_is_throw_kfunc(insn))
13242 		return process_bpf_exit_full(env, NULL, true);
13243 
13244 	return 0;
13245 }
13246 
13247 static bool check_reg_sane_offset_scalar(struct bpf_verifier_env *env,
13248 					 const struct bpf_reg_state *reg,
13249 					 enum bpf_reg_type type)
13250 {
13251 	bool known = tnum_is_const(reg->var_off);
13252 	s64 val = reg->var_off.value;
13253 	s64 smin = reg_smin(reg);
13254 
13255 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13256 		verbose(env, "math between %s pointer and %lld is not allowed\n",
13257 			reg_type_str(env, type), val);
13258 		return false;
13259 	}
13260 
13261 	if (smin == S64_MIN) {
13262 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
13263 			reg_type_str(env, type));
13264 		return false;
13265 	}
13266 
13267 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13268 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
13269 			smin, reg_type_str(env, type));
13270 		return false;
13271 	}
13272 
13273 	return true;
13274 }
13275 
13276 static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
13277 				      const struct bpf_reg_state *reg,
13278 				      enum bpf_reg_type type)
13279 {
13280 	bool known = tnum_is_const(reg->var_off);
13281 	s64 val = reg->var_off.value;
13282 	s64 smin = reg_smin(reg);
13283 
13284 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13285 		verbose(env, "%s pointer offset %lld is not allowed\n",
13286 			reg_type_str(env, type), val);
13287 		return false;
13288 	}
13289 
13290 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13291 		verbose(env, "%s pointer offset %lld is not allowed\n",
13292 			reg_type_str(env, type), smin);
13293 		return false;
13294 	}
13295 
13296 	return true;
13297 }
13298 
/*
 * Internal failure reasons of the pointer ALU sanitation helpers.
 * sanitize_err() turns them into a log message and an errno.
 */
enum {
	/* scalar offset is not constant and its signed bounds straddle zero */
	REASON_BOUNDS = -1,
	/* retrieve_ptr_limit() has no limit rule for this pointer type */
	REASON_TYPE = -2,
	/* the instruction was already recorded with a different state or limit */
	REASON_PATHS = -3,
	/* the computed limit is not below the maximum for the pointer */
	REASON_LIMIT = -4,
	/* the speculative path could not be pushed for verification */
	REASON_STACK = -5,
};
13306 
13307 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
13308 			      u32 *alu_limit, bool mask_to_left)
13309 {
13310 	u32 max = 0, ptr_limit = 0;
13311 
13312 	switch (ptr_reg->type) {
13313 	case PTR_TO_STACK:
13314 		/* Offset 0 is out-of-bounds, but acceptable start for the
13315 		 * left direction, see BPF_REG_FP. Also, unknown scalar
13316 		 * offset where we would need to deal with min/max bounds is
13317 		 * currently prohibited for unprivileged.
13318 		 */
13319 		max = MAX_BPF_STACK + mask_to_left;
13320 		ptr_limit = -ptr_reg->var_off.value;
13321 		break;
13322 	case PTR_TO_MAP_VALUE:
13323 		max = ptr_reg->map_ptr->value_size;
13324 		ptr_limit = mask_to_left ? reg_smin(ptr_reg) : reg_umax(ptr_reg);
13325 		break;
13326 	default:
13327 		return REASON_TYPE;
13328 	}
13329 
13330 	if (ptr_limit >= max)
13331 		return REASON_LIMIT;
13332 	*alu_limit = ptr_limit;
13333 	return 0;
13334 }
13335 
13336 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
13337 				    const struct bpf_insn *insn)
13338 {
13339 	return env->bypass_spec_v1 ||
13340 		BPF_SRC(insn->code) == BPF_K ||
13341 		cur_aux(env)->nospec;
13342 }
13343 
13344 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
13345 				       u32 alu_state, u32 alu_limit)
13346 {
13347 	/* If we arrived here from different branches with different
13348 	 * state or limits to sanitize, then this won't work.
13349 	 */
13350 	if (aux->alu_state &&
13351 	    (aux->alu_state != alu_state ||
13352 	     aux->alu_limit != alu_limit))
13353 		return REASON_PATHS;
13354 
13355 	/* Corresponding fixup done in do_misc_fixups(). */
13356 	aux->alu_state = alu_state;
13357 	aux->alu_limit = alu_limit;
13358 	return 0;
13359 }
13360 
13361 static int sanitize_val_alu(struct bpf_verifier_env *env,
13362 			    struct bpf_insn *insn)
13363 {
13364 	struct bpf_insn_aux_data *aux = cur_aux(env);
13365 
13366 	if (can_skip_alu_sanitation(env, insn))
13367 		return 0;
13368 
13369 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
13370 }
13371 
13372 static bool sanitize_needed(u8 opcode)
13373 {
13374 	return opcode == BPF_ADD || opcode == BPF_SUB;
13375 }
13376 
13377 struct bpf_sanitize_info {
13378 	struct bpf_insn_aux_data aux;
13379 	bool mask_to_left;
13380 };
13381 
13382 static int sanitize_speculative_path(struct bpf_verifier_env *env,
13383 				     const struct bpf_insn *insn,
13384 				     u32 next_idx, u32 curr_idx)
13385 {
13386 	struct bpf_verifier_state *branch;
13387 	struct bpf_reg_state *regs;
13388 
13389 	branch = push_stack(env, next_idx, curr_idx, true);
13390 	if (!IS_ERR(branch) && insn) {
13391 		regs = branch->frame[branch->curframe]->regs;
13392 		if (BPF_SRC(insn->code) == BPF_K) {
13393 			mark_reg_unknown(env, regs, insn->dst_reg);
13394 		} else if (BPF_SRC(insn->code) == BPF_X) {
13395 			mark_reg_unknown(env, regs, insn->dst_reg);
13396 			mark_reg_unknown(env, regs, insn->src_reg);
13397 		}
13398 	}
13399 	return PTR_ERR_OR_ZERO(branch);
13400 }
13401 
13402 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
13403 			    struct bpf_insn *insn,
13404 			    const struct bpf_reg_state *ptr_reg,
13405 			    const struct bpf_reg_state *off_reg,
13406 			    struct bpf_reg_state *dst_reg,
13407 			    struct bpf_sanitize_info *info,
13408 			    const bool commit_window)
13409 {
13410 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
13411 	struct bpf_verifier_state *vstate = env->cur_state;
13412 	bool off_is_imm = tnum_is_const(off_reg->var_off);
13413 	bool off_is_neg = reg_smin(off_reg) < 0;
13414 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
13415 	u8 opcode = BPF_OP(insn->code);
13416 	u32 alu_state, alu_limit;
13417 	struct bpf_reg_state tmp;
13418 	int err;
13419 
13420 	if (can_skip_alu_sanitation(env, insn))
13421 		return 0;
13422 
13423 	/* We already marked aux for masking from non-speculative
13424 	 * paths, thus we got here in the first place. We only care
13425 	 * to explore bad access from here.
13426 	 */
13427 	if (vstate->speculative)
13428 		goto do_sim;
13429 
13430 	if (!commit_window) {
13431 		if (!tnum_is_const(off_reg->var_off) &&
13432 		    (reg_smin(off_reg) < 0) != (reg_smax(off_reg) < 0))
13433 			return REASON_BOUNDS;
13434 
13435 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
13436 				     (opcode == BPF_SUB && !off_is_neg);
13437 	}
13438 
13439 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
13440 	if (err < 0)
13441 		return err;
13442 
13443 	if (commit_window) {
13444 		/* In commit phase we narrow the masking window based on
13445 		 * the observed pointer move after the simulated operation.
13446 		 */
13447 		alu_state = info->aux.alu_state;
13448 		alu_limit = abs(info->aux.alu_limit - alu_limit);
13449 	} else {
13450 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
13451 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
13452 		alu_state |= ptr_is_dst_reg ?
13453 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
13454 
13455 		/* Limit pruning on unknown scalars to enable deep search for
13456 		 * potential masking differences from other program paths.
13457 		 */
13458 		if (!off_is_imm)
13459 			env->explore_alu_limits = true;
13460 	}
13461 
13462 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
13463 	if (err < 0)
13464 		return err;
13465 do_sim:
13466 	/* If we're in commit phase, we're done here given we already
13467 	 * pushed the truncated dst_reg into the speculative verification
13468 	 * stack.
13469 	 *
13470 	 * Also, when register is a known constant, we rewrite register-based
13471 	 * operation to immediate-based, and thus do not need masking (and as
13472 	 * a consequence, do not need to simulate the zero-truncation either).
13473 	 */
13474 	if (commit_window || off_is_imm)
13475 		return 0;
13476 
13477 	/* Simulate and find potential out-of-bounds access under
13478 	 * speculative execution from truncation as a result of
13479 	 * masking when off was not within expected range. If off
13480 	 * sits in dst, then we temporarily need to move ptr there
13481 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
13482 	 * for cases where we use K-based arithmetic in one direction
13483 	 * and truncated reg-based in the other in order to explore
13484 	 * bad access.
13485 	 */
13486 	if (!ptr_is_dst_reg) {
13487 		tmp = *dst_reg;
13488 		*dst_reg = *ptr_reg;
13489 	}
13490 	err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
13491 	if (err < 0)
13492 		return REASON_STACK;
13493 	if (!ptr_is_dst_reg)
13494 		*dst_reg = tmp;
13495 	return 0;
13496 }
13497 
13498 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
13499 {
13500 	struct bpf_verifier_state *vstate = env->cur_state;
13501 
13502 	/* If we simulate paths under speculation, we don't update the
13503 	 * insn as 'seen' such that when we verify unreachable paths in
13504 	 * the non-speculative domain, sanitize_dead_code() can still
13505 	 * rewrite/sanitize them.
13506 	 */
13507 	if (!vstate->speculative)
13508 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
13509 }
13510 
13511 static int sanitize_err(struct bpf_verifier_env *env,
13512 			const struct bpf_insn *insn, int reason,
13513 			const struct bpf_reg_state *off_reg,
13514 			const struct bpf_reg_state *dst_reg)
13515 {
13516 	static const char *err = "pointer arithmetic with it prohibited for !root";
13517 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
13518 	u32 dst = insn->dst_reg, src = insn->src_reg;
13519 
13520 	switch (reason) {
13521 	case REASON_BOUNDS:
13522 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
13523 			off_reg == dst_reg ? dst : src, err);
13524 		break;
13525 	case REASON_TYPE:
13526 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
13527 			off_reg == dst_reg ? src : dst, err);
13528 		break;
13529 	case REASON_PATHS:
13530 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
13531 			dst, op, err);
13532 		break;
13533 	case REASON_LIMIT:
13534 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
13535 			dst, op, err);
13536 		break;
13537 	case REASON_STACK:
13538 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
13539 			dst, err);
13540 		return -ENOMEM;
13541 	default:
13542 		verifier_bug(env, "unknown reason (%d)", reason);
13543 		break;
13544 	}
13545 
13546 	return -EACCES;
13547 }
13548 
13549 /* check that stack access falls within stack limits and that 'reg' doesn't
13550  * have a variable offset.
13551  *
13552  * Variable offset is prohibited for unprivileged mode for simplicity since it
13553  * requires corresponding support in Spectre masking for stack ALU.  See also
13554  * retrieve_ptr_limit().
13555  */
13556 static int check_stack_access_for_ptr_arithmetic(
13557 				struct bpf_verifier_env *env,
13558 				int regno,
13559 				const struct bpf_reg_state *reg,
13560 				int off)
13561 {
13562 	if (!tnum_is_const(reg->var_off)) {
13563 		char tn_buf[48];
13564 
13565 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
13566 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
13567 			regno, tn_buf, off);
13568 		return -EACCES;
13569 	}
13570 
13571 	if (off >= 0 || off < -MAX_BPF_STACK) {
13572 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
13573 			"prohibited for !root; off=%d\n", regno, off);
13574 		return -EACCES;
13575 	}
13576 
13577 	return 0;
13578 }
13579 
13580 static int sanitize_check_bounds(struct bpf_verifier_env *env,
13581 				 const struct bpf_insn *insn,
13582 				 struct bpf_reg_state *dst_reg)
13583 {
13584 	u32 dst = insn->dst_reg;
13585 
13586 	/* For unprivileged we require that resulting offset must be in bounds
13587 	 * in order to be able to sanitize access later on.
13588 	 */
13589 	if (env->bypass_spec_v1)
13590 		return 0;
13591 
13592 	switch (dst_reg->type) {
13593 	case PTR_TO_STACK:
13594 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
13595 							  dst_reg->var_off.value))
13596 			return -EACCES;
13597 		break;
13598 	case PTR_TO_MAP_VALUE:
13599 		if (check_map_access(env, dst_reg, argno_from_reg(dst), 0, 1, false, ACCESS_HELPER)) {
13600 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
13601 				"prohibited for !root\n", dst);
13602 			return -EACCES;
13603 		}
13604 		break;
13605 	default:
13606 		return -EOPNOTSUPP;
13607 	}
13608 
13609 	return 0;
13610 }
13611 
13612 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
13613  * Caller should also handle BPF_MOV case separately.
13614  * If we return -EACCES, caller may want to try again treating pointer as a
13615  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
13616  */
13617 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
13618 				   struct bpf_insn *insn,
13619 				   const struct bpf_reg_state *ptr_reg,
13620 				   const struct bpf_reg_state *off_reg)
13621 {
13622 	struct bpf_verifier_state *vstate = env->cur_state;
13623 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
13624 	struct bpf_reg_state *regs = state->regs, *dst_reg;
13625 	bool known = tnum_is_const(off_reg->var_off);
13626 	s64 smin_val = reg_smin(off_reg), smax_val = reg_smax(off_reg);
13627 	u64 umin_val = reg_umin(off_reg), umax_val = reg_umax(off_reg);
13628 	struct bpf_sanitize_info info = {};
13629 	u8 opcode = BPF_OP(insn->code);
13630 	u32 dst = insn->dst_reg;
13631 	int ret, bounds_ret;
13632 
13633 	dst_reg = &regs[dst];
13634 
13635 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
13636 	    smin_val > smax_val || umin_val > umax_val) {
13637 		/* Taint dst register if offset had invalid bounds derived from
13638 		 * e.g. dead branches.
13639 		 */
13640 		__mark_reg_unknown(env, dst_reg);
13641 		return 0;
13642 	}
13643 
13644 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
13645 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
13646 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13647 			__mark_reg_unknown(env, dst_reg);
13648 			return 0;
13649 		}
13650 
13651 		verbose(env,
13652 			"R%d 32-bit pointer arithmetic prohibited\n",
13653 			dst);
13654 		return -EACCES;
13655 	}
13656 
13657 	if (ptr_reg->type & PTR_MAYBE_NULL) {
13658 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
13659 			dst, reg_type_str(env, ptr_reg->type));
13660 		return -EACCES;
13661 	}
13662 
13663 	/*
13664 	 * Accesses to untrusted PTR_TO_MEM are done through probe
13665 	 * instructions, hence no need to track offsets.
13666 	 */
13667 	if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
13668 		return 0;
13669 
13670 	switch (base_type(ptr_reg->type)) {
13671 	case PTR_TO_CTX:
13672 	case PTR_TO_MAP_VALUE:
13673 	case PTR_TO_MAP_KEY:
13674 	case PTR_TO_STACK:
13675 	case PTR_TO_PACKET_META:
13676 	case PTR_TO_PACKET:
13677 	case PTR_TO_TP_BUFFER:
13678 	case PTR_TO_BTF_ID:
13679 	case PTR_TO_MEM:
13680 	case PTR_TO_BUF:
13681 	case PTR_TO_FUNC:
13682 	case CONST_PTR_TO_DYNPTR:
13683 		break;
13684 	case PTR_TO_FLOW_KEYS:
13685 		if (known)
13686 			break;
13687 		fallthrough;
13688 	case CONST_PTR_TO_MAP:
13689 		/* smin_val represents the known value */
13690 		if (known && smin_val == 0 && opcode == BPF_ADD)
13691 			break;
13692 		fallthrough;
13693 	default:
13694 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
13695 			dst, reg_type_str(env, ptr_reg->type));
13696 		return -EACCES;
13697 	}
13698 
13699 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
13700 	 * The id may be overwritten later if we create a new variable offset.
13701 	 */
13702 	dst_reg->type = ptr_reg->type;
13703 	dst_reg->id = ptr_reg->id;
13704 
13705 	if (!check_reg_sane_offset_scalar(env, off_reg, ptr_reg->type) ||
13706 	    !check_reg_sane_offset_ptr(env, ptr_reg, ptr_reg->type))
13707 		return -EINVAL;
13708 
13709 	/* pointer types do not carry 32-bit bounds at the moment. */
13710 	__mark_reg32_unbounded(dst_reg);
13711 
13712 	if (sanitize_needed(opcode)) {
13713 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
13714 				       &info, false);
13715 		if (ret < 0)
13716 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13717 	}
13718 
13719 	switch (opcode) {
13720 	case BPF_ADD:
13721 		/*
13722 		 * dst_reg gets the pointer type and since some positive
13723 		 * integer value was added to the pointer, give it a new 'id'
13724 		 * if it's a PTR_TO_PACKET.
13725 		 * this creates a new 'base' pointer, off_reg (variable) gets
13726 		 * added into the variable offset, and we copy the fixed offset
13727 		 * from ptr_reg.
13728 		 */
13729 		dst_reg->r64 = cnum64_add(ptr_reg->r64, off_reg->r64);
13730 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
13731 		dst_reg->raw = ptr_reg->raw;
13732 		if (reg_is_pkt_pointer(ptr_reg)) {
13733 			if (!known)
13734 				dst_reg->id = ++env->id_gen;
13735 			/*
13736 			 * Clear range for unknown addends since we can't know
13737 			 * where the pkt pointer ended up. Also clear AT_PKT_END /
13738 			 * BEYOND_PKT_END from prior comparison as any pointer
13739 			 * arithmetic invalidates them.
13740 			 */
13741 			if (!known || dst_reg->range < 0)
13742 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13743 		}
13744 		break;
13745 	case BPF_SUB:
13746 		if (dst_reg == off_reg) {
13747 			/* scalar -= pointer.  Creates an unknown scalar */
13748 			verbose(env, "R%d tried to subtract pointer from scalar\n",
13749 				dst);
13750 			return -EACCES;
13751 		}
13752 		/* We don't allow subtraction from FP, because (according to
13753 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
13754 		 * be able to deal with it.
13755 		 */
13756 		if (ptr_reg->type == PTR_TO_STACK) {
13757 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
13758 				dst);
13759 			return -EACCES;
13760 		}
13761 		dst_reg->r64 = cnum64_add(ptr_reg->r64, cnum64_negate(off_reg->r64));
13762 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
13763 		dst_reg->raw = ptr_reg->raw;
13764 		if (reg_is_pkt_pointer(ptr_reg)) {
13765 			if (!known)
13766 				dst_reg->id = ++env->id_gen;
13767 			/*
13768 			 * Clear range if the subtrahend may be negative since
13769 			 * pkt pointer could move past its bounds. A positive
13770 			 * subtrahend moves it backwards keeping positive range
13771 			 * intact. Also clear AT_PKT_END / BEYOND_PKT_END from
13772 			 * prior comparison as arithmetic invalidates them.
13773 			 */
13774 			if ((!known && smin_val < 0) || dst_reg->range < 0)
13775 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13776 		}
13777 		break;
13778 	case BPF_AND:
13779 	case BPF_OR:
13780 	case BPF_XOR:
13781 		/* bitwise ops on pointers are troublesome, prohibit. */
13782 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
13783 			dst, bpf_alu_string[opcode >> 4]);
13784 		return -EACCES;
13785 	default:
13786 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
13787 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
13788 			dst, bpf_alu_string[opcode >> 4]);
13789 		return -EACCES;
13790 	}
13791 
13792 	if (!check_reg_sane_offset_ptr(env, dst_reg, ptr_reg->type))
13793 		return -EINVAL;
13794 	reg_bounds_sync(dst_reg);
13795 	bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
13796 	if (bounds_ret == -EACCES)
13797 		return bounds_ret;
13798 	if (sanitize_needed(opcode)) {
13799 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
13800 				       &info, true);
13801 		if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
13802 				    && !env->cur_state->speculative
13803 				    && bounds_ret
13804 				    && !ret,
13805 				    env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
13806 			return -EFAULT;
13807 		}
13808 		if (ret < 0)
13809 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13810 	}
13811 
13812 	return 0;
13813 }
13814 
13815 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
13816 				 struct bpf_reg_state *src_reg)
13817 {
13818 	dst_reg->r32 = cnum32_add(dst_reg->r32, src_reg->r32);
13819 }
13820 
13821 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
13822 			       struct bpf_reg_state *src_reg)
13823 {
13824 	dst_reg->r64 = cnum64_add(dst_reg->r64, src_reg->r64);
13825 }
13826 
13827 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
13828 				 struct bpf_reg_state *src_reg)
13829 {
13830 	dst_reg->r32 = cnum32_add(dst_reg->r32, cnum32_negate(src_reg->r32));
13831 }
13832 
13833 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
13834 			       struct bpf_reg_state *src_reg)
13835 {
13836 	dst_reg->r64 = cnum64_add(dst_reg->r64, cnum64_negate(src_reg->r64));
13837 }
13838 
13839 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
13840 				 struct bpf_reg_state *src_reg)
13841 {
13842 	s32 smin = reg_s32_min(dst_reg);
13843 	s32 smax = reg_s32_max(dst_reg);
13844 	u32 umin = reg_u32_min(dst_reg);
13845 	u32 umax = reg_u32_max(dst_reg);
13846 	s32 tmp_prod[4];
13847 
13848 	if (check_mul_overflow(umax, reg_u32_max(src_reg), &umax) ||
13849 	    check_mul_overflow(umin, reg_u32_min(src_reg), &umin)) {
13850 		/* Overflow possible, we know nothing */
13851 		umin = 0;
13852 		umax = U32_MAX;
13853 	}
13854 	if (check_mul_overflow(smin, reg_s32_min(src_reg), &tmp_prod[0]) ||
13855 	    check_mul_overflow(smin, reg_s32_max(src_reg), &tmp_prod[1]) ||
13856 	    check_mul_overflow(smax, reg_s32_min(src_reg), &tmp_prod[2]) ||
13857 	    check_mul_overflow(smax, reg_s32_max(src_reg), &tmp_prod[3])) {
13858 		/* Overflow possible, we know nothing */
13859 		smin = S32_MIN;
13860 		smax = S32_MAX;
13861 	} else {
13862 		smin = min_array(tmp_prod, 4);
13863 		smax = max_array(tmp_prod, 4);
13864 	}
13865 
13866 	dst_reg->r32 = cnum32_intersect(cnum32_from_urange(umin, umax),
13867 					cnum32_from_srange(smin, smax));
13868 }
13869 
13870 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
13871 			       struct bpf_reg_state *src_reg)
13872 {
13873 	s64 smin = reg_smin(dst_reg);
13874 	s64 smax = reg_smax(dst_reg);
13875 	u64 umin = reg_umin(dst_reg);
13876 	u64 umax = reg_umax(dst_reg);
13877 	s64 tmp_prod[4];
13878 
13879 	if (check_mul_overflow(umax, reg_umax(src_reg), &umax) ||
13880 	    check_mul_overflow(umin, reg_umin(src_reg), &umin)) {
13881 		/* Overflow possible, we know nothing */
13882 		umin = 0;
13883 		umax = U64_MAX;
13884 	}
13885 	if (check_mul_overflow(smin, reg_smin(src_reg), &tmp_prod[0]) ||
13886 	    check_mul_overflow(smin, reg_smax(src_reg), &tmp_prod[1]) ||
13887 	    check_mul_overflow(smax, reg_smin(src_reg), &tmp_prod[2]) ||
13888 	    check_mul_overflow(smax, reg_smax(src_reg), &tmp_prod[3])) {
13889 		/* Overflow possible, we know nothing */
13890 		smin = S64_MIN;
13891 		smax = S64_MAX;
13892 	} else {
13893 		smin = min_array(tmp_prod, 4);
13894 		smax = max_array(tmp_prod, 4);
13895 	}
13896 
13897 	dst_reg->r64 = cnum64_intersect(cnum64_from_urange(umin, umax),
13898 					cnum64_from_srange(smin, smax));
13899 }
13900 
13901 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg,
13902 				  struct bpf_reg_state *src_reg)
13903 {
13904 	u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */
13905 
13906 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) / src_val,
13907 			 reg_u32_max(dst_reg) / src_val);
13908 
13909 	/* Reset other ranges/tnum to unbounded/unknown. */
13910 	reset_reg64_and_tnum(dst_reg);
13911 }
13912 
13913 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg,
13914 				struct bpf_reg_state *src_reg)
13915 {
13916 	u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */
13917 
13918 	reg_set_urange64(dst_reg, div64_u64(reg_umin(dst_reg), src_val),
13919 			 div64_u64(reg_umax(dst_reg), src_val));
13920 
13921 	/* Reset other ranges/tnum to unbounded/unknown. */
13922 	reset_reg32_and_tnum(dst_reg);
13923 }
13924 
13925 static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg,
13926 				  struct bpf_reg_state *src_reg)
13927 {
13928 	s32 smin = reg_s32_min(dst_reg);
13929 	s32 smax = reg_s32_max(dst_reg);
13930 	s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */
13931 	s32 res1, res2;
13932 
13933 	/* BPF div specification: S32_MIN / -1 = S32_MIN */
13934 	if (smin == S32_MIN && src_val == -1) {
13935 		/*
13936 		 * If the dividend range contains more than just S32_MIN,
13937 		 * we cannot precisely track the result, so it becomes unbounded.
13938 		 * e.g., [S32_MIN, S32_MIN+10]/(-1),
13939 		 *     = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)]
13940 		 *     = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX]
13941 		 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN.
13942 		 */
13943 		if (smax != S32_MIN) {
13944 			smin = S32_MIN;
13945 			smax = S32_MAX;
13946 		}
13947 		goto reset;
13948 	}
13949 
13950 	res1 = smin / src_val;
13951 	res2 = smax / src_val;
13952 	smin = min(res1, res2);
13953 	smax = max(res1, res2);
13954 
13955 reset:
13956 	reg_set_srange32(dst_reg, smin, smax);
13957 	/* Reset other ranges/tnum to unbounded/unknown. */
13958 	reset_reg64_and_tnum(dst_reg);
13959 }
13960 
13961 static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg,
13962 				struct bpf_reg_state *src_reg)
13963 {
13964 	s64 smin = reg_smin(dst_reg);
13965 	s64 smax = reg_smax(dst_reg);
13966 	s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */
13967 	s64 res1, res2;
13968 
13969 	/* BPF div specification: S64_MIN / -1 = S64_MIN */
13970 	if (smin == S64_MIN && src_val == -1) {
13971 		/*
13972 		 * If the dividend range contains more than just S64_MIN,
13973 		 * we cannot precisely track the result, so it becomes unbounded.
13974 		 * e.g., [S64_MIN, S64_MIN+10]/(-1),
13975 		 *     = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)]
13976 		 *     = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX]
13977 		 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN.
13978 		 */
13979 		if (smax != S64_MIN) {
13980 			smin = S64_MIN;
13981 			smax = S64_MAX;
13982 		}
13983 		goto reset;
13984 	}
13985 
13986 	res1 = div64_s64(smin, src_val);
13987 	res2 = div64_s64(smax, src_val);
13988 	smin = min(res1, res2);
13989 	smax = max(res1, res2);
13990 
13991 reset:
13992 	reg_set_srange64(dst_reg, smin, smax);
13993 	/* Reset other ranges/tnum to unbounded/unknown. */
13994 	reset_reg32_and_tnum(dst_reg);
13995 }
13996 
13997 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg,
13998 				  struct bpf_reg_state *src_reg)
13999 {
14000 	u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */
14001 	u32 res_max = src_val - 1;
14002 
14003 	/*
14004 	 * If dst_umax <= res_max, the result remains unchanged.
14005 	 * e.g., [2, 5] % 10 = [2, 5].
14006 	 */
14007 	if (reg_u32_max(dst_reg) <= res_max)
14008 		return;
14009 
14010 	reg_set_urange32(dst_reg, 0, min(reg_u32_max(dst_reg), res_max));
14011 
14012 	/* Reset other ranges/tnum to unbounded/unknown. */
14013 	reset_reg64_and_tnum(dst_reg);
14014 }
14015 
14016 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg,
14017 				struct bpf_reg_state *src_reg)
14018 {
14019 	u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */
14020 	u64 res_max = src_val - 1;
14021 
14022 	/*
14023 	 * If dst_umax <= res_max, the result remains unchanged.
14024 	 * e.g., [2, 5] % 10 = [2, 5].
14025 	 */
14026 	if (reg_umax(dst_reg) <= res_max)
14027 		return;
14028 
14029 	reg_set_urange64(dst_reg, 0, min(reg_umax(dst_reg), res_max));
14030 
14031 	/* Reset other ranges/tnum to unbounded/unknown. */
14032 	reset_reg32_and_tnum(dst_reg);
14033 }
14034 
14035 static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg,
14036 				  struct bpf_reg_state *src_reg)
14037 {
14038 	s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */
14039 
14040 	/*
14041 	 * Safe absolute value calculation:
14042 	 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648.
14043 	 * Here use unsigned integer to avoid overflow.
14044 	 */
14045 	u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val;
14046 
14047 	/*
14048 	 * Calculate the maximum possible absolute value of the result.
14049 	 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives
14050 	 * 2147483647 (S32_MAX), which fits perfectly in s32.
14051 	 */
14052 	s32 res_max_abs = src_abs - 1;
14053 
14054 	/*
14055 	 * If the dividend is already within the result range,
14056 	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
14057 	 */
14058 	if (reg_s32_min(dst_reg) >= -res_max_abs && reg_s32_max(dst_reg) <= res_max_abs)
14059 		return;
14060 
14061 	/* General case: result has the same sign as the dividend. */
14062 	if (reg_s32_min(dst_reg) >= 0) {
14063 		reg_set_srange32(dst_reg, 0, min(reg_s32_max(dst_reg), res_max_abs));
14064 	} else if (reg_s32_max(dst_reg) <= 0) {
14065 		reg_set_srange32(dst_reg, max(reg_s32_min(dst_reg), -res_max_abs), 0);
14066 	} else {
14067 		reg_set_srange32(dst_reg, -res_max_abs, res_max_abs);
14068 	}
14069 
14070 	/* Reset other ranges/tnum to unbounded/unknown. */
14071 	reset_reg64_and_tnum(dst_reg);
14072 }
14073 
14074 static void scalar_min_max_smod(struct bpf_reg_state *dst_reg,
14075 				struct bpf_reg_state *src_reg)
14076 {
14077 	s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */
14078 
14079 	/*
14080 	 * Safe absolute value calculation:
14081 	 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63.
14082 	 * Here use unsigned integer to avoid overflow.
14083 	 */
14084 	u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val;
14085 
14086 	/*
14087 	 * Calculate the maximum possible absolute value of the result.
14088 	 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives
14089 	 * 2^63 - 1 (S64_MAX), which fits perfectly in s64.
14090 	 */
14091 	s64 res_max_abs = src_abs - 1;
14092 
14093 	/*
14094 	 * If the dividend is already within the result range,
14095 	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
14096 	 */
14097 	if (reg_smin(dst_reg) >= -res_max_abs && reg_smax(dst_reg) <= res_max_abs)
14098 		return;
14099 
14100 	/* General case: result has the same sign as the dividend. */
14101 	if (reg_smin(dst_reg) >= 0) {
14102 		reg_set_srange64(dst_reg, 0, min(reg_smax(dst_reg), res_max_abs));
14103 	} else if (reg_smax(dst_reg) <= 0) {
14104 		reg_set_srange64(dst_reg, max(reg_smin(dst_reg), -res_max_abs), 0);
14105 	} else {
14106 		reg_set_srange64(dst_reg, -res_max_abs, res_max_abs);
14107 	}
14108 
14109 	/* Reset other ranges/tnum to unbounded/unknown. */
14110 	reset_reg32_and_tnum(dst_reg);
14111 }
14112 
14113 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
14114 				 struct bpf_reg_state *src_reg)
14115 {
14116 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14117 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14118 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14119 	u32 umax_val = reg_u32_max(src_reg);
14120 
14121 	if (src_known && dst_known) {
14122 		__mark_reg32_known(dst_reg, var32_off.value);
14123 		return;
14124 	}
14125 
14126 	/* We get our minimum from the var_off, since that's inherently
14127 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14128 	 */
14129 	reg_set_urange32(dst_reg,
14130 			 var32_off.value,
14131 			 min(reg_u32_max(dst_reg), umax_val));
14132 }
14133 
14134 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
14135 			       struct bpf_reg_state *src_reg)
14136 {
14137 	bool src_known = tnum_is_const(src_reg->var_off);
14138 	bool dst_known = tnum_is_const(dst_reg->var_off);
14139 	u64 umax_val = reg_umax(src_reg);
14140 
14141 	if (src_known && dst_known) {
14142 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14143 		return;
14144 	}
14145 
14146 	/* We get our minimum from the var_off, since that's inherently
14147 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14148 	 */
14149 	reg_set_urange64(dst_reg,
14150 			 dst_reg->var_off.value,
14151 			 min(reg_umax(dst_reg), umax_val));
14152 
14153 	/* We may learn something more from the var_off */
14154 	__update_reg_bounds(dst_reg);
14155 }
14156 
14157 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
14158 				struct bpf_reg_state *src_reg)
14159 {
14160 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14161 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14162 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14163 	u32 umin_val = reg_u32_min(src_reg);
14164 
14165 	if (src_known && dst_known) {
14166 		__mark_reg32_known(dst_reg, var32_off.value);
14167 		return;
14168 	}
14169 
14170 	/* We get our maximum from the var_off, and our minimum is the
14171 	 * maximum of the operands' minima
14172 	 */
14173 	reg_set_urange32(dst_reg,
14174 			 max(reg_u32_min(dst_reg), umin_val),
14175 			 var32_off.value | var32_off.mask);
14176 }
14177 
14178 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
14179 			      struct bpf_reg_state *src_reg)
14180 {
14181 	bool src_known = tnum_is_const(src_reg->var_off);
14182 	bool dst_known = tnum_is_const(dst_reg->var_off);
14183 	u64 umin_val = reg_umin(src_reg);
14184 
14185 	if (src_known && dst_known) {
14186 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14187 		return;
14188 	}
14189 
14190 	/* We get our maximum from the var_off, and our minimum is the
14191 	 * maximum of the operands' minima
14192 	 */
14193 	reg_set_urange64(dst_reg,
14194 			 max(reg_umin(dst_reg), umin_val),
14195 			 dst_reg->var_off.value | dst_reg->var_off.mask);
14196 
14197 	/* We may learn something more from the var_off */
14198 	__update_reg_bounds(dst_reg);
14199 }
14200 
14201 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
14202 				 struct bpf_reg_state *src_reg)
14203 {
14204 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14205 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14206 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14207 
14208 	if (src_known && dst_known) {
14209 		__mark_reg32_known(dst_reg, var32_off.value);
14210 		return;
14211 	}
14212 
14213 	/* We get both minimum and maximum from the var32_off. */
14214 	reg_set_urange32(dst_reg, var32_off.value, var32_off.value | var32_off.mask);
14215 }
14216 
14217 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
14218 			       struct bpf_reg_state *src_reg)
14219 {
14220 	bool src_known = tnum_is_const(src_reg->var_off);
14221 	bool dst_known = tnum_is_const(dst_reg->var_off);
14222 
14223 	if (src_known && dst_known) {
14224 		/* dst_reg->var_off.value has been updated earlier */
14225 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14226 		return;
14227 	}
14228 
14229 	/* We get both minimum and maximum from the var_off. */
14230 	reg_set_urange64(dst_reg,
14231 			 dst_reg->var_off.value,
14232 			 dst_reg->var_off.value | dst_reg->var_off.mask);
14233 }
14234 
14235 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14236 				   u64 umin_val, u64 umax_val)
14237 {
14238 	/* If we might shift our top bit out, then we know nothing */
14239 	if (umax_val > 31 || reg_u32_max(dst_reg) > 1ULL << (31 - umax_val))
14240 		reg_set_urange32(dst_reg, 0, U32_MAX);
14241 	else
14242 		/* We lose all sign bit information (except what we can pick
14243 		 * up from var_off)
14244 		 */
14245 		reg_set_urange32(dst_reg, reg_u32_min(dst_reg) << umin_val,
14246 				 reg_u32_max(dst_reg) << umax_val);
14247 }
14248 
14249 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14250 				 struct bpf_reg_state *src_reg)
14251 {
14252 	u32 umax_val = reg_u32_max(src_reg);
14253 	u32 umin_val = reg_u32_min(src_reg);
14254 	/* u32 alu operation will zext upper bits */
14255 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14256 
14257 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14258 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
14259 	/* Not required but being careful mark reg64 bounds as unknown so
14260 	 * that we are forced to pick them up from tnum and zext later and
14261 	 * if some path skips this step we are still safe.
14262 	 */
14263 	__mark_reg64_unbounded(dst_reg);
14264 	__update_reg32_bounds(dst_reg);
14265 }
14266 
14267 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
14268 				   u64 umin_val, u64 umax_val)
14269 {
14270 	struct cnum64 u, s;
14271 
14272 	/* Special case <<32 because it is a common compiler pattern to sign
14273 	 * extend subreg by doing <<32 s>>32. smin/smax assignments are correct
14274 	 * because s32 bounds don't flip sign when shifting to the left by
14275 	 * 32bits.
14276 	 */
14277 	if (umin_val == 32 && umax_val == 32)
14278 		s = cnum64_from_srange((s64)reg_s32_min(dst_reg) << 32,
14279 				       (s64)reg_s32_max(dst_reg) << 32);
14280 	else
14281 		s = CNUM64_UNBOUNDED;
14282 
14283 	/* If we might shift our top bit out, then we know nothing */
14284 	if (reg_umax(dst_reg) > 1ULL << (63 - umax_val))
14285 		u = CNUM64_UNBOUNDED;
14286 	else
14287 		u = cnum64_from_urange(reg_umin(dst_reg) << umin_val,
14288 				       reg_umax(dst_reg) << umax_val);
14289 
14290 	dst_reg->r64 = cnum64_intersect(u, s);
14291 }
14292 
14293 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
14294 			       struct bpf_reg_state *src_reg)
14295 {
14296 	u64 umax_val = reg_umax(src_reg);
14297 	u64 umin_val = reg_umin(src_reg);
14298 
14299 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
14300 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
14301 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14302 
14303 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
14304 	/* We may learn something more from the var_off */
14305 	__update_reg_bounds(dst_reg);
14306 }
14307 
14308 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
14309 				 struct bpf_reg_state *src_reg)
14310 {
14311 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14312 	u32 umax_val = reg_u32_max(src_reg);
14313 	u32 umin_val = reg_u32_min(src_reg);
14314 
14315 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14316 	 * be negative, then either:
14317 	 * 1) src_reg might be zero, so the sign bit of the result is
14318 	 *    unknown, so we lose our signed bounds
14319 	 * 2) it's known negative, thus the unsigned bounds capture the
14320 	 *    signed bounds
14321 	 * 3) the signed bounds cross zero, so they tell us nothing
14322 	 *    about the result
14323 	 * If the value in dst_reg is known nonnegative, then again the
14324 	 * unsigned bounds capture the signed bounds.
14325 	 * Thus, in all cases it suffices to blow away our signed bounds
14326 	 * and rely on inferring new ones from the unsigned bounds and
14327 	 * var_off of the result.
14328 	 */
14329 
14330 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
14331 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) >> umax_val,
14332 			 reg_u32_max(dst_reg) >> umin_val);
14333 
14334 	__mark_reg64_unbounded(dst_reg);
14335 	__update_reg32_bounds(dst_reg);
14336 }
14337 
14338 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
14339 			       struct bpf_reg_state *src_reg)
14340 {
14341 	u64 umax_val = reg_umax(src_reg);
14342 	u64 umin_val = reg_umin(src_reg);
14343 
14344 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14345 	 * be negative, then either:
14346 	 * 1) src_reg might be zero, so the sign bit of the result is
14347 	 *    unknown, so we lose our signed bounds
14348 	 * 2) it's known negative, thus the unsigned bounds capture the
14349 	 *    signed bounds
14350 	 * 3) the signed bounds cross zero, so they tell us nothing
14351 	 *    about the result
14352 	 * If the value in dst_reg is known nonnegative, then again the
14353 	 * unsigned bounds capture the signed bounds.
14354 	 * Thus, in all cases it suffices to blow away our signed bounds
14355 	 * and rely on inferring new ones from the unsigned bounds and
14356 	 * var_off of the result.
14357 	 */
14358 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
14359 	reg_set_urange64(dst_reg, reg_umin(dst_reg) >> umax_val,
14360 			 reg_umax(dst_reg) >> umin_val);
14361 
14362 	/* Its not easy to operate on alu32 bounds here because it depends
14363 	 * on bits being shifted in. Take easy way out and mark unbounded
14364 	 * so we can recalculate later from tnum.
14365 	 */
14366 	__mark_reg32_unbounded(dst_reg);
14367 	__update_reg_bounds(dst_reg);
14368 }
14369 
14370 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
14371 				  struct bpf_reg_state *src_reg)
14372 {
14373 	u64 umin_val = reg_u32_min(src_reg);
14374 
14375 	/* Upon reaching here, src_known is true and
14376 	 * umax_val is equal to umin_val.
14377 	 * Blow away the dst_reg umin_value/umax_value and rely on
14378 	 * dst_reg var_off to refine the result.
14379 	 */
14380 	reg_set_srange32(dst_reg,
14381 			 (u32)(((s32)reg_s32_min(dst_reg)) >> umin_val),
14382 			 (u32)(((s32)reg_s32_max(dst_reg)) >> umin_val));
14383 
14384 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
14385 
14386 	__mark_reg64_unbounded(dst_reg);
14387 	__update_reg32_bounds(dst_reg);
14388 }
14389 
14390 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
14391 				struct bpf_reg_state *src_reg)
14392 {
14393 	u64 umin_val = reg_umin(src_reg);
14394 
14395 	/* Upon reaching here, src_known is true and umax_val is equal
14396 	 * to umin_val.
14397 	 */
14398 	reg_set_srange64(dst_reg, reg_smin(dst_reg) >> umin_val,
14399 			 reg_smax(dst_reg) >> umin_val);
14400 
14401 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
14402 
14403 	/* Its not easy to operate on alu32 bounds here because it depends
14404 	 * on bits being shifted in from upper 32-bits. Take easy way out
14405 	 * and mark unbounded so we can recalculate later from tnum.
14406 	 */
14407 	__mark_reg32_unbounded(dst_reg);
14408 	__update_reg_bounds(dst_reg);
14409 }
14410 
14411 static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
14412 {
14413 	/*
14414 	 * Byte swap operation - update var_off using tnum_bswap.
14415 	 * Three cases:
14416 	 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
14417 	 *    unconditional swap
14418 	 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
14419 	 *    swap on big-endian, truncation or no-op on little-endian
14420 	 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
14421 	 *    swap on little-endian, truncation or no-op on big-endian
14422 	 */
14423 
14424 	bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
14425 	bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
14426 	bool is_big_endian;
14427 #ifdef CONFIG_CPU_BIG_ENDIAN
14428 	is_big_endian = true;
14429 #else
14430 	is_big_endian = false;
14431 #endif
14432 	/* Apply bswap if alu64 or switch between big-endian and little-endian machines */
14433 	bool need_bswap = alu64 || (to_le == is_big_endian);
14434 
14435 	/*
14436 	 * If the register is mutated, manually reset its scalar ID to break
14437 	 * any existing ties and avoid incorrect bounds propagation.
14438 	 */
14439 	if (need_bswap || insn->imm == 16 || insn->imm == 32)
14440 		clear_scalar_id(dst_reg);
14441 
14442 	if (need_bswap) {
14443 		if (insn->imm == 16)
14444 			dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
14445 		else if (insn->imm == 32)
14446 			dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
14447 		else if (insn->imm == 64)
14448 			dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
14449 		/*
14450 		 * Byteswap scrambles the range, so we must reset bounds.
14451 		 * Bounds will be re-derived from the new tnum later.
14452 		 */
14453 		__mark_reg_unbounded(dst_reg);
14454 	}
14455 	/* For bswap16/32, truncate dst register to match the swapped size */
14456 	if (insn->imm == 16 || insn->imm == 32)
14457 		coerce_reg_to_size(dst_reg, insn->imm / 8);
14458 }
14459 
14460 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
14461 					     const struct bpf_reg_state *src_reg)
14462 {
14463 	bool src_is_const = false;
14464 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
14465 
14466 	if (insn_bitness == 32) {
14467 		if (tnum_subreg_is_const(src_reg->var_off)
14468 		    && reg_s32_min(src_reg) == reg_s32_max(src_reg)
14469 		    && reg_u32_min(src_reg) == reg_u32_max(src_reg))
14470 			src_is_const = true;
14471 	} else {
14472 		if (tnum_is_const(src_reg->var_off)
14473 		    && reg_smin(src_reg) == reg_smax(src_reg)
14474 		    && reg_umin(src_reg) == reg_umax(src_reg))
14475 			src_is_const = true;
14476 	}
14477 
14478 	switch (BPF_OP(insn->code)) {
14479 	case BPF_ADD:
14480 	case BPF_SUB:
14481 	case BPF_NEG:
14482 	case BPF_AND:
14483 	case BPF_XOR:
14484 	case BPF_OR:
14485 	case BPF_MUL:
14486 	case BPF_END:
14487 		return true;
14488 
14489 	/*
14490 	 * Division and modulo operators range is only safe to compute when the
14491 	 * divisor is a constant.
14492 	 */
14493 	case BPF_DIV:
14494 	case BPF_MOD:
14495 		return src_is_const;
14496 
14497 	/* Shift operators range is only computable if shift dimension operand
14498 	 * is a constant. Shifts greater than 31 or 63 are undefined. This
14499 	 * includes shifts by a negative number.
14500 	 */
14501 	case BPF_LSH:
14502 	case BPF_RSH:
14503 	case BPF_ARSH:
14504 		return (src_is_const && reg_umax(src_reg) < insn_bitness);
14505 	default:
14506 		return false;
14507 	}
14508 }
14509 
14510 static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn,
14511 			      struct bpf_reg_state *dst_reg)
14512 {
14513 	struct bpf_verifier_state *branch;
14514 	struct bpf_reg_state *regs;
14515 	bool alu32;
14516 
14517 	if (reg_smin(dst_reg) == -1 && reg_smax(dst_reg) == 0)
14518 		alu32 = false;
14519 	else if (reg_s32_min(dst_reg) == -1 && reg_s32_max(dst_reg) == 0)
14520 		alu32 = true;
14521 	else
14522 		return 0;
14523 
14524 	branch = push_stack(env, env->insn_idx, env->insn_idx, false);
14525 	if (IS_ERR(branch))
14526 		return PTR_ERR(branch);
14527 
14528 	regs = branch->frame[branch->curframe]->regs;
14529 	if (alu32) {
14530 		__mark_reg32_known(&regs[insn->dst_reg], 0);
14531 		__mark_reg32_known(dst_reg, -1ull);
14532 	} else {
14533 		__mark_reg_known(&regs[insn->dst_reg], 0);
14534 		__mark_reg_known(dst_reg, -1ull);
14535 	}
14536 	return 0;
14537 }
14538 
14539 /* WARNING: This function does calculations on 64-bit values, but the actual
14540  * execution may occur on 32-bit values. Therefore, things like bitshifts
14541  * need extra checks in the 32-bit case.
14542  */
14543 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
14544 				      struct bpf_insn *insn,
14545 				      struct bpf_reg_state *dst_reg,
14546 				      struct bpf_reg_state src_reg)
14547 {
14548 	u8 opcode = BPF_OP(insn->code);
14549 	s16 off = insn->off;
14550 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14551 	int ret;
14552 
14553 	if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
14554 		__mark_reg_unknown(env, dst_reg);
14555 		return 0;
14556 	}
14557 
14558 	if (sanitize_needed(opcode)) {
14559 		ret = sanitize_val_alu(env, insn);
14560 		if (ret < 0)
14561 			return sanitize_err(env, insn, ret, NULL, NULL);
14562 	}
14563 
14564 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
14565 	 * There are two classes of instructions: The first class we track both
14566 	 * alu32 and alu64 sign/unsigned bounds independently this provides the
14567 	 * greatest amount of precision when alu operations are mixed with jmp32
14568 	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
14569 	 * and BPF_OR. This is possible because these ops have fairly easy to
14570 	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
14571 	 * See alu32 verifier tests for examples. The second class of
14572 	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
14573 	 * with regards to tracking sign/unsigned bounds because the bits may
14574 	 * cross subreg boundaries in the alu64 case. When this happens we mark
14575 	 * the reg unbounded in the subreg bound space and use the resulting
14576 	 * tnum to calculate an approximation of the sign/unsigned bounds.
14577 	 */
14578 	switch (opcode) {
14579 	case BPF_ADD:
14580 		scalar32_min_max_add(dst_reg, &src_reg);
14581 		scalar_min_max_add(dst_reg, &src_reg);
14582 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
14583 		break;
14584 	case BPF_SUB:
14585 		scalar32_min_max_sub(dst_reg, &src_reg);
14586 		scalar_min_max_sub(dst_reg, &src_reg);
14587 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
14588 		break;
14589 	case BPF_NEG:
14590 		env->fake_reg[0] = *dst_reg;
14591 		__mark_reg_known(dst_reg, 0);
14592 		scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
14593 		scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
14594 		dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
14595 		break;
14596 	case BPF_MUL:
14597 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
14598 		scalar32_min_max_mul(dst_reg, &src_reg);
14599 		scalar_min_max_mul(dst_reg, &src_reg);
14600 		break;
14601 	case BPF_DIV:
14602 		/* BPF div specification: x / 0 = 0 */
14603 		if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0)) {
14604 			___mark_reg_known(dst_reg, 0);
14605 			break;
14606 		}
14607 		if (alu32)
14608 			if (off == 1)
14609 				scalar32_min_max_sdiv(dst_reg, &src_reg);
14610 			else
14611 				scalar32_min_max_udiv(dst_reg, &src_reg);
14612 		else
14613 			if (off == 1)
14614 				scalar_min_max_sdiv(dst_reg, &src_reg);
14615 			else
14616 				scalar_min_max_udiv(dst_reg, &src_reg);
14617 		break;
14618 	case BPF_MOD:
14619 		/* BPF mod specification: x % 0 = x */
14620 		if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0))
14621 			break;
14622 		if (alu32)
14623 			if (off == 1)
14624 				scalar32_min_max_smod(dst_reg, &src_reg);
14625 			else
14626 				scalar32_min_max_umod(dst_reg, &src_reg);
14627 		else
14628 			if (off == 1)
14629 				scalar_min_max_smod(dst_reg, &src_reg);
14630 			else
14631 				scalar_min_max_umod(dst_reg, &src_reg);
14632 		break;
14633 	case BPF_AND:
14634 		if (tnum_is_const(src_reg.var_off)) {
14635 			ret = maybe_fork_scalars(env, insn, dst_reg);
14636 			if (ret)
14637 				return ret;
14638 		}
14639 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
14640 		scalar32_min_max_and(dst_reg, &src_reg);
14641 		scalar_min_max_and(dst_reg, &src_reg);
14642 		break;
14643 	case BPF_OR:
14644 		if (tnum_is_const(src_reg.var_off)) {
14645 			ret = maybe_fork_scalars(env, insn, dst_reg);
14646 			if (ret)
14647 				return ret;
14648 		}
14649 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
14650 		scalar32_min_max_or(dst_reg, &src_reg);
14651 		scalar_min_max_or(dst_reg, &src_reg);
14652 		break;
14653 	case BPF_XOR:
14654 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
14655 		scalar32_min_max_xor(dst_reg, &src_reg);
14656 		scalar_min_max_xor(dst_reg, &src_reg);
14657 		break;
14658 	case BPF_LSH:
14659 		if (alu32)
14660 			scalar32_min_max_lsh(dst_reg, &src_reg);
14661 		else
14662 			scalar_min_max_lsh(dst_reg, &src_reg);
14663 		break;
14664 	case BPF_RSH:
14665 		if (alu32)
14666 			scalar32_min_max_rsh(dst_reg, &src_reg);
14667 		else
14668 			scalar_min_max_rsh(dst_reg, &src_reg);
14669 		break;
14670 	case BPF_ARSH:
14671 		if (alu32)
14672 			scalar32_min_max_arsh(dst_reg, &src_reg);
14673 		else
14674 			scalar_min_max_arsh(dst_reg, &src_reg);
14675 		break;
14676 	case BPF_END:
14677 		scalar_byte_swap(dst_reg, insn);
14678 		break;
14679 	default:
14680 		break;
14681 	}
14682 
14683 	/*
14684 	 * ALU32 ops are zero extended into 64bit register.
14685 	 *
14686 	 * BPF_END is already handled inside the helper (truncation),
14687 	 * so skip zext here to avoid unexpected zero extension.
14688 	 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
14689 	 * This is a 64bit byte swap operation with alu32==true,
14690 	 * but we should not zero extend the result.
14691 	 */
14692 	if (alu32 && opcode != BPF_END)
14693 		zext_32_to_64(dst_reg);
14694 	reg_bounds_sync(dst_reg);
14695 	return 0;
14696 }
14697 
14698 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
14699  * and var_off.
14700  */
14701 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
14702 				   struct bpf_insn *insn)
14703 {
14704 	struct bpf_verifier_state *vstate = env->cur_state;
14705 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
14706 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
14707 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
14708 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14709 	u8 opcode = BPF_OP(insn->code);
14710 	int err;
14711 
14712 	dst_reg = &regs[insn->dst_reg];
14713 	if (BPF_SRC(insn->code) == BPF_X)
14714 		src_reg = &regs[insn->src_reg];
14715 	else
14716 		src_reg = NULL;
14717 
14718 	/* Case where at least one operand is an arena. */
14719 	if (dst_reg->type == PTR_TO_ARENA || (src_reg && src_reg->type == PTR_TO_ARENA)) {
14720 		struct bpf_insn_aux_data *aux = cur_aux(env);
14721 
14722 		if (dst_reg->type != PTR_TO_ARENA)
14723 			*dst_reg = *src_reg;
14724 
14725 		dst_reg->subreg_def = env->insn_idx + 1;
14726 
14727 		if (BPF_CLASS(insn->code) == BPF_ALU64)
14728 			/*
14729 			 * 32-bit operations zero upper bits automatically.
14730 			 * 64-bit operations need to be converted to 32.
14731 			 */
14732 			aux->needs_zext = true;
14733 
14734 		/* Any arithmetic operations are allowed on arena pointers */
14735 		return 0;
14736 	}
14737 
14738 	if (dst_reg->type != SCALAR_VALUE)
14739 		ptr_reg = dst_reg;
14740 
14741 	if (BPF_SRC(insn->code) == BPF_X) {
14742 		if (src_reg->type != SCALAR_VALUE) {
14743 			if (dst_reg->type != SCALAR_VALUE) {
14744 				/* Combining two pointers by any ALU op yields
14745 				 * an arbitrary scalar. Disallow all math except
14746 				 * pointer subtraction
14747 				 */
14748 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
14749 					mark_reg_unknown(env, regs, insn->dst_reg);
14750 					return 0;
14751 				}
14752 				verbose(env, "R%d pointer %s pointer prohibited\n",
14753 					insn->dst_reg,
14754 					bpf_alu_string[opcode >> 4]);
14755 				return -EACCES;
14756 			} else {
14757 				/* scalar += pointer
14758 				 * This is legal, but we have to reverse our
14759 				 * src/dest handling in computing the range
14760 				 */
14761 				err = mark_chain_precision(env, insn->dst_reg);
14762 				if (err)
14763 					return err;
14764 				return adjust_ptr_min_max_vals(env, insn,
14765 							       src_reg, dst_reg);
14766 			}
14767 		} else if (ptr_reg) {
14768 			/* pointer += scalar */
14769 			err = mark_chain_precision(env, insn->src_reg);
14770 			if (err)
14771 				return err;
14772 			return adjust_ptr_min_max_vals(env, insn,
14773 						       dst_reg, src_reg);
14774 		} else if (dst_reg->precise) {
14775 			/* if dst_reg is precise, src_reg should be precise as well */
14776 			err = mark_chain_precision(env, insn->src_reg);
14777 			if (err)
14778 				return err;
14779 		}
14780 	} else {
14781 		/* Pretend the src is a reg with a known value, since we only
14782 		 * need to be able to read from this state.
14783 		 */
14784 		off_reg.type = SCALAR_VALUE;
14785 		__mark_reg_known(&off_reg, insn->imm);
14786 		src_reg = &off_reg;
14787 		if (ptr_reg) /* pointer += K */
14788 			return adjust_ptr_min_max_vals(env, insn,
14789 						       ptr_reg, src_reg);
14790 	}
14791 
14792 	/* Got here implies adding two SCALAR_VALUEs */
14793 	if (WARN_ON_ONCE(ptr_reg)) {
14794 		print_verifier_state(env, vstate, vstate->curframe, true);
14795 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
14796 		return -EFAULT;
14797 	}
14798 	if (WARN_ON(!src_reg)) {
14799 		print_verifier_state(env, vstate, vstate->curframe, true);
14800 		verbose(env, "verifier internal error: no src_reg\n");
14801 		return -EFAULT;
14802 	}
14803 	/*
14804 	 * For alu32 linked register tracking, we need to check dst_reg's
14805 	 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(),
14806 	 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX.
14807 	 */
14808 	u64 dst_umax = reg_umax(dst_reg);
14809 
14810 	err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
14811 	if (err)
14812 		return err;
14813 	/*
14814 	 * Compilers can generate the code
14815 	 * r1 = r2
14816 	 * r1 += 0x1
14817 	 * if r2 < 1000 goto ...
14818 	 * use r1 in memory access
14819 	 * So remember constant delta between r2 and r1 and update r1 after
14820 	 * 'if' condition.
14821 	 */
14822 	if (env->bpf_capable &&
14823 	    (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
14824 	    dst_reg->id && is_reg_const(src_reg, alu32) &&
14825 	    !(BPF_SRC(insn->code) == BPF_X && insn->src_reg == insn->dst_reg)) {
14826 		u64 val = reg_const_value(src_reg, alu32);
14827 		s32 off;
14828 
14829 		if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX))
14830 			goto clear_id;
14831 
14832 		if (alu32 && (dst_umax > U32_MAX))
14833 			goto clear_id;
14834 
14835 		off = (s32)val;
14836 
14837 		if (BPF_OP(insn->code) == BPF_SUB) {
14838 			/* Negating S32_MIN would overflow */
14839 			if (off == S32_MIN)
14840 				goto clear_id;
14841 			off = -off;
14842 		}
14843 
14844 		if (dst_reg->id & BPF_ADD_CONST) {
14845 			/*
14846 			 * If the register already went through rX += val
14847 			 * we cannot accumulate another val into rx->off.
14848 			 */
14849 clear_id:
14850 			clear_scalar_id(dst_reg);
14851 		} else {
14852 			if (alu32)
14853 				dst_reg->id |= BPF_ADD_CONST32;
14854 			else
14855 				dst_reg->id |= BPF_ADD_CONST64;
14856 			dst_reg->delta = off;
14857 		}
14858 	} else {
14859 		/*
14860 		 * Make sure ID is cleared otherwise dst_reg min/max could be
14861 		 * incorrectly propagated into other registers by sync_linked_regs()
14862 		 */
14863 		clear_scalar_id(dst_reg);
14864 	}
14865 	return 0;
14866 }
14867 
14868 /* check validity of 32-bit and 64-bit arithmetic operations */
14869 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
14870 {
14871 	struct bpf_reg_state *regs = cur_regs(env);
14872 	u8 opcode = BPF_OP(insn->code);
14873 	int err;
14874 
14875 	if (opcode == BPF_END || opcode == BPF_NEG) {
14876 		/* check src operand */
14877 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14878 		if (err)
14879 			return err;
14880 
14881 		if (is_pointer_value(env, insn->dst_reg)) {
14882 			verbose(env, "R%d pointer arithmetic prohibited\n",
14883 				insn->dst_reg);
14884 			return -EACCES;
14885 		}
14886 
14887 		/* check dest operand */
14888 		if (regs[insn->dst_reg].type == SCALAR_VALUE) {
14889 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14890 			err = err ?: adjust_scalar_min_max_vals(env, insn,
14891 							 &regs[insn->dst_reg],
14892 							 regs[insn->dst_reg]);
14893 		} else {
14894 			err = check_reg_arg(env, insn->dst_reg, DST_OP);
14895 		}
14896 		if (err)
14897 			return err;
14898 
14899 	} else if (opcode == BPF_MOV) {
14900 
14901 		if (BPF_SRC(insn->code) == BPF_X) {
14902 			if (insn->off == BPF_ADDR_SPACE_CAST) {
14903 				if (!env->prog->aux->arena) {
14904 					verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
14905 					return -EINVAL;
14906 				}
14907 			}
14908 
14909 			/* check src operand */
14910 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
14911 			if (err)
14912 				return err;
14913 		}
14914 
14915 		/* check dest operand, mark as required later */
14916 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14917 		if (err)
14918 			return err;
14919 
14920 		if (BPF_SRC(insn->code) == BPF_X) {
14921 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
14922 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
14923 
14924 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
14925 				if (insn->imm) {
14926 					/* off == BPF_ADDR_SPACE_CAST */
14927 					mark_reg_unknown(env, regs, insn->dst_reg);
14928 					if (insn->imm == 1) { /* cast from as(1) to as(0) */
14929 						dst_reg->type = PTR_TO_ARENA;
14930 						/* PTR_TO_ARENA is 32-bit */
14931 						dst_reg->subreg_def = env->insn_idx + 1;
14932 					}
14933 				} else if (insn->off == 0) {
14934 					/* case: R1 = R2
14935 					 * copy register state to dest reg
14936 					 */
14937 					assign_scalar_id_before_mov(env, src_reg);
14938 					*dst_reg = *src_reg;
14939 					dst_reg->subreg_def = DEF_NOT_SUBREG;
14940 				} else {
14941 					/* case: R1 = (s8, s16 s32)R2 */
14942 					if (is_pointer_value(env, insn->src_reg)) {
14943 						verbose(env,
14944 							"R%d sign-extension part of pointer\n",
14945 							insn->src_reg);
14946 						return -EACCES;
14947 					} else if (src_reg->type == SCALAR_VALUE) {
14948 						bool no_sext;
14949 
14950 						no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1));
14951 						if (no_sext)
14952 							assign_scalar_id_before_mov(env, src_reg);
14953 						*dst_reg = *src_reg;
14954 						if (!no_sext)
14955 							clear_scalar_id(dst_reg);
14956 						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
14957 						dst_reg->subreg_def = DEF_NOT_SUBREG;
14958 					} else {
14959 						mark_reg_unknown(env, regs, insn->dst_reg);
14960 					}
14961 				}
14962 			} else {
14963 				/* R1 = (u32) R2 */
14964 				if (is_pointer_value(env, insn->src_reg)) {
14965 					verbose(env,
14966 						"R%d partial copy of pointer\n",
14967 						insn->src_reg);
14968 					return -EACCES;
14969 				} else if (src_reg->type == SCALAR_VALUE) {
14970 					if (insn->off == 0) {
14971 						bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
14972 
14973 						if (is_src_reg_u32)
14974 							assign_scalar_id_before_mov(env, src_reg);
14975 						*dst_reg = *src_reg;
14976 						/* Make sure ID is cleared if src_reg is not in u32
14977 						 * range otherwise dst_reg min/max could be incorrectly
14978 						 * propagated into src_reg by sync_linked_regs()
14979 						 */
14980 						if (!is_src_reg_u32)
14981 							clear_scalar_id(dst_reg);
14982 						dst_reg->subreg_def = env->insn_idx + 1;
14983 					} else {
14984 						/* case: W1 = (s8, s16)W2 */
14985 						bool no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1));
14986 
14987 						if (no_sext)
14988 							assign_scalar_id_before_mov(env, src_reg);
14989 						*dst_reg = *src_reg;
14990 						if (!no_sext)
14991 							clear_scalar_id(dst_reg);
14992 						dst_reg->subreg_def = env->insn_idx + 1;
14993 						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
14994 					}
14995 				} else {
14996 					mark_reg_unknown(env, regs,
14997 							 insn->dst_reg);
14998 				}
14999 				zext_32_to_64(dst_reg);
15000 				reg_bounds_sync(dst_reg);
15001 			}
15002 		} else {
15003 			/* case: R = imm
15004 			 * remember the value we stored into this reg
15005 			 */
15006 			/* clear any state __mark_reg_known doesn't set */
15007 			mark_reg_unknown(env, regs, insn->dst_reg);
15008 			regs[insn->dst_reg].type = SCALAR_VALUE;
15009 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
15010 				__mark_reg_known(regs + insn->dst_reg,
15011 						 insn->imm);
15012 			} else {
15013 				__mark_reg_known(regs + insn->dst_reg,
15014 						 (u32)insn->imm);
15015 			}
15016 		}
15017 
15018 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
15019 
15020 		if (BPF_SRC(insn->code) == BPF_X) {
15021 			/* check src1 operand */
15022 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15023 			if (err)
15024 				return err;
15025 		}
15026 
15027 		/* check src2 operand */
15028 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15029 		if (err)
15030 			return err;
15031 
15032 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
15033 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
15034 			verbose(env, "div by zero\n");
15035 			return -EINVAL;
15036 		}
15037 
15038 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
15039 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
15040 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
15041 
15042 			if (insn->imm < 0 || insn->imm >= size) {
15043 				verbose(env, "invalid shift %d\n", insn->imm);
15044 				return -EINVAL;
15045 			}
15046 		}
15047 
15048 		/* check dest operand */
15049 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15050 		err = err ?: adjust_reg_min_max_vals(env, insn);
15051 		if (err)
15052 			return err;
15053 	}
15054 
15055 	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
15056 }
15057 
15058 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
15059 				   struct bpf_reg_state *dst_reg,
15060 				   enum bpf_reg_type type,
15061 				   bool range_right_open)
15062 {
15063 	struct bpf_func_state *state;
15064 	struct bpf_reg_state *reg;
15065 	int new_range;
15066 
15067 	if (reg_umax(dst_reg) == 0 && range_right_open)
15068 		/* This doesn't give us any range */
15069 		return;
15070 
15071 	if (reg_umax(dst_reg) > MAX_PACKET_OFF)
15072 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
15073 		 * than pkt_end, but that's because it's also less than pkt.
15074 		 */
15075 		return;
15076 
15077 	new_range = reg_umax(dst_reg);
15078 	if (range_right_open)
15079 		new_range++;
15080 
15081 	/* Examples for register markings:
15082 	 *
15083 	 * pkt_data in dst register:
15084 	 *
15085 	 *   r2 = r3;
15086 	 *   r2 += 8;
15087 	 *   if (r2 > pkt_end) goto <handle exception>
15088 	 *   <access okay>
15089 	 *
15090 	 *   r2 = r3;
15091 	 *   r2 += 8;
15092 	 *   if (r2 < pkt_end) goto <access okay>
15093 	 *   <handle exception>
15094 	 *
15095 	 *   Where:
15096 	 *     r2 == dst_reg, pkt_end == src_reg
15097 	 *     r2=pkt(id=n,off=8,r=0)
15098 	 *     r3=pkt(id=n,off=0,r=0)
15099 	 *
15100 	 * pkt_data in src register:
15101 	 *
15102 	 *   r2 = r3;
15103 	 *   r2 += 8;
15104 	 *   if (pkt_end >= r2) goto <access okay>
15105 	 *   <handle exception>
15106 	 *
15107 	 *   r2 = r3;
15108 	 *   r2 += 8;
15109 	 *   if (pkt_end <= r2) goto <handle exception>
15110 	 *   <access okay>
15111 	 *
15112 	 *   Where:
15113 	 *     pkt_end == dst_reg, r2 == src_reg
15114 	 *     r2=pkt(id=n,off=8,r=0)
15115 	 *     r3=pkt(id=n,off=0,r=0)
15116 	 *
15117 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
15118 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
15119 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
15120 	 * the check.
15121 	 */
15122 
15123 	/* If our ids match, then we must have the same max_value.  And we
15124 	 * don't care about the other reg's fixed offset, since if it's too big
15125 	 * the range won't allow anything.
15126 	 * reg_umax(dst_reg) is known < MAX_PACKET_OFF, therefore it fits in a u16.
15127 	 */
15128 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15129 		if (reg->type == type && reg->id == dst_reg->id)
15130 			/* keep the maximum range already checked */
15131 			reg->range = max(reg->range, new_range);
15132 	}));
15133 }
15134 
15135 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15136 				u8 opcode, bool is_jmp32);
15137 static u8 rev_opcode(u8 opcode);
15138 
15139 /*
15140  * Learn more information about live branches by simulating refinement on both branches.
15141  * regs_refine_cond_op() is sound, so producing ill-formed register bounds for the branch means
15142  * that branch is dead.
15143  */
15144 static int simulate_both_branches_taken(struct bpf_verifier_env *env, u8 opcode, bool is_jmp32)
15145 {
15146 	/* Fallthrough (FALSE) branch */
15147 	regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32);
15148 	reg_bounds_sync(&env->false_reg1);
15149 	reg_bounds_sync(&env->false_reg2);
15150 	/*
15151 	 * If there is a range bounds violation in *any* of the abstract values in either
15152 	 * reg_states in the FALSE branch (i.e. reg1, reg2), the FALSE branch must be dead. Only
15153 	 * TRUE branch will be taken.
15154 	 */
15155 	if (range_bounds_violation(&env->false_reg1) || range_bounds_violation(&env->false_reg2))
15156 		return 1;
15157 
15158 	/* Jump (TRUE) branch */
15159 	regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32);
15160 	reg_bounds_sync(&env->true_reg1);
15161 	reg_bounds_sync(&env->true_reg2);
15162 	/*
15163 	 * If there is a range bounds violation in *any* of the abstract values in either
15164 	 * reg_states in the TRUE branch (i.e. true_reg1, true_reg2), the TRUE branch must be dead.
15165 	 * Only FALSE branch will be taken.
15166 	 */
15167 	if (range_bounds_violation(&env->true_reg1) || range_bounds_violation(&env->true_reg2))
15168 		return 0;
15169 
15170 	/* Both branches are possible, we can't determine which one will be taken. */
15171 	return -1;
15172 }
15173 
15174 /*
15175  * <reg1> <op> <reg2>, currently assuming reg2 is a constant
15176  */
15177 static int is_scalar_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15178 				  struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15179 {
15180 	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
15181 	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
15182 	u64 umin1 = is_jmp32 ? (u64)reg_u32_min(reg1) : reg_umin(reg1);
15183 	u64 umax1 = is_jmp32 ? (u64)reg_u32_max(reg1) : reg_umax(reg1);
15184 	s64 smin1 = is_jmp32 ? (s64)reg_s32_min(reg1) : reg_smin(reg1);
15185 	s64 smax1 = is_jmp32 ? (s64)reg_s32_max(reg1) : reg_smax(reg1);
15186 	u64 umin2 = is_jmp32 ? (u64)reg_u32_min(reg2) : reg_umin(reg2);
15187 	u64 umax2 = is_jmp32 ? (u64)reg_u32_max(reg2) : reg_umax(reg2);
15188 	s64 smin2 = is_jmp32 ? (s64)reg_s32_min(reg2) : reg_smin(reg2);
15189 	s64 smax2 = is_jmp32 ? (s64)reg_s32_max(reg2) : reg_smax(reg2);
15190 
15191 	if (reg1 == reg2) {
15192 		switch (opcode) {
15193 		case BPF_JGE:
15194 		case BPF_JLE:
15195 		case BPF_JSGE:
15196 		case BPF_JSLE:
15197 		case BPF_JEQ:
15198 			return 1;
15199 		case BPF_JGT:
15200 		case BPF_JLT:
15201 		case BPF_JSGT:
15202 		case BPF_JSLT:
15203 		case BPF_JNE:
15204 			return 0;
15205 		case BPF_JSET:
15206 			if (tnum_is_const(t1))
15207 				return t1.value != 0;
15208 			else
15209 				return (smin1 <= 0 && smax1 >= 0) ? -1 : 1;
15210 		default:
15211 			return -1;
15212 		}
15213 	}
15214 
15215 	switch (opcode) {
15216 	case BPF_JEQ:
15217 		/* constants, umin/umax and smin/smax checks would be
15218 		 * redundant in this case because they all should match
15219 		 */
15220 		if (tnum_is_const(t1) && tnum_is_const(t2))
15221 			return t1.value == t2.value;
15222 		if (!tnum_overlap(t1, t2))
15223 			return 0;
15224 		/* non-overlapping ranges */
15225 		if (umin1 > umax2 || umax1 < umin2)
15226 			return 0;
15227 		if (smin1 > smax2 || smax1 < smin2)
15228 			return 0;
15229 		if (!is_jmp32) {
15230 			/* if 64-bit ranges are inconclusive, see if we can
15231 			 * utilize 32-bit subrange knowledge to eliminate
15232 			 * branches that can't be taken a priori
15233 			 */
15234 			if (reg_u32_min(reg1) > reg_u32_max(reg2) ||
15235 			    reg_u32_max(reg1) < reg_u32_min(reg2))
15236 				return 0;
15237 			if (reg_s32_min(reg1) > reg_s32_max(reg2) ||
15238 			    reg_s32_max(reg1) < reg_s32_min(reg2))
15239 				return 0;
15240 		}
15241 		break;
15242 	case BPF_JNE:
15243 		/* constants, umin/umax and smin/smax checks would be
15244 		 * redundant in this case because they all should match
15245 		 */
15246 		if (tnum_is_const(t1) && tnum_is_const(t2))
15247 			return t1.value != t2.value;
15248 		if (!tnum_overlap(t1, t2))
15249 			return 1;
15250 		/* non-overlapping ranges */
15251 		if (umin1 > umax2 || umax1 < umin2)
15252 			return 1;
15253 		if (smin1 > smax2 || smax1 < smin2)
15254 			return 1;
15255 		if (!is_jmp32) {
15256 			/* if 64-bit ranges are inconclusive, see if we can
15257 			 * utilize 32-bit subrange knowledge to eliminate
15258 			 * branches that can't be taken a priori
15259 			 */
15260 			if (reg_u32_min(reg1) > reg_u32_max(reg2) ||
15261 			    reg_u32_max(reg1) < reg_u32_min(reg2))
15262 				return 1;
15263 			if (reg_s32_min(reg1) > reg_s32_max(reg2) ||
15264 			    reg_s32_max(reg1) < reg_s32_min(reg2))
15265 				return 1;
15266 		}
15267 		break;
15268 	case BPF_JSET:
15269 		if (!is_reg_const(reg2, is_jmp32)) {
15270 			swap(reg1, reg2);
15271 			swap(t1, t2);
15272 		}
15273 		if (!is_reg_const(reg2, is_jmp32))
15274 			return -1;
15275 		if ((~t1.mask & t1.value) & t2.value)
15276 			return 1;
15277 		if (!((t1.mask | t1.value) & t2.value))
15278 			return 0;
15279 		break;
15280 	case BPF_JGT:
15281 		if (umin1 > umax2)
15282 			return 1;
15283 		else if (umax1 <= umin2)
15284 			return 0;
15285 		break;
15286 	case BPF_JSGT:
15287 		if (smin1 > smax2)
15288 			return 1;
15289 		else if (smax1 <= smin2)
15290 			return 0;
15291 		break;
15292 	case BPF_JLT:
15293 		if (umax1 < umin2)
15294 			return 1;
15295 		else if (umin1 >= umax2)
15296 			return 0;
15297 		break;
15298 	case BPF_JSLT:
15299 		if (smax1 < smin2)
15300 			return 1;
15301 		else if (smin1 >= smax2)
15302 			return 0;
15303 		break;
15304 	case BPF_JGE:
15305 		if (umin1 >= umax2)
15306 			return 1;
15307 		else if (umax1 < umin2)
15308 			return 0;
15309 		break;
15310 	case BPF_JSGE:
15311 		if (smin1 >= smax2)
15312 			return 1;
15313 		else if (smax1 < smin2)
15314 			return 0;
15315 		break;
15316 	case BPF_JLE:
15317 		if (umax1 <= umin2)
15318 			return 1;
15319 		else if (umin1 > umax2)
15320 			return 0;
15321 		break;
15322 	case BPF_JSLE:
15323 		if (smax1 <= smin2)
15324 			return 1;
15325 		else if (smin1 > smax2)
15326 			return 0;
15327 		break;
15328 	}
15329 
15330 	return simulate_both_branches_taken(env, opcode, is_jmp32);
15331 }
15332 
15333 static int flip_opcode(u32 opcode)
15334 {
15335 	/* How can we transform "a <op> b" into "b <op> a"? */
15336 	static const u8 opcode_flip[16] = {
15337 		/* these stay the same */
15338 		[BPF_JEQ  >> 4] = BPF_JEQ,
15339 		[BPF_JNE  >> 4] = BPF_JNE,
15340 		[BPF_JSET >> 4] = BPF_JSET,
15341 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
15342 		[BPF_JGE  >> 4] = BPF_JLE,
15343 		[BPF_JGT  >> 4] = BPF_JLT,
15344 		[BPF_JLE  >> 4] = BPF_JGE,
15345 		[BPF_JLT  >> 4] = BPF_JGT,
15346 		[BPF_JSGE >> 4] = BPF_JSLE,
15347 		[BPF_JSGT >> 4] = BPF_JSLT,
15348 		[BPF_JSLE >> 4] = BPF_JSGE,
15349 		[BPF_JSLT >> 4] = BPF_JSGT
15350 	};
15351 	return opcode_flip[opcode >> 4];
15352 }
15353 
15354 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
15355 				   struct bpf_reg_state *src_reg,
15356 				   u8 opcode)
15357 {
15358 	struct bpf_reg_state *pkt;
15359 
15360 	if (src_reg->type == PTR_TO_PACKET_END) {
15361 		pkt = dst_reg;
15362 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
15363 		pkt = src_reg;
15364 		opcode = flip_opcode(opcode);
15365 	} else {
15366 		return -1;
15367 	}
15368 
15369 	if (pkt->range >= 0)
15370 		return -1;
15371 
15372 	switch (opcode) {
15373 	case BPF_JLE:
15374 		/* pkt <= pkt_end */
15375 		fallthrough;
15376 	case BPF_JGT:
15377 		/* pkt > pkt_end */
15378 		if (pkt->range == BEYOND_PKT_END)
15379 			/* pkt has at last one extra byte beyond pkt_end */
15380 			return opcode == BPF_JGT;
15381 		break;
15382 	case BPF_JLT:
15383 		/* pkt < pkt_end */
15384 		fallthrough;
15385 	case BPF_JGE:
15386 		/* pkt >= pkt_end */
15387 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
15388 			return opcode == BPF_JGE;
15389 		break;
15390 	}
15391 	return -1;
15392 }
15393 
15394 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
15395  * and return:
15396  *  1 - branch will be taken and "goto target" will be executed
15397  *  0 - branch will not be taken and fall-through to next insn
15398  * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
15399  *      range [0,10]
15400  */
15401 static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15402 			   struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15403 {
15404 	if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
15405 		return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
15406 
15407 	if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
15408 		u64 val;
15409 
15410 		/* arrange that reg2 is a scalar, and reg1 is a pointer */
15411 		if (!is_reg_const(reg2, is_jmp32)) {
15412 			opcode = flip_opcode(opcode);
15413 			swap(reg1, reg2);
15414 		}
15415 		/* and ensure that reg2 is a constant */
15416 		if (!is_reg_const(reg2, is_jmp32))
15417 			return -1;
15418 
15419 		if (!reg_not_null(env, reg1))
15420 			return -1;
15421 
15422 		/* If pointer is valid tests against zero will fail so we can
15423 		 * use this to direct branch taken.
15424 		 */
15425 		val = reg_const_value(reg2, is_jmp32);
15426 		if (val != 0)
15427 			return -1;
15428 
15429 		switch (opcode) {
15430 		case BPF_JEQ:
15431 			return 0;
15432 		case BPF_JNE:
15433 			return 1;
15434 		default:
15435 			return -1;
15436 		}
15437 	}
15438 
15439 	/* now deal with two scalars, but not necessarily constants */
15440 	return is_scalar_branch_taken(env, reg1, reg2, opcode, is_jmp32);
15441 }
15442 
15443 /* Opcode that corresponds to a *false* branch condition.
15444  * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
15445  */
15446 static u8 rev_opcode(u8 opcode)
15447 {
15448 	switch (opcode) {
15449 	case BPF_JEQ:		return BPF_JNE;
15450 	case BPF_JNE:		return BPF_JEQ;
15451 	/* JSET doesn't have it's reverse opcode in BPF, so add
15452 	 * BPF_X flag to denote the reverse of that operation
15453 	 */
15454 	case BPF_JSET:		return BPF_JSET | BPF_X;
15455 	case BPF_JSET | BPF_X:	return BPF_JSET;
15456 	case BPF_JGE:		return BPF_JLT;
15457 	case BPF_JGT:		return BPF_JLE;
15458 	case BPF_JLE:		return BPF_JGT;
15459 	case BPF_JLT:		return BPF_JGE;
15460 	case BPF_JSGE:		return BPF_JSLT;
15461 	case BPF_JSGT:		return BPF_JSLE;
15462 	case BPF_JSLE:		return BPF_JSGT;
15463 	case BPF_JSLT:		return BPF_JSGE;
15464 	default:		return 0;
15465 	}
15466 }
15467 
15468 /* Refine range knowledge for <reg1> <op> <reg2> conditional operation. */
15469 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15470 				u8 opcode, bool is_jmp32)
15471 {
15472 	struct tnum t;
15473 	u64 val;
15474 
15475 	/* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below */
15476 	switch (opcode) {
15477 	case BPF_JGE:
15478 	case BPF_JGT:
15479 	case BPF_JSGE:
15480 	case BPF_JSGT:
15481 		opcode = flip_opcode(opcode);
15482 		swap(reg1, reg2);
15483 		break;
15484 	default:
15485 		break;
15486 	}
15487 
15488 	switch (opcode) {
15489 	case BPF_JEQ:
15490 		if (is_jmp32) {
15491 			reg1->r32 = cnum32_intersect(reg1->r32, reg2->r32);
15492 			reg2->r32 = reg1->r32;
15493 
15494 			t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
15495 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15496 			reg2->var_off = tnum_with_subreg(reg2->var_off, t);
15497 		} else {
15498 			reg1->r64 = cnum64_intersect(reg1->r64, reg2->r64);
15499 			reg2->r64 = reg1->r64;
15500 
15501 			reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
15502 			reg2->var_off = reg1->var_off;
15503 		}
15504 		break;
15505 	case BPF_JNE:
15506 		if (!is_reg_const(reg2, is_jmp32))
15507 			swap(reg1, reg2);
15508 		if (!is_reg_const(reg2, is_jmp32))
15509 			break;
15510 
15511 		/* try to recompute the bound of reg1 if reg2 is a const and
15512 		 * is exactly the edge of reg1.
15513 		 */
15514 		val = reg_const_value(reg2, is_jmp32);
15515 		if (is_jmp32) {
15516 			/* Complement of the range [val, val] as cnum32. */
15517 			cnum32_intersect_with(&reg1->r32, (struct cnum32){ val + 1, U32_MAX - 1 });
15518 		} else {
15519 			/* Complement of the range [val, val] as cnum64. */
15520 			cnum64_intersect_with(&reg1->r64, (struct cnum64){ val + 1, U64_MAX - 1 });
15521 		}
15522 		break;
15523 	case BPF_JSET:
15524 		if (!is_reg_const(reg2, is_jmp32))
15525 			swap(reg1, reg2);
15526 		if (!is_reg_const(reg2, is_jmp32))
15527 			break;
15528 		val = reg_const_value(reg2, is_jmp32);
15529 		/* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
15530 		 * requires single bit to learn something useful. E.g., if we
15531 		 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
15532 		 * are actually set? We can learn something definite only if
15533 		 * it's a single-bit value to begin with.
15534 		 *
15535 		 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
15536 		 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
15537 		 * bit 1 is set, which we can readily use in adjustments.
15538 		 */
15539 		if (!is_power_of_2(val))
15540 			break;
15541 		if (is_jmp32) {
15542 			t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
15543 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15544 		} else {
15545 			reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
15546 		}
15547 		break;
15548 	case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
15549 		if (!is_reg_const(reg2, is_jmp32))
15550 			swap(reg1, reg2);
15551 		if (!is_reg_const(reg2, is_jmp32))
15552 			break;
15553 		val = reg_const_value(reg2, is_jmp32);
15554 		/* Forget the ranges before narrowing tnums, to avoid invariant
15555 		 * violations if we're on a dead branch.
15556 		 */
15557 		__mark_reg_unbounded(reg1);
15558 		if (is_jmp32) {
15559 			t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
15560 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15561 		} else {
15562 			reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
15563 		}
15564 		break;
15565 	case BPF_JLE:
15566 		if (is_jmp32) {
15567 			cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2));
15568 			cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1), U32_MAX);
15569 		} else {
15570 			cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2));
15571 			cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1), U64_MAX);
15572 		}
15573 		break;
15574 	case BPF_JLT:
15575 		if (is_jmp32) {
15576 			cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2) - 1);
15577 			cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1) + 1, U32_MAX);
15578 		} else {
15579 			cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2) - 1);
15580 			cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1) + 1, U64_MAX);
15581 		}
15582 		break;
15583 	case BPF_JSLE:
15584 		if (is_jmp32) {
15585 			cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2));
15586 			cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1), S32_MAX);
15587 		} else {
15588 			cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2));
15589 			cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1), S64_MAX);
15590 		}
15591 		break;
15592 	case BPF_JSLT:
15593 		if (is_jmp32) {
15594 			cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2) - 1);
15595 			cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1) + 1, S32_MAX);
15596 		} else {
15597 			cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2) - 1);
15598 			cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1) + 1, S64_MAX);
15599 		}
15600 		break;
15601 	default:
15602 		return;
15603 	}
15604 }
15605 
15606 /* Check for invariant violations on the registers for both branches of a condition */
15607 static int regs_bounds_sanity_check_branches(struct bpf_verifier_env *env)
15608 {
15609 	int err;
15610 
15611 	err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1");
15612 	err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2");
15613 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1");
15614 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2");
15615 	return err;
15616 }
15617 
15618 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
15619 				 struct bpf_reg_state *reg, u32 id,
15620 				 bool is_null)
15621 {
15622 	if (type_may_be_null(reg->type) && reg->id == id &&
15623 	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
15624 		/* Old offset should have been known-zero, because we don't
15625 		 * allow pointer arithmetic on pointers that might be NULL.
15626 		 * If we see this happening, don't convert the register.
15627 		 *
15628 		 * But in some cases, some helpers that return local kptrs
15629 		 * advance offset for the returned pointer. In those cases,
15630 		 * it is fine to expect to see reg->var_off.
15631 		 */
15632 		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
15633 		    WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0)))
15634 			return;
15635 		if (is_null) {
15636 			/* We don't need id from this point
15637 			 * onwards anymore, thus we should better reset it,
15638 			 * so that state pruning has chances to take effect.
15639 			 */
15640 			__mark_reg_known_zero(reg);
15641 			reg->type = SCALAR_VALUE;
15642 
15643 			return;
15644 		}
15645 
15646 		mark_ptr_not_null_reg(reg);
15647 
15648 		/*
15649 		 * reg->id is preserved for object relationship tracking
15650 		 * and spin_lock lock state tracking
15651 		 */
15652 	}
15653 }
15654 
15655 /* The logic is similar to find_good_pkt_pointers(), both could eventually
15656  * be folded together at some point.
15657  */
15658 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
15659 				  bool is_null)
15660 {
15661 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
15662 	struct bpf_reg_state *regs = state->regs, *reg;
15663 	u32 id = regs[regno].id;
15664 
15665 	if (is_null && find_reference_state(vstate, id))
15666 		/* regs[regno] is in the " == NULL" branch.
15667 		 * No one could have freed the reference state before
15668 		 * doing the NULL check.
15669 		 */
15670 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
15671 
15672 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15673 		mark_ptr_or_null_reg(state, reg, id, is_null);
15674 	}));
15675 }
15676 
15677 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
15678 				   struct bpf_reg_state *dst_reg,
15679 				   struct bpf_reg_state *src_reg,
15680 				   struct bpf_verifier_state *this_branch,
15681 				   struct bpf_verifier_state *other_branch)
15682 {
15683 	if (BPF_SRC(insn->code) != BPF_X)
15684 		return false;
15685 
15686 	/* Pointers are always 64-bit. */
15687 	if (BPF_CLASS(insn->code) == BPF_JMP32)
15688 		return false;
15689 
15690 	switch (BPF_OP(insn->code)) {
15691 	case BPF_JGT:
15692 		if ((dst_reg->type == PTR_TO_PACKET &&
15693 		     src_reg->type == PTR_TO_PACKET_END) ||
15694 		    (dst_reg->type == PTR_TO_PACKET_META &&
15695 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15696 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
15697 			find_good_pkt_pointers(this_branch, dst_reg,
15698 					       dst_reg->type, false);
15699 			mark_pkt_end(other_branch, insn->dst_reg, true);
15700 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15701 			    src_reg->type == PTR_TO_PACKET) ||
15702 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15703 			    src_reg->type == PTR_TO_PACKET_META)) {
15704 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
15705 			find_good_pkt_pointers(other_branch, src_reg,
15706 					       src_reg->type, true);
15707 			mark_pkt_end(this_branch, insn->src_reg, false);
15708 		} else {
15709 			return false;
15710 		}
15711 		break;
15712 	case BPF_JLT:
15713 		if ((dst_reg->type == PTR_TO_PACKET &&
15714 		     src_reg->type == PTR_TO_PACKET_END) ||
15715 		    (dst_reg->type == PTR_TO_PACKET_META &&
15716 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15717 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
15718 			find_good_pkt_pointers(other_branch, dst_reg,
15719 					       dst_reg->type, true);
15720 			mark_pkt_end(this_branch, insn->dst_reg, false);
15721 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15722 			    src_reg->type == PTR_TO_PACKET) ||
15723 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15724 			    src_reg->type == PTR_TO_PACKET_META)) {
15725 			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
15726 			find_good_pkt_pointers(this_branch, src_reg,
15727 					       src_reg->type, false);
15728 			mark_pkt_end(other_branch, insn->src_reg, true);
15729 		} else {
15730 			return false;
15731 		}
15732 		break;
15733 	case BPF_JGE:
15734 		if ((dst_reg->type == PTR_TO_PACKET &&
15735 		     src_reg->type == PTR_TO_PACKET_END) ||
15736 		    (dst_reg->type == PTR_TO_PACKET_META &&
15737 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15738 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
15739 			find_good_pkt_pointers(this_branch, dst_reg,
15740 					       dst_reg->type, true);
15741 			mark_pkt_end(other_branch, insn->dst_reg, false);
15742 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15743 			    src_reg->type == PTR_TO_PACKET) ||
15744 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15745 			    src_reg->type == PTR_TO_PACKET_META)) {
15746 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
15747 			find_good_pkt_pointers(other_branch, src_reg,
15748 					       src_reg->type, false);
15749 			mark_pkt_end(this_branch, insn->src_reg, true);
15750 		} else {
15751 			return false;
15752 		}
15753 		break;
15754 	case BPF_JLE:
15755 		if ((dst_reg->type == PTR_TO_PACKET &&
15756 		     src_reg->type == PTR_TO_PACKET_END) ||
15757 		    (dst_reg->type == PTR_TO_PACKET_META &&
15758 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15759 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
15760 			find_good_pkt_pointers(other_branch, dst_reg,
15761 					       dst_reg->type, false);
15762 			mark_pkt_end(this_branch, insn->dst_reg, true);
15763 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15764 			    src_reg->type == PTR_TO_PACKET) ||
15765 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15766 			    src_reg->type == PTR_TO_PACKET_META)) {
15767 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
15768 			find_good_pkt_pointers(this_branch, src_reg,
15769 					       src_reg->type, true);
15770 			mark_pkt_end(other_branch, insn->src_reg, false);
15771 		} else {
15772 			return false;
15773 		}
15774 		break;
15775 	default:
15776 		return false;
15777 	}
15778 
15779 	return true;
15780 }
15781 
15782 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
15783 				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
15784 {
15785 	struct linked_reg *e;
15786 
15787 	if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
15788 		return;
15789 
15790 	e = linked_regs_push(reg_set);
15791 	if (e) {
15792 		e->frameno = frameno;
15793 		e->is_reg = is_reg;
15794 		e->regno = spi_or_reg;
15795 	} else {
15796 		clear_scalar_id(reg);
15797 	}
15798 }
15799 
15800 /* For all R being scalar registers or spilled scalar registers
15801  * in verifier state, save R in linked_regs if R->id == id.
15802  * If there are too many Rs sharing same id, reset id for leftover Rs.
15803  */
15804 static void collect_linked_regs(struct bpf_verifier_env *env,
15805 				struct bpf_verifier_state *vstate,
15806 				u32 id,
15807 				struct linked_regs *linked_regs)
15808 {
15809 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
15810 	struct bpf_func_state *func;
15811 	struct bpf_reg_state *reg;
15812 	u16 live_regs;
15813 	int i, j;
15814 
15815 	id = id & ~BPF_ADD_CONST;
15816 	for (i = vstate->curframe; i >= 0; i--) {
15817 		live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before;
15818 		func = vstate->frame[i];
15819 		for (j = 0; j < BPF_REG_FP; j++) {
15820 			if (!(live_regs & BIT(j)))
15821 				continue;
15822 			reg = &func->regs[j];
15823 			__collect_linked_regs(linked_regs, reg, id, i, j, true);
15824 		}
15825 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
15826 			if (!bpf_is_spilled_reg(&func->stack[j]))
15827 				continue;
15828 			reg = &func->stack[j].spilled_ptr;
15829 			__collect_linked_regs(linked_regs, reg, id, i, j, false);
15830 		}
15831 	}
15832 }
15833 
15834 /* For all R in linked_regs, copy known_reg range into R
15835  * if R->id == known_reg->id.
15836  */
15837 static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
15838 			     struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
15839 {
15840 	struct bpf_reg_state fake_reg;
15841 	struct bpf_reg_state *reg;
15842 	struct linked_reg *e;
15843 	int i;
15844 
15845 	for (i = 0; i < linked_regs->cnt; ++i) {
15846 		e = &linked_regs->entries[i];
15847 		reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
15848 				: &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
15849 		if (reg->type != SCALAR_VALUE || reg == known_reg)
15850 			continue;
15851 		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
15852 			continue;
15853 		/*
15854 		 * Skip mixed 32/64-bit links: the delta relationship doesn't
15855 		 * hold across different ALU widths.
15856 		 */
15857 		if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
15858 			continue;
15859 		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
15860 		    reg->delta == known_reg->delta) {
15861 			s32 saved_subreg_def = reg->subreg_def;
15862 
15863 			*reg = *known_reg;
15864 			reg->subreg_def = saved_subreg_def;
15865 		} else {
15866 			s32 saved_subreg_def = reg->subreg_def;
15867 			s32 saved_off = reg->delta;
15868 			u32 saved_id = reg->id;
15869 
15870 			fake_reg.type = SCALAR_VALUE;
15871 			__mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta);
15872 
15873 			/* reg = known_reg; reg += delta */
15874 			*reg = *known_reg;
15875 			/*
15876 			 * Must preserve off, id and subreg_def flag,
15877 			 * otherwise another sync_linked_regs() will be incorrect.
15878 			 */
15879 			reg->delta = saved_off;
15880 			reg->id = saved_id;
15881 			reg->subreg_def = saved_subreg_def;
15882 
15883 			scalar32_min_max_add(reg, &fake_reg);
15884 			scalar_min_max_add(reg, &fake_reg);
15885 			reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
15886 			if ((reg->id | known_reg->id) & BPF_ADD_CONST32)
15887 				zext_32_to_64(reg);
15888 			reg_bounds_sync(reg);
15889 		}
15890 		if (e->is_reg)
15891 			mark_reg_scratched(env, e->regno);
15892 		else
15893 			mark_stack_slot_scratched(env, e->spi);
15894 	}
15895 }
15896 
/*
 * Verify one conditional jump of class BPF_JMP or BPF_JMP32.
 *
 * @env:      verifier environment; env->cur_state is the state being walked
 * @insn:     the jump instruction
 * @insn_idx: in/out index of @insn. It is advanced by insn->off when the
 *            current state continues at the jump target and left untouched
 *            when the current state continues at the fall-through insn.
 *
 * Outline:
 *  - BPF_JCOND: the fall-through instruction is queued as a new state and
 *    the current state follows the jump.
 *  - If is_branch_taken() can decide the outcome (pred is 0 or 1), only that
 *    path is followed; the other one is at most pushed for speculative
 *    execution simulation.
 *  - Otherwise the jump target is queued as 'other_branch' via push_stack()
 *    and the current state ('this_branch') continues as the fall-through
 *    path. The per-branch register copies kept in @env are written back,
 *    linked scalars are synced, and pointer NULL-ness / packet range
 *    knowledge is propagated.
 *
 * Returns 0 on success or a negative error code.
 */
static int check_cond_jmp_op(struct bpf_verifier_env *env,
			     struct bpf_insn *insn, int *insn_idx)
{
	struct bpf_verifier_state *this_branch = env->cur_state;
	struct bpf_verifier_state *other_branch;
	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
	struct bpf_reg_state *eq_branch_regs;
	struct linked_regs linked_regs = {};
	u8 opcode = BPF_OP(insn->code);
	int insn_flags = 0;
	bool is_jmp32;
	int pred = -1;
	int err;

	/* Only conditional jumps are expected to reach here. */
	if (opcode == BPF_JA || opcode > BPF_JCOND) {
		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
		return -EINVAL;
	}

	if (opcode == BPF_JCOND) {
		struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
		int idx = *insn_idx;

		prev_st = find_prev_entry(env, cur_st->parent, idx);

		/* branch out 'fallthrough' insn as a new state to explore */
		queued_st = push_stack(env, idx + 1, idx, false);
		if (IS_ERR(queued_st))
			return PTR_ERR(queued_st);

		queued_st->may_goto_depth++;
		if (prev_st)
			widen_imprecise_scalars(env, prev_st, queued_st);
		/* the current state itself follows the jump */
		*insn_idx += insn->off;
		return 0;
	}

	/* check src2 operand */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	dst_reg = &regs[insn->dst_reg];
	if (BPF_SRC(insn->code) == BPF_X) {
		/* check src1 operand */
		err = check_reg_arg(env, insn->src_reg, SRC_OP);
		if (err)
			return err;

		src_reg = &regs[insn->src_reg];
		/*
		 * A pointer in the source register is only acceptable when
		 * both operands are packet pointers.
		 */
		if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
		    is_pointer_value(env, insn->src_reg)) {
			verbose(env, "R%d pointer comparison prohibited\n",
				insn->src_reg);
			return -EACCES;
		}

		if (src_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_SRC_REG_STACK;
		if (dst_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_DST_REG_STACK;
	} else {
		/*
		 * Immediate operand: model it as a known scalar held in a
		 * scratch register so that both forms share the code below.
		 */
		src_reg = &env->fake_reg[0];
		memset(src_reg, 0, sizeof(*src_reg));
		src_reg->type = SCALAR_VALUE;
		__mark_reg_known(src_reg, insn->imm);

		if (dst_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_DST_REG_STACK;
	}

	if (insn_flags) {
		err = bpf_push_jmp_history(env, this_branch, insn_flags, 0, 0, 0);
		if (err)
			return err;
	}

	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
	/*
	 * Seed the per-branch copies of both operands with their current
	 * state. NOTE(review): nothing in this function modifies the copies
	 * before they are written back below, so they are presumably refined
	 * inside is_branch_taken() - confirm.
	 */
	env->false_reg1 = *dst_reg;
	env->false_reg2 = *src_reg;
	env->true_reg1 = *dst_reg;
	env->true_reg2 = *src_reg;
	pred = is_branch_taken(env, dst_reg, src_reg, opcode, is_jmp32);
	if (pred >= 0) {
		/* If we get here with a dst_reg pointer type it is because
		 * above is_branch_taken() special cased the 0 comparison.
		 *
		 * err is 0 on entry here (every earlier failure returned), so
		 * skipping both mark_chain_precision() calls leaves it at 0.
		 */
		if (!__is_pointer_value(false, dst_reg))
			err = mark_chain_precision(env, insn->dst_reg);
		if (BPF_SRC(insn->code) == BPF_X && !err &&
		    !__is_pointer_value(false, src_reg))
			err = mark_chain_precision(env, insn->src_reg);
		if (err)
			return err;
	}

	if (pred == 1) {
		/* Only follow the goto, ignore fall-through. If needed, push
		 * the fall-through branch for simulation under speculative
		 * execution.
		 */
		if (!env->bypass_spec_v1) {
			err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx);
			if (err < 0)
				return err;
		}
		if (env->log.level & BPF_LOG_LEVEL)
			print_insn_state(env, this_branch, this_branch->curframe);
		*insn_idx += insn->off;
		return 0;
	} else if (pred == 0) {
		/* Only follow the fall-through branch, since that's where the
		 * program will go. If needed, push the goto branch for
		 * simulation under speculative execution.
		 */
		if (!env->bypass_spec_v1) {
			err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1,
							*insn_idx);
			if (err < 0)
				return err;
		}
		if (env->log.level & BPF_LOG_LEVEL)
			print_insn_state(env, this_branch, this_branch->curframe);
		return 0;
	}

	/* Push scalar registers sharing same ID to jump history,
	 * do this before creating 'other_branch', so that both
	 * 'this_branch' and 'other_branch' share this history
	 * if parent state is created.
	 */
	if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
		collect_linked_regs(env, this_branch, src_reg->id, &linked_regs);
	if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
		collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
	if (linked_regs.cnt > 1) {
		err = bpf_push_jmp_history(env, this_branch, 0, 0, 0, linked_regs_pack(&linked_regs));
		if (err)
			return err;
	}

	/* Queue the jump target; the current state is the fall-through path. */
	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
	if (IS_ERR(other_branch))
		return PTR_ERR(other_branch);
	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;

	err = regs_bounds_sanity_check_branches(env);
	if (err)
		return err;

	/*
	 * Write the per-branch copies back: the fall-through path takes the
	 * "false" registers, the queued jump target takes the "true" ones.
	 * For an immediate operand src_reg is the scratch register set up
	 * above, so only dst_reg is updated in the other branch.
	 */
	*dst_reg = env->false_reg1;
	*src_reg = env->false_reg2;
	other_branch_regs[insn->dst_reg] = env->true_reg1;
	if (BPF_SRC(insn->code) == BPF_X)
		other_branch_regs[insn->src_reg] = env->true_reg2;

	/* Propagate the updated operands to the scalars linked to them. */
	if (BPF_SRC(insn->code) == BPF_X &&
	    src_reg->type == SCALAR_VALUE && src_reg->id &&
	    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
		sync_linked_regs(env, this_branch, src_reg, &linked_regs);
		sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg],
				 &linked_regs);
	}
	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
		sync_linked_regs(env, this_branch, dst_reg, &linked_regs);
		sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg],
				 &linked_regs);
	}

	/* if one pointer register is compared to another pointer
	 * register check if PTR_MAYBE_NULL could be lifted.
	 * E.g. register A - maybe null
	 *      register B - not null
	 * for JNE A, B, ... - A is not null in the false branch;
	 * for JEQ A, B, ... - A is not null in the true branch.
	 *
	 * Since PTR_TO_BTF_ID points to a kernel struct that does
	 * not need to be null checked by the BPF program, i.e.,
	 * could be null even without PTR_MAYBE_NULL marking, so
	 * only propagate nullness when neither reg is that type.
	 */
	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
		/* registers of the branch in which the two pointers are equal */
		eq_branch_regs = NULL;
		switch (opcode) {
		case BPF_JEQ:
			eq_branch_regs = other_branch_regs;
			break;
		case BPF_JNE:
			eq_branch_regs = regs;
			break;
		default:
			/* do nothing */
			break;
		}
		if (eq_branch_regs) {
			if (type_may_be_null(src_reg->type))
				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
			else
				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
		}
	}

	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
	 * The same detection is also done when R is compared with a
	 * register whose value is known to be 0.
	 * NOTE: these optimizations below are related with pointer comparison
	 *       which will never be JMP32.
	 */
	if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
	    type_may_be_null(dst_reg->type) &&
	    ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) ||
	     (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) {
		/* Mark all identical registers in each branch as either
		 * safe or unknown depending R == 0 or R != 0 conditional.
		 * The fall-through path is the NULL path for JNE, the jump
		 * target is the NULL path for JEQ.
		 */
		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
				      opcode == BPF_JNE);
		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
				      opcode == BPF_JEQ);
	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
					   this_branch, other_branch) &&
		   is_pointer_value(env, insn->dst_reg)) {
		/*
		 * Neither a NULL check nor a recognised packet pointer
		 * comparison: dst_reg must not hold a pointer.
		 */
		verbose(env, "R%d pointer comparison prohibited\n",
			insn->dst_reg);
		return -EACCES;
	}
	if (env->log.level & BPF_LOG_LEVEL)
		print_insn_state(env, this_branch, this_branch->curframe);
	return 0;
}
16134 
16135 /* verify BPF_LD_IMM64 instruction */
16136 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
16137 {
16138 	struct bpf_insn_aux_data *aux = cur_aux(env);
16139 	struct bpf_reg_state *regs = cur_regs(env);
16140 	struct bpf_reg_state *dst_reg;
16141 	struct bpf_map *map;
16142 	int err;
16143 
16144 	if (BPF_SIZE(insn->code) != BPF_DW) {
16145 		verbose(env, "invalid BPF_LD_IMM insn\n");
16146 		return -EINVAL;
16147 	}
16148 
16149 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
16150 	if (err)
16151 		return err;
16152 
16153 	dst_reg = &regs[insn->dst_reg];
16154 	if (insn->src_reg == 0) {
16155 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
16156 
16157 		dst_reg->type = SCALAR_VALUE;
16158 		__mark_reg_known(&regs[insn->dst_reg], imm);
16159 		return 0;
16160 	}
16161 
16162 	/* All special src_reg cases are listed below. From this point onwards
16163 	 * we either succeed and assign a corresponding dst_reg->type after
16164 	 * zeroing the offset, or fail and reject the program.
16165 	 */
16166 	mark_reg_known_zero(env, regs, insn->dst_reg);
16167 
16168 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
16169 		dst_reg->type = aux->btf_var.reg_type;
16170 		switch (base_type(dst_reg->type)) {
16171 		case PTR_TO_MEM:
16172 			dst_reg->mem_size = aux->btf_var.mem_size;
16173 			break;
16174 		case PTR_TO_BTF_ID:
16175 			dst_reg->btf = aux->btf_var.btf;
16176 			dst_reg->btf_id = aux->btf_var.btf_id;
16177 			break;
16178 		default:
16179 			verifier_bug(env, "pseudo btf id: unexpected dst reg type");
16180 			return -EFAULT;
16181 		}
16182 		return 0;
16183 	}
16184 
16185 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
16186 		struct bpf_prog_aux *aux = env->prog->aux;
16187 		u32 subprogno = bpf_find_subprog(env,
16188 						 env->insn_idx + insn->imm + 1);
16189 
16190 		if (!aux->func_info) {
16191 			verbose(env, "missing btf func_info\n");
16192 			return -EINVAL;
16193 		}
16194 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
16195 			verbose(env, "callback function not static\n");
16196 			return -EINVAL;
16197 		}
16198 
16199 		dst_reg->type = PTR_TO_FUNC;
16200 		dst_reg->subprogno = subprogno;
16201 		return 0;
16202 	}
16203 
16204 	map = env->used_maps[aux->map_index];
16205 
16206 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
16207 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
16208 		if (map->map_type == BPF_MAP_TYPE_ARENA) {
16209 			__mark_reg_unknown(env, dst_reg);
16210 			dst_reg->map_ptr = map;
16211 			return 0;
16212 		}
16213 		__mark_reg_known(dst_reg, aux->map_off);
16214 		dst_reg->type = PTR_TO_MAP_VALUE;
16215 		dst_reg->map_ptr = map;
16216 		WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
16217 			     map->max_entries != 1);
16218 		/* We want reg->id to be same (0) as map_value is not distinct */
16219 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
16220 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
16221 		dst_reg->type = CONST_PTR_TO_MAP;
16222 		dst_reg->map_ptr = map;
16223 	} else {
16224 		verifier_bug(env, "unexpected src reg value for ldimm64");
16225 		return -EFAULT;
16226 	}
16227 
16228 	return 0;
16229 }
16230 
16231 static bool may_access_skb(enum bpf_prog_type type)
16232 {
16233 	switch (type) {
16234 	case BPF_PROG_TYPE_SOCKET_FILTER:
16235 	case BPF_PROG_TYPE_SCHED_CLS:
16236 	case BPF_PROG_TYPE_SCHED_ACT:
16237 		return true;
16238 	default:
16239 		return false;
16240 	}
16241 }
16242 
16243 /* verify safety of LD_ABS|LD_IND instructions:
16244  * - they can only appear in the programs where ctx == skb
16245  * - since they are wrappers of function calls, they scratch R1-R5 registers,
16246  *   preserve R6-R9, and store return value into R0
16247  *
16248  * Implicit input:
16249  *   ctx == skb == R6 == CTX
16250  *
16251  * Explicit input:
16252  *   SRC == any register
16253  *   IMM == 32-bit immediate
16254  *
16255  * Output:
16256  *   R0 - 8/16/32-bit skb data converted to cpu endianness
16257  */
16258 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
16259 {
16260 	struct bpf_reg_state *regs = cur_regs(env);
16261 	static const int ctx_reg = BPF_REG_6;
16262 	u8 mode = BPF_MODE(insn->code);
16263 	int i, err;
16264 
16265 	if (!may_access_skb(resolve_prog_type(env->prog))) {
16266 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
16267 		return -EINVAL;
16268 	}
16269 
16270 	if (!env->ops->gen_ld_abs) {
16271 		verifier_bug(env, "gen_ld_abs is null");
16272 		return -EFAULT;
16273 	}
16274 
16275 	/* check whether implicit source operand (register R6) is readable */
16276 	err = check_reg_arg(env, ctx_reg, SRC_OP);
16277 	if (err)
16278 		return err;
16279 
16280 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
16281 	 * gen_ld_abs() may terminate the program at runtime, leading to
16282 	 * reference leak.
16283 	 */
16284 	err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
16285 	if (err)
16286 		return err;
16287 
16288 	if (regs[ctx_reg].type != PTR_TO_CTX) {
16289 		verbose(env,
16290 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
16291 		return -EINVAL;
16292 	}
16293 
16294 	if (mode == BPF_IND) {
16295 		/* check explicit source operand */
16296 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
16297 		if (err)
16298 			return err;
16299 	}
16300 
16301 	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
16302 	if (err < 0)
16303 		return err;
16304 
16305 	/* reset caller saved regs to unreadable */
16306 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
16307 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
16308 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
16309 	}
16310 
16311 	/* mark destination R0 register as readable, since it contains
16312 	 * the value fetched from the packet.
16313 	 * Already marked as written above.
16314 	 */
16315 	mark_reg_unknown(env, regs, BPF_REG_0);
16316 	/* ld_abs load up to 32-bit skb data. */
16317 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
16318 	/*
16319 	 * See bpf_gen_ld_abs() which emits a hidden BPF_EXIT with r0=0
16320 	 * which must be explored by the verifier when in a subprog.
16321 	 */
16322 	if (env->cur_state->curframe) {
16323 		struct bpf_verifier_state *branch;
16324 
16325 		mark_reg_scratched(env, BPF_REG_0);
16326 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
16327 		if (IS_ERR(branch))
16328 			return PTR_ERR(branch);
16329 		mark_reg_known_zero(env, regs, BPF_REG_0);
16330 		err = prepare_func_exit(env, &env->insn_idx);
16331 		if (err)
16332 			return err;
16333 		env->insn_idx--;
16334 	}
16335 	return 0;
16336 }
16337 
16338 
16339 static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
16340 {
16341 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16342 
16343 	/* Default return value range. */
16344 	*range = retval_range(0, 1);
16345 
16346 	switch (prog_type) {
16347 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
16348 		switch (env->prog->expected_attach_type) {
16349 		case BPF_CGROUP_UDP4_RECVMSG:
16350 		case BPF_CGROUP_UDP6_RECVMSG:
16351 		case BPF_CGROUP_UNIX_RECVMSG:
16352 		case BPF_CGROUP_INET4_GETPEERNAME:
16353 		case BPF_CGROUP_INET6_GETPEERNAME:
16354 		case BPF_CGROUP_UNIX_GETPEERNAME:
16355 		case BPF_CGROUP_INET4_GETSOCKNAME:
16356 		case BPF_CGROUP_INET6_GETSOCKNAME:
16357 		case BPF_CGROUP_UNIX_GETSOCKNAME:
16358 			*range = retval_range(1, 1);
16359 			break;
16360 		case BPF_CGROUP_INET4_BIND:
16361 		case BPF_CGROUP_INET6_BIND:
16362 			*range = retval_range(0, 3);
16363 			break;
16364 		default:
16365 			break;
16366 		}
16367 		break;
16368 	case BPF_PROG_TYPE_CGROUP_SKB:
16369 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)
16370 			*range = retval_range(0, 3);
16371 		break;
16372 	case BPF_PROG_TYPE_CGROUP_SOCK:
16373 	case BPF_PROG_TYPE_SOCK_OPS:
16374 	case BPF_PROG_TYPE_CGROUP_DEVICE:
16375 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
16376 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
16377 		break;
16378 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
16379 		if (!env->prog->aux->attach_btf_id)
16380 			return false;
16381 		*range = retval_range(0, 0);
16382 		break;
16383 	case BPF_PROG_TYPE_TRACING:
16384 		switch (env->prog->expected_attach_type) {
16385 		case BPF_TRACE_FENTRY:
16386 		case BPF_TRACE_FEXIT:
16387 		case BPF_TRACE_FSESSION:
16388 		case BPF_TRACE_FENTRY_MULTI:
16389 		case BPF_TRACE_FEXIT_MULTI:
16390 		case BPF_TRACE_FSESSION_MULTI:
16391 			*range = retval_range(0, 0);
16392 			break;
16393 		case BPF_TRACE_RAW_TP:
16394 		case BPF_MODIFY_RETURN:
16395 			return false;
16396 		case BPF_TRACE_ITER:
16397 		default:
16398 			break;
16399 		}
16400 		break;
16401 	case BPF_PROG_TYPE_KPROBE:
16402 		switch (env->prog->expected_attach_type) {
16403 		case BPF_TRACE_KPROBE_SESSION:
16404 		case BPF_TRACE_UPROBE_SESSION:
16405 			break;
16406 		default:
16407 			return false;
16408 		}
16409 		break;
16410 	case BPF_PROG_TYPE_SK_LOOKUP:
16411 		*range = retval_range(SK_DROP, SK_PASS);
16412 		break;
16413 
16414 	case BPF_PROG_TYPE_LSM:
16415 		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
16416 			/* no range found, any return value is allowed */
16417 			if (!get_func_retval_range(env->prog, range))
16418 				return false;
16419 			/* no restricted range, any return value is allowed */
16420 			if (range->minval == S32_MIN && range->maxval == S32_MAX)
16421 				return false;
16422 			range->return_32bit = true;
16423 		} else if (!env->prog->aux->attach_func_proto->type) {
16424 			/* Make sure programs that attach to void
16425 			 * hooks don't try to modify return value.
16426 			 */
16427 			*range = retval_range(1, 1);
16428 		}
16429 		break;
16430 
16431 	case BPF_PROG_TYPE_NETFILTER:
16432 		*range = retval_range(NF_DROP, NF_ACCEPT);
16433 		break;
16434 	case BPF_PROG_TYPE_STRUCT_OPS:
16435 		*range = retval_range(0, 0);
16436 		break;
16437 	case BPF_PROG_TYPE_EXT:
16438 		/* freplace program can return anything as its return value
16439 		 * depends on the to-be-replaced kernel func or bpf program.
16440 		 */
16441 	default:
16442 		return false;
16443 	}
16444 
16445 	/* Continue calculating. */
16446 
16447 	return true;
16448 }
16449 
16450 static bool program_returns_void(struct bpf_verifier_env *env)
16451 {
16452 	const struct bpf_prog *prog = env->prog;
16453 	enum bpf_prog_type prog_type = prog->type;
16454 
16455 	switch (prog_type) {
16456 	case BPF_PROG_TYPE_LSM:
16457 		/* See return_retval_range, for BPF_LSM_CGROUP can be 0 or 0-1 depending on hook. */
16458 		if (prog->expected_attach_type != BPF_LSM_CGROUP &&
16459 		    !prog->aux->attach_func_proto->type)
16460 			return true;
16461 		break;
16462 	case BPF_PROG_TYPE_STRUCT_OPS:
16463 		if (!prog->aux->attach_func_proto->type)
16464 			return true;
16465 		break;
16466 	case BPF_PROG_TYPE_EXT:
16467 		/*
16468 		 * If the actual program is an extension, let it
16469 		 * return void - attaching will succeed only if the
16470 		 * program being replaced also returns void, and since
16471 		 * it has passed verification its actual type doesn't matter.
16472 		 */
16473 		if (subprog_returns_void(env, 0))
16474 			return true;
16475 		break;
16476 	default:
16477 		break;
16478 	}
16479 	return false;
16480 }
16481 
16482 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
16483 {
16484 	const char *exit_ctx = "At program exit";
16485 	struct tnum enforce_attach_type_range = tnum_unknown;
16486 	const struct bpf_prog *prog = env->prog;
16487 	struct bpf_reg_state *reg = reg_state(env, regno);
16488 	struct bpf_retval_range range = retval_range(0, 1);
16489 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16490 	struct bpf_func_state *frame = env->cur_state->frame[0];
16491 	const struct btf_type *reg_type, *ret_type = NULL;
16492 	int err;
16493 
16494 	/* LSM and struct_ops func-ptr's return type could be "void" */
16495 	if (!frame->in_async_callback_fn && program_returns_void(env))
16496 		return 0;
16497 
16498 	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
16499 		/* Allow a struct_ops program to return a referenced kptr if it
16500 		 * matches the operator's return type and is in its unmodified
16501 		 * form. A scalar zero (i.e., a null pointer) is also allowed.
16502 		 */
16503 		reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
16504 		ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
16505 						prog->aux->attach_func_proto->type,
16506 						NULL);
16507 		if (ret_type && ret_type == reg_type && reg_is_referenced(env, reg))
16508 			return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false);
16509 	}
16510 
16511 	/* eBPF calling convention is such that R0 is used
16512 	 * to return the value from eBPF program.
16513 	 * Make sure that it's readable at this time
16514 	 * of bpf_exit, which means that program wrote
16515 	 * something into it earlier
16516 	 */
16517 	err = check_reg_arg(env, regno, SRC_OP);
16518 	if (err)
16519 		return err;
16520 
16521 	if (is_pointer_value(env, regno)) {
16522 		verbose(env, "R%d leaks addr as return value\n", regno);
16523 		return -EACCES;
16524 	}
16525 
16526 	if (frame->in_async_callback_fn) {
16527 		exit_ctx = "At async callback return";
16528 		range = frame->callback_ret_range;
16529 		goto enforce_retval;
16530 	}
16531 
16532 	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type)
16533 		return 0;
16534 
16535 	if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS))
16536 		enforce_attach_type_range = tnum_range(2, 3);
16537 
16538 	if (!return_retval_range(env, &range))
16539 		return 0;
16540 
16541 enforce_retval:
16542 	if (reg->type != SCALAR_VALUE) {
16543 		verbose(env, "%s the register R%d is not a known value (%s)\n",
16544 			exit_ctx, regno, reg_type_str(env, reg->type));
16545 		return -EINVAL;
16546 	}
16547 
16548 	err = mark_chain_precision(env, regno);
16549 	if (err)
16550 		return err;
16551 
16552 	if (!retval_range_within(range, reg)) {
16553 		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
16554 		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
16555 		    prog_type == BPF_PROG_TYPE_LSM &&
16556 		    !prog->aux->attach_func_proto->type)
16557 			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
16558 		return -EINVAL;
16559 	}
16560 
16561 	if (!tnum_is_unknown(enforce_attach_type_range) &&
16562 	    tnum_in(enforce_attach_type_range, reg->var_off))
16563 		env->prog->enforce_expected_attach_type = 1;
16564 	return 0;
16565 }
16566 
16567 static int check_global_subprog_return_code(struct bpf_verifier_env *env)
16568 {
16569 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
16570 	struct bpf_func_state *cur_frame = cur_func(env);
16571 	int err;
16572 
16573 	if (subprog_returns_void(env, cur_frame->subprogno))
16574 		return 0;
16575 
16576 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
16577 	if (err)
16578 		return err;
16579 
16580 	/* Pointers to arena are safe to pass between subprograms. */
16581 	if (is_arena_reg(env, BPF_REG_0))
16582 		return 0;
16583 
16584 	if (is_pointer_value(env, BPF_REG_0)) {
16585 		verbose(env, "R%d leaks addr as return value\n", BPF_REG_0);
16586 		return -EACCES;
16587 	}
16588 
16589 	if (reg->type != SCALAR_VALUE) {
16590 		verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
16591 			reg_type_str(env, reg->type));
16592 		return -EINVAL;
16593 	}
16594 
16595 	return 0;
16596 }
16597 
16598 /* Bitmask with 1s for all caller saved registers, i.e. bits 0..CALLER_SAVED_REGS-1 are set */
16599 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
16600 
16601 /* True if do_misc_fixups() replaces calls to helper number 'imm',
16602  * replacement patch is presumed to follow bpf_fastcall contract
16603  * (see mark_fastcall_pattern_for_call() below).
16604  */
16605 bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
16606 {
16607 	switch (imm) {
16608 #ifdef CONFIG_X86_64
16609 	case BPF_FUNC_get_smp_processor_id:
16610 #ifdef CONFIG_SMP
16611 	case BPF_FUNC_get_current_task_btf:
16612 	case BPF_FUNC_get_current_task:
16613 #endif
16614 		return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
16615 #endif
16616 	default:
16617 		return false;
16618 	}
16619 }
16620 
16621 /* If @call is a kfunc or helper call, fills @cs and returns true,
16622  * otherwise returns false.
16623  */
16624 bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
16625 			  struct bpf_call_summary *cs)
16626 {
16627 	struct bpf_kfunc_call_arg_meta meta;
16628 	const struct bpf_func_proto *fn;
16629 	int i;
16630 
16631 	if (bpf_helper_call(call)) {
16632 
16633 		if (bpf_get_helper_proto(env, call->imm, &fn) < 0)
16634 			/* error would be reported later */
16635 			return false;
16636 		cs->fastcall = fn->allow_fastcall &&
16637 			       (bpf_verifier_inlines_helper_call(env, call->imm) ||
16638 				bpf_jit_inlines_helper_call(call->imm));
16639 		cs->is_void = fn->ret_type == RET_VOID;
16640 		cs->num_params = 0;
16641 		for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
16642 			if (fn->arg_type[i] == ARG_DONTCARE)
16643 				break;
16644 			cs->num_params++;
16645 		}
16646 		return true;
16647 	}
16648 
16649 	if (bpf_pseudo_kfunc_call(call)) {
16650 		int err;
16651 
16652 		err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
16653 		if (err < 0)
16654 			/* error would be reported later */
16655 			return false;
16656 		cs->num_params = btf_type_vlen(meta.func_proto);
16657 		cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
16658 		cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
16659 		return true;
16660 	}
16661 
16662 	return false;
16663 }
16664 
16665 /* LLVM defines a bpf_fastcall function attribute.
16666  * This attribute means that function scratches only some of
16667  * the caller saved registers defined by ABI.
16668  * For BPF the set of such registers could be defined as follows:
16669  * - R0 is scratched only if function is non-void;
16670  * - R1-R5 are scratched only if corresponding parameter type is defined
16671  *   in the function prototype.
16672  *
16673  * The contract between kernel and clang allows to simultaneously use
16674  * such functions and maintain backwards compatibility with old
16675  * kernels that don't understand bpf_fastcall calls:
16676  *
16677  * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
16678  *   registers are not scratched by the call;
16679  *
16680  * - as a post-processing step, clang visits each bpf_fastcall call and adds
16681  *   spill/fill for every live r0-r5;
16682  *
16683  * - stack offsets used for the spill/fill are allocated as lowest
16684  *   stack offsets in whole function and are not used for any other
16685  *   purposes;
16686  *
16687  * - when kernel loads a program, it looks for such patterns
16688  *   (bpf_fastcall function surrounded by spills/fills) and checks if
16689  *   spill/fill stack offsets are used exclusively in fastcall patterns;
16690  *
16691  * - if so, and if verifier or current JIT inlines the call to the
16692  *   bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
16693  *   spill/fill pairs;
16694  *
16695  * - when old kernel loads a program, presence of spill/fill pairs
16696  *   keeps BPF program valid, albeit slightly less efficient.
16697  *
16698  * For example:
16699  *
16700  *   r1 = 1;
16701  *   r2 = 2;
16702  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16703  *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
16704  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16705  *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
16706  *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
16707  *   r0 = r1;                            exit;
16708  *   r0 += r2;
16709  *   exit;
16710  *
16711  * The purpose of mark_fastcall_pattern_for_call is to:
16712  * - look for such patterns;
16713  * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
16714  * - set env->insn_aux_data[*].fastcall_spills_num for call instruction;
16715  * - update env->subprog_info[*]->fastcall_stack_off to find an offset
16716  *   at which bpf_fastcall spill/fill stack slots start;
16717  * - update env->subprog_info[*]->keep_fastcall_stack.
16718  *
16719  * The .fastcall_pattern and .fastcall_stack_off are used by
16720  * check_fastcall_stack_contract() to check if every stack access to
16721  * fastcall spill/fill stack slot originates from spill/fill
16722  * instructions, members of fastcall patterns.
16723  *
16724  * If such condition holds true for a subprogram, fastcall patterns could
16725  * be rewritten by remove_fastcall_spills_fills().
16726  * Otherwise bpf_fastcall patterns are not changed in the subprogram
16727  * (code, presumably, generated by an older clang version).
16728  *
16729  * For example, it is *not* safe to remove spill/fill below:
16730  *
16731  *   r1 = 1;
16732  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16733  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16734  *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
16735  *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
16736  *   r0 += r1;                           exit;
16737  *   exit;
16738  */
16739 static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
16740 					   struct bpf_subprog_info *subprog,
16741 					   int insn_idx, s16 lowest_off)
16742 {
16743 	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
16744 	struct bpf_insn *call = &env->prog->insnsi[insn_idx];
16745 	u32 clobbered_regs_mask;
16746 	struct bpf_call_summary cs;
16747 	u32 expected_regs_mask;
16748 	s16 off;
16749 	int i;
16750 
16751 	if (!bpf_get_call_summary(env, call, &cs))
16752 		return;
16753 
16754 	/* A bitmask specifying which caller saved registers are clobbered
16755 	 * by a call to a helper/kfunc *as if* this helper/kfunc follows
16756 	 * bpf_fastcall contract:
16757 	 * - includes R0 if function is non-void;
16758 	 * - includes R1-R5 if corresponding parameter has is described
16759 	 *   in the function prototype.
16760 	 */
16761 	clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
16762 	/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
16763 	expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;
16764 
16765 	/* match pairs of form:
16766 	 *
16767 	 * *(u64 *)(r10 - Y) = rX   (where Y % 8 == 0)
16768 	 * ...
16769 	 * call %[to_be_inlined]
16770 	 * ...
16771 	 * rX = *(u64 *)(r10 - Y)
16772 	 */
16773 	for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
16774 		if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
16775 			break;
16776 		stx = &insns[insn_idx - i];
16777 		ldx = &insns[insn_idx + i];
16778 		/* must be a stack spill/fill pair */
16779 		if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
16780 		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
16781 		    stx->dst_reg != BPF_REG_10 ||
16782 		    ldx->src_reg != BPF_REG_10)
16783 			break;
16784 		/* must be a spill/fill for the same reg */
16785 		if (stx->src_reg != ldx->dst_reg)
16786 			break;
16787 		/* must be one of the previously unseen registers */
16788 		if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
16789 			break;
16790 		/* must be a spill/fill for the same expected offset,
16791 		 * no need to check offset alignment, BPF_DW stack access
16792 		 * is always 8-byte aligned.
16793 		 */
16794 		if (stx->off != off || ldx->off != off)
16795 			break;
16796 		expected_regs_mask &= ~BIT(stx->src_reg);
16797 		env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
16798 		env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
16799 	}
16800 	if (i == 1)
16801 		return;
16802 
16803 	/* Conditionally set 'fastcall_spills_num' to allow forward
16804 	 * compatibility when more helper functions are marked as
16805 	 * bpf_fastcall at compile time than current kernel supports, e.g:
16806 	 *
16807 	 *   1: *(u64 *)(r10 - 8) = r1
16808 	 *   2: call A                  ;; assume A is bpf_fastcall for current kernel
16809 	 *   3: r1 = *(u64 *)(r10 - 8)
16810 	 *   4: *(u64 *)(r10 - 8) = r1
16811 	 *   5: call B                  ;; assume B is not bpf_fastcall for current kernel
16812 	 *   6: r1 = *(u64 *)(r10 - 8)
16813 	 *
16814 	 * There is no need to block bpf_fastcall rewrite for such program.
16815 	 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
16816 	 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
16817 	 * does not remove spill/fill pair {4,6}.
16818 	 */
16819 	if (cs.fastcall)
16820 		env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
16821 	else
16822 		subprog->keep_fastcall_stack = 1;
16823 	subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
16824 }
16825 
16826 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
16827 {
16828 	struct bpf_subprog_info *subprog = env->subprog_info;
16829 	struct bpf_insn *insn;
16830 	s16 lowest_off;
16831 	int s, i;
16832 
16833 	for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
16834 		/* find lowest stack spill offset used in this subprog */
16835 		lowest_off = 0;
16836 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
16837 			insn = env->prog->insnsi + i;
16838 			if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
16839 			    insn->dst_reg != BPF_REG_10)
16840 				continue;
16841 			lowest_off = min(lowest_off, insn->off);
16842 		}
16843 		/* use this offset to find fastcall patterns */
16844 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
16845 			insn = env->prog->insnsi + i;
16846 			if (insn->code != (BPF_JMP | BPF_CALL))
16847 				continue;
16848 			mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
16849 		}
16850 	}
16851 	return 0;
16852 }
16853 
16854 static void adjust_btf_func(struct bpf_verifier_env *env)
16855 {
16856 	struct bpf_prog_aux *aux = env->prog->aux;
16857 	int i;
16858 
16859 	if (!aux->func_info)
16860 		return;
16861 
16862 	/* func_info is not available for hidden subprogs */
16863 	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
16864 		aux->func_info[i].insn_off = env->subprog_info[i].start;
16865 }
16866 
16867 /* Find id in idset and increment its count, or add new entry */
16868 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
16869 {
16870 	u32 i;
16871 
16872 	for (i = 0; i < idset->num_ids; i++) {
16873 		if (idset->entries[i].id == id) {
16874 			idset->entries[i].cnt++;
16875 			return;
16876 		}
16877 	}
16878 	/* New id */
16879 	if (idset->num_ids < BPF_ID_MAP_SIZE) {
16880 		idset->entries[idset->num_ids].id = id;
16881 		idset->entries[idset->num_ids].cnt = 1;
16882 		idset->num_ids++;
16883 	}
16884 }
16885 
16886 /* Find id in idset and return its count, or 0 if not found */
16887 static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
16888 {
16889 	u32 i;
16890 
16891 	for (i = 0; i < idset->num_ids; i++) {
16892 		if (idset->entries[i].id == id)
16893 			return idset->entries[i].cnt;
16894 	}
16895 	return 0;
16896 }
16897 
16898 /*
16899  * Clear singular scalar ids in a state.
16900  * A register with a non-zero id is called singular if no other register shares
16901  * the same base id. Such registers can be treated as independent (id=0).
16902  */
16903 void bpf_clear_singular_ids(struct bpf_verifier_env *env,
16904 			    struct bpf_verifier_state *st)
16905 {
16906 	struct bpf_idset *idset = &env->idset_scratch;
16907 	struct bpf_func_state *func;
16908 	struct bpf_reg_state *reg;
16909 
16910 	idset->num_ids = 0;
16911 
16912 	bpf_for_each_reg_in_vstate(st, func, reg, ({
16913 		if (reg->type != SCALAR_VALUE)
16914 			continue;
16915 		if (!reg->id)
16916 			continue;
16917 		idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST);
16918 	}));
16919 
16920 	bpf_for_each_reg_in_vstate(st, func, reg, ({
16921 		if (reg->type != SCALAR_VALUE)
16922 			continue;
16923 		if (!reg->id)
16924 			continue;
16925 		if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1)
16926 			clear_scalar_id(reg);
16927 	}));
16928 }
16929 
16930 /* Return true if it's OK to have the same insn return a different type. */
16931 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
16932 {
16933 	switch (base_type(type)) {
16934 	case PTR_TO_CTX:
16935 	case PTR_TO_SOCKET:
16936 	case PTR_TO_SOCK_COMMON:
16937 	case PTR_TO_TCP_SOCK:
16938 	case PTR_TO_XDP_SOCK:
16939 	case PTR_TO_BTF_ID:
16940 	case PTR_TO_ARENA:
16941 		return false;
16942 	default:
16943 		return true;
16944 	}
16945 }
16946 
16947 /* If an instruction was previously used with particular pointer types, then we
16948  * need to be careful to avoid cases such as the below, where it may be ok
16949  * for one branch accessing the pointer, but not ok for the other branch:
16950  *
16951  * R1 = sock_ptr
16952  * goto X;
16953  * ...
16954  * R1 = some_other_valid_ptr;
16955  * goto X;
16956  * ...
16957  * R2 = *(u32 *)(R1 + 0);
16958  */
16959 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
16960 {
16961 	return src != prev && (!reg_type_mismatch_ok(src) ||
16962 			       !reg_type_mismatch_ok(prev));
16963 }
16964 
16965 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
16966 {
16967 	switch (base_type(type)) {
16968 	case PTR_TO_MEM:
16969 	case PTR_TO_BTF_ID:
16970 		return true;
16971 	default:
16972 		return false;
16973 	}
16974 }
16975 
16976 static bool is_ptr_to_mem(enum bpf_reg_type type)
16977 {
16978 	return base_type(type) == PTR_TO_MEM;
16979 }
16980 
16981 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
16982 			     bool allow_trust_mismatch)
16983 {
16984 	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
16985 	enum bpf_reg_type merged_type;
16986 
16987 	if (*prev_type == NOT_INIT) {
16988 		/* Saw a valid insn
16989 		 * dst_reg = *(u32 *)(src_reg + off)
16990 		 * save type to validate intersecting paths
16991 		 */
16992 		*prev_type = type;
16993 	} else if (reg_type_mismatch(type, *prev_type)) {
16994 		/* Abuser program is trying to use the same insn
16995 		 * dst_reg = *(u32*) (src_reg + off)
16996 		 * with different pointer types:
16997 		 * src_reg == ctx in one branch and
16998 		 * src_reg == stack|map in some other branch.
16999 		 * Reject it.
17000 		 */
17001 		if (allow_trust_mismatch &&
17002 		    is_ptr_to_mem_or_btf_id(type) &&
17003 		    is_ptr_to_mem_or_btf_id(*prev_type)) {
17004 			/*
17005 			 * Have to support a use case when one path through
17006 			 * the program yields TRUSTED pointer while another
17007 			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
17008 			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
17009 			 * Same behavior of MEM_RDONLY flag.
17010 			 */
17011 			if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
17012 				merged_type = PTR_TO_MEM;
17013 			else
17014 				merged_type = PTR_TO_BTF_ID;
17015 			if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
17016 				merged_type |= PTR_UNTRUSTED;
17017 			if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
17018 				merged_type |= MEM_RDONLY;
17019 			*prev_type = merged_type;
17020 		} else {
17021 			verbose(env, "same insn cannot be used with different pointers\n");
17022 			return -EINVAL;
17023 		}
17024 	}
17025 
17026 	return 0;
17027 }
17028 
17029 enum {
17030 	PROCESS_BPF_EXIT = 1,	/* returned by process_bpf_exit_full() when exit handling succeeded */
17031 	INSN_IDX_UPDATED = 2,	/* returned when the handler has already set env->insn_idx itself */
17032 };
17033 
17034 static int process_bpf_exit_full(struct bpf_verifier_env *env,
17035 				 bool *do_print_state,
17036 				 bool exception_exit)
17037 {
17038 	struct bpf_func_state *cur_frame = cur_func(env);
17039 
17040 	/* We must do check_reference_leak here before
17041 	 * prepare_func_exit to handle the case when
17042 	 * state->curframe > 0, it may be a callback function,
17043 	 * for which reference_state must match caller reference
17044 	 * state when it exits.
17045 	 */
17046 	int err = check_resource_leak(env, exception_exit,
17047 				      exception_exit || !env->cur_state->curframe,
17048 				      exception_exit ? "bpf_throw" :
17049 				      "BPF_EXIT instruction in main prog");
17050 	if (err)
17051 		return err;
17052 
17053 	/* The side effect of the prepare_func_exit which is
17054 	 * being skipped is that it frees bpf_func_state.
17055 	 * Typically, process_bpf_exit will only be hit with
17056 	 * outermost exit. copy_verifier_state in pop_stack will
17057 	 * handle freeing of any extra bpf_func_state left over
17058 	 * from not processing all nested function exits. We
17059 	 * also skip return code checks as they are not needed
17060 	 * for exceptional exits.
17061 	 */
17062 	if (exception_exit)
17063 		return PROCESS_BPF_EXIT;
17064 
17065 	if (env->cur_state->curframe) {
17066 		/* exit from nested function */
17067 		err = prepare_func_exit(env, &env->insn_idx);
17068 		if (err)
17069 			return err;
17070 		*do_print_state = true;
17071 		return INSN_IDX_UPDATED;
17072 	}
17073 
17074 	/*
17075 	 * Return from a regular global subprogram differs from return
17076 	 * from the main program or async/exception callback.
17077 	 * Main program exit implies return code restrictions
17078 	 * that depend on program type.
17079 	 * Exit from exception callback is equivalent to main program exit.
17080 	 * Exit from async callback implies return code restrictions
17081 	 * that depend on async scheduling mechanism.
17082 	 */
17083 	if (cur_frame->subprogno &&
17084 	    !cur_frame->in_async_callback_fn &&
17085 	    !cur_frame->in_exception_callback_fn)
17086 		err = check_global_subprog_return_code(env);
17087 	else
17088 		err = check_return_code(env, BPF_REG_0, "R0");
17089 	if (err)
17090 		return err;
17091 	return PROCESS_BPF_EXIT;
17092 }
17093 
17094 static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
17095 				       int regno,
17096 				       struct bpf_map *map,
17097 				       u32 *pmin_index, u32 *pmax_index)
17098 {
17099 	struct bpf_reg_state *reg = reg_state(env, regno);
17100 	u64 min_index = reg_umin(reg);
17101 	u64 max_index = reg_umax(reg);
17102 	const u32 size = 8;
17103 
17104 	if (min_index > (u64) U32_MAX * size) {
17105 		verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg_umin(reg));
17106 		return -ERANGE;
17107 	}
17108 	if (max_index > (u64) U32_MAX * size) {
17109 		verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg_umax(reg));
17110 		return -ERANGE;
17111 	}
17112 
17113 	min_index /= size;
17114 	max_index /= size;
17115 
17116 	if (max_index >= map->max_entries) {
17117 		verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n",
17118 			     regno, min_index, max_index, map->max_entries);
17119 		return -EINVAL;
17120 	}
17121 
17122 	*pmin_index = min_index;
17123 	*pmax_index = max_index;
17124 	return 0;
17125 }
17126 
17127 /* gotox *dst_reg */
17128 static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn)
17129 {
17130 	struct bpf_verifier_state *other_branch;
17131 	struct bpf_reg_state *dst_reg;
17132 	struct bpf_map *map;
17133 	u32 min_index, max_index;
17134 	int err = 0;
17135 	int n;
17136 	int i;
17137 
17138 	dst_reg = reg_state(env, insn->dst_reg);
17139 	if (dst_reg->type != PTR_TO_INSN) {
17140 		verbose(env, "R%d has type %s, expected PTR_TO_INSN\n",
17141 			     insn->dst_reg, reg_type_str(env, dst_reg->type));
17142 		return -EINVAL;
17143 	}
17144 
17145 	map = dst_reg->map_ptr;
17146 	if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg))
17147 		return -EFAULT;
17148 
17149 	if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env,
17150 			    "R%d has incorrect map type %d", insn->dst_reg, map->map_type))
17151 		return -EFAULT;
17152 
17153 	err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index);
17154 	if (err)
17155 		return err;
17156 
17157 	/* Ensure that the buffer is large enough */
17158 	if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
17159 		env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf,
17160 						        max_index - min_index + 1);
17161 		if (!env->gotox_tmp_buf)
17162 			return -ENOMEM;
17163 	}
17164 
17165 	n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
17166 	if (n < 0)
17167 		return n;
17168 	if (n == 0) {
17169 		verbose(env, "register R%d doesn't point to any offset in map id=%d\n",
17170 			     insn->dst_reg, map->id);
17171 		return -EINVAL;
17172 	}
17173 
17174 	for (i = 0; i < n - 1; i++) {
17175 		mark_indirect_target(env, env->gotox_tmp_buf->items[i]);
17176 		other_branch = push_stack(env, env->gotox_tmp_buf->items[i],
17177 					  env->insn_idx, env->cur_state->speculative);
17178 		if (IS_ERR(other_branch))
17179 			return PTR_ERR(other_branch);
17180 	}
17181 	env->insn_idx = env->gotox_tmp_buf->items[n-1];
17182 	mark_indirect_target(env, env->insn_idx);
17183 	return INSN_IDX_UPDATED;
17184 }
17185 
17186 static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
17187 {
17188 	int err;
17189 	struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
17190 	u8 class = BPF_CLASS(insn->code);
17191 
17192 	switch (class) {
17193 	case BPF_ALU:
17194 	case BPF_ALU64:
17195 		return check_alu_op(env, insn);
17196 
17197 	case BPF_LDX:
17198 		return check_load_mem(env, insn, false,
17199 				      BPF_MODE(insn->code) == BPF_MEMSX,
17200 				      true, "ldx");
17201 
17202 	case BPF_STX:
17203 		if (BPF_MODE(insn->code) == BPF_ATOMIC)
17204 			return check_atomic(env, insn);
17205 		return check_store_reg(env, insn, false);
17206 
17207 	case BPF_ST: {
17208 		/* Handle stack arg write (store immediate) */
17209 		if (is_stack_arg_st(insn)) {
17210 			struct bpf_verifier_state *vstate = env->cur_state;
17211 			struct bpf_func_state *state = vstate->frame[vstate->curframe];
17212 
17213 			return check_stack_arg_write(env, state, insn->off, NULL);
17214 		}
17215 
17216 		enum bpf_reg_type dst_reg_type;
17217 
17218 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17219 		if (err)
17220 			return err;
17221 
17222 		dst_reg_type = cur_regs(env)[insn->dst_reg].type;
17223 
17224 		err = check_mem_access(env, env->insn_idx, cur_regs(env) + insn->dst_reg, argno_from_reg(insn->dst_reg),
17225 				       insn->off, BPF_SIZE(insn->code),
17226 				       BPF_WRITE, -1, false, false);
17227 		if (err)
17228 			return err;
17229 
17230 		return save_aux_ptr_type(env, dst_reg_type, false);
17231 	}
17232 	case BPF_JMP:
17233 	case BPF_JMP32: {
17234 		u8 opcode = BPF_OP(insn->code);
17235 
17236 		env->jmps_processed++;
17237 		if (opcode == BPF_CALL) {
17238 			if (env->cur_state->active_locks) {
17239 				if ((insn->src_reg == BPF_REG_0 &&
17240 				     insn->imm != BPF_FUNC_spin_unlock &&
17241 				     insn->imm != BPF_FUNC_kptr_xchg) ||
17242 				    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
17243 				     (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
17244 					verbose(env,
17245 						"function calls are not allowed while holding a lock\n");
17246 					return -EINVAL;
17247 				}
17248 			}
17249 			mark_reg_scratched(env, BPF_REG_0);
17250 			if (bpf_in_stack_arg_cnt(&env->subprog_info[cur_func(env)->subprogno]))
17251 				cur_func(env)->no_stack_arg_load = true;
17252 			if (insn->src_reg == BPF_PSEUDO_CALL)
17253 				return check_func_call(env, insn, &env->insn_idx);
17254 			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
17255 				return check_kfunc_call(env, insn, &env->insn_idx);
17256 			return check_helper_call(env, insn, &env->insn_idx);
17257 		} else if (opcode == BPF_JA) {
17258 			if (BPF_SRC(insn->code) == BPF_X)
17259 				return check_indirect_jump(env, insn);
17260 
17261 			if (class == BPF_JMP)
17262 				env->insn_idx += insn->off + 1;
17263 			else
17264 				env->insn_idx += insn->imm + 1;
17265 			return INSN_IDX_UPDATED;
17266 		} else if (opcode == BPF_EXIT) {
17267 			return process_bpf_exit_full(env, do_print_state, false);
17268 		}
17269 		return check_cond_jmp_op(env, insn, &env->insn_idx);
17270 	}
17271 	case BPF_LD: {
17272 		u8 mode = BPF_MODE(insn->code);
17273 
17274 		if (mode == BPF_ABS || mode == BPF_IND)
17275 			return check_ld_abs(env, insn);
17276 
17277 		if (mode == BPF_IMM) {
17278 			err = check_ld_imm(env, insn);
17279 			if (err)
17280 				return err;
17281 
17282 			env->insn_idx++;
17283 			sanitize_mark_insn_seen(env);
17284 		}
17285 		return 0;
17286 	}
17287 	}
17288 	/* all class values are handled above. silence compiler warning */
17289 	return -EFAULT;
17290 }
17291 
/*
 * Main verification loop.
 *
 * Starting at env->insn_idx, process one instruction per iteration by handing
 * it to do_check_insn(). When a path ends (exit, pruned state, or a
 * speculative path that is cut short) the next pending state is fetched with
 * pop_stack(); the loop finishes once pop_stack() reports -ENOENT.
 *
 * Returns 0 when the loop finishes that way, otherwise a negative error:
 * -EFAULT for an out-of-range instruction index or a failed internal
 * consistency check, -E2BIG when more than BPF_COMPLEXITY_LIMIT_INSNS
 * instructions were processed, -EAGAIN when a signal is pending, or the
 * error returned by one of the callees.
 */
static int do_check(struct bpf_verifier_env *env)
{
	/* passed to pop_stack(); set unless BPF_LOG_LEVEL2 logging is on */
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_insn *insns = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	/* set after every successful pop_stack() so the new path gets a header */
	bool do_print_state = false;
	int prev_insn_idx = -1;

	for (;;) {
		struct bpf_insn *insn;
		struct bpf_insn_aux_data *insn_aux;
		int err;

		/* reset current history entry on each new instruction */
		env->cur_hist_ent = NULL;

		env->prev_insn_idx = prev_insn_idx;
		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		insn_aux = &env->insn_aux_data[env->insn_idx];

		/* every iteration counts against the complexity budget */
		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				env->insn_processed);
			return -E2BIG;
		}

		/* record in the state where we are and where we came from */
		state->last_insn_idx = env->prev_insn_idx;
		state->insn_idx = env->insn_idx;

		if (bpf_is_prune_point(env, env->insn_idx)) {
			err = bpf_is_state_visited(env, env->insn_idx);
			if (err < 0)
				return err;
			if (err == 1) {
				/* found equivalent state, can prune the search */
				if (env->log.level & BPF_LOG_LEVEL) {
					if (do_print_state)
						verbose(env, "\nfrom %d to %d%s: safe\n",
							env->prev_insn_idx, env->insn_idx,
							env->cur_state->speculative ?
							" (speculative execution)" : "");
					else
						verbose(env, "%d: safe\n", env->insn_idx);
				}
				goto process_bpf_exit;
			}
		}

		if (bpf_is_jmp_point(env, env->insn_idx)) {
			err = bpf_push_jmp_history(env, state, 0, 0, 0, 0);
			if (err)
				return err;
		}

		/* let the caller bail out of a long verification on a signal */
		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		/* first insn after a pop_stack(): print where this path comes from */
		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
			verbose(env, "\nfrom %d to %d%s:",
				env->prev_insn_idx, env->insn_idx,
				env->cur_state->speculative ?
				" (speculative execution)" : "");
			print_verifier_state(env, state, state->curframe, true);
			do_print_state = false;
		}

		if (env->log.level & BPF_LOG_LEVEL) {
			if (verifier_state_scratched(env))
				print_insn_state(env, state, state->curframe);

			verbose_linfo(env, env->insn_idx, "; ");
			env->prev_log_pos = env->log.end_pos;
			verbose(env, "%d: ", env->insn_idx);
			bpf_verbose_insn(env, insn);
			/* remember how many log bytes the "idx: insn" text took */
			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
			env->prev_log_pos = env->log.end_pos;
		}

		if (bpf_prog_is_offloaded(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
			if (err)
				return err;
		}

		sanitize_mark_insn_seen(env);
		prev_insn_idx = env->insn_idx;

		/* Sanity check: precomputed constants must match verifier state */
		if (!state->speculative && insn_aux->const_reg_mask) {
			struct bpf_reg_state *regs = cur_regs(env);
			u16 mask = insn_aux->const_reg_mask;

			for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) {
				u32 cval = insn_aux->const_reg_vals[r];

				/* only registers flagged in the mask that currently
				 * hold a constant scalar are compared
				 */
				if (!(mask & BIT(r)))
					continue;
				if (regs[r].type != SCALAR_VALUE)
					continue;
				if (!tnum_is_const(regs[r].var_off))
					continue;
				/* comparison is on the low 32 bits only */
				if (verifier_bug_if((u32)regs[r].var_off.value != cval,
						    env, "const R%d: %u != %llu",
						    r, cval, regs[r].var_off.value))
					return -EFAULT;
			}
		}

		/* Reduce verification complexity by stopping speculative path
		 * verification when a nospec is encountered.
		 */
		if (state->speculative && insn_aux->nospec)
			goto process_bpf_exit;

		err = do_check_insn(env, &do_print_state);
		if (error_recoverable_with_nospec(err) && state->speculative) {
			/* Prevent this speculative path from ever reaching the
			 * insn that would have been unsafe to execute.
			 */
			insn_aux->nospec = true;
			/* If it was an ADD/SUB insn, potentially remove any
			 * markings for alu sanitization.
			 */
			insn_aux->alu_state = 0;
			goto process_bpf_exit;
		} else if (err < 0) {
			return err;
		} else if (err == PROCESS_BPF_EXIT) {
			goto process_bpf_exit;
		} else if (err == INSN_IDX_UPDATED) {
			/* the callee already updated env->insn_idx; do not
			 * advance it again
			 */
		} else if (err == 0) {
			env->insn_idx++;
		}

		if (state->speculative && insn_aux->nospec_result) {
			/* If we are on a path that performed a jump-op, this
			 * may skip a nospec patched-in after the jump. This can
			 * currently never happen because nospec_result is only
			 * used for the write-ops
			 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
			 * calls. These must never skip the following insn
			 * (i.e., bpf_insn_successors()'s opcode_info.can_jump
			 * is false). Still, add a warning to document this in
			 * case nospec_result is used elsewhere in the future.
			 *
			 * All non-branch instructions have a single
			 * fall-through edge. For these, nospec_result should
			 * already work.
			 */
			if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
					     BPF_CLASS(insn->code) == BPF_JMP32) &&
					    BPF_OP(insn->code) != BPF_CALL, env,
					    "speculation barrier after jump instruction may not have the desired effect"))
				return -EFAULT;
			/* Path termination. This label is also the target of the
			 * "goto process_bpf_exit" statements earlier in the loop
			 * body, which jump into this block from outside the
			 * enclosing if.
			 */
process_bpf_exit:
			mark_verifier_state_scratched(env);
			err = bpf_update_branch_counts(env, env->cur_state);
			if (err)
				return err;
			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
					pop_log);
			if (err < 0) {
				/* -ENOENT ends the loop; anything else is a real error */
				if (err != -ENOENT)
					return err;
				break;
			} else {
				do_print_state = true;
				continue;
			}
		}
	}

	return 0;
}
17478 
17479 static int find_btf_percpu_datasec(struct btf *btf)
17480 {
17481 	const struct btf_type *t;
17482 	const char *tname;
17483 	int i, n;
17484 
17485 	/*
17486 	 * Both vmlinux and module each have their own ".data..percpu"
17487 	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17488 	 * types to look at only module's own BTF types.
17489 	 */
17490 	n = btf_nr_types(btf);
17491 	for (i = btf_named_start_id(btf, true); i < n; i++) {
17492 		t = btf_type_by_id(btf, i);
17493 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17494 			continue;
17495 
17496 		tname = btf_name_by_offset(btf, t->name_off);
17497 		if (!strcmp(tname, ".data..percpu"))
17498 			return i;
17499 	}
17500 
17501 	return -ENOENT;
17502 }
17503 
17504 /*
17505  * Add btf to the env->used_btfs array. If needed, refcount the
17506  * corresponding kernel module. To simplify caller's logic
17507  * in case of error or if btf was added before the function
17508  * decreases the btf refcount.
17509  */
17510 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
17511 {
17512 	struct btf_mod_pair *btf_mod;
17513 	int ret = 0;
17514 	int i;
17515 
17516 	/* check whether we recorded this BTF (and maybe module) already */
17517 	for (i = 0; i < env->used_btf_cnt; i++)
17518 		if (env->used_btfs[i].btf == btf)
17519 			goto ret_put;
17520 
17521 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
17522 		verbose(env, "The total number of btfs per program has reached the limit of %u\n",
17523 			MAX_USED_BTFS);
17524 		ret = -E2BIG;
17525 		goto ret_put;
17526 	}
17527 
17528 	btf_mod = &env->used_btfs[env->used_btf_cnt];
17529 	btf_mod->btf = btf;
17530 	btf_mod->module = NULL;
17531 
17532 	/* if we reference variables from kernel module, bump its refcount */
17533 	if (btf_is_module(btf)) {
17534 		btf_mod->module = btf_try_get_module(btf);
17535 		if (!btf_mod->module) {
17536 			ret = -ENXIO;
17537 			goto ret_put;
17538 		}
17539 	}
17540 
17541 	env->used_btf_cnt++;
17542 	return 0;
17543 
17544 ret_put:
17545 	/* Either error or this BTF was already added */
17546 	btf_put(btf);
17547 	return ret;
17548 }
17549 
17550 /* replace pseudo btf_id with kernel symbol address */
17551 static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
17552 				 struct bpf_insn *insn,
17553 				 struct bpf_insn_aux_data *aux,
17554 				 struct btf *btf)
17555 {
17556 	const struct btf_var_secinfo *vsi;
17557 	const struct btf_type *datasec;
17558 	const struct btf_type *t;
17559 	const char *sym_name;
17560 	bool percpu = false;
17561 	u32 type, id = insn->imm;
17562 	s32 datasec_id;
17563 	u64 addr;
17564 	int i;
17565 
17566 	t = btf_type_by_id(btf, id);
17567 	if (!t) {
17568 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
17569 		return -ENOENT;
17570 	}
17571 
17572 	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
17573 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
17574 		return -EINVAL;
17575 	}
17576 
17577 	sym_name = btf_name_by_offset(btf, t->name_off);
17578 	addr = kallsyms_lookup_name(sym_name);
17579 	if (!addr) {
17580 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
17581 			sym_name);
17582 		return -ENOENT;
17583 	}
17584 	insn[0].imm = (u32)addr;
17585 	insn[1].imm = addr >> 32;
17586 
17587 	if (btf_type_is_func(t)) {
17588 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17589 		aux->btf_var.mem_size = 0;
17590 		return 0;
17591 	}
17592 
17593 	datasec_id = find_btf_percpu_datasec(btf);
17594 	if (datasec_id > 0) {
17595 		datasec = btf_type_by_id(btf, datasec_id);
17596 		for_each_vsi(i, datasec, vsi) {
17597 			if (vsi->type == id) {
17598 				percpu = true;
17599 				break;
17600 			}
17601 		}
17602 	}
17603 
17604 	type = t->type;
17605 	t = btf_type_skip_modifiers(btf, type, NULL);
17606 	if (percpu) {
17607 		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
17608 		aux->btf_var.btf = btf;
17609 		aux->btf_var.btf_id = type;
17610 	} else if (!btf_type_is_struct(t)) {
17611 		const struct btf_type *ret;
17612 		const char *tname;
17613 		u32 tsize;
17614 
17615 		/* resolve the type size of ksym. */
17616 		ret = btf_resolve_size(btf, t, &tsize);
17617 		if (IS_ERR(ret)) {
17618 			tname = btf_name_by_offset(btf, t->name_off);
17619 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
17620 				tname, PTR_ERR(ret));
17621 			return -EINVAL;
17622 		}
17623 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17624 		aux->btf_var.mem_size = tsize;
17625 	} else {
17626 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
17627 		aux->btf_var.btf = btf;
17628 		aux->btf_var.btf_id = type;
17629 	}
17630 
17631 	return 0;
17632 }
17633 
17634 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
17635 			       struct bpf_insn *insn,
17636 			       struct bpf_insn_aux_data *aux)
17637 {
17638 	struct btf *btf;
17639 	int btf_fd;
17640 	int err;
17641 
17642 	btf_fd = insn[1].imm;
17643 	if (btf_fd) {
17644 		btf = btf_get_by_fd(btf_fd);
17645 		if (IS_ERR(btf)) {
17646 			verbose(env, "invalid module BTF object FD specified.\n");
17647 			return -EINVAL;
17648 		}
17649 	} else {
17650 		if (!btf_vmlinux) {
17651 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
17652 			return -EINVAL;
17653 		}
17654 		btf_get(btf_vmlinux);
17655 		btf = btf_vmlinux;
17656 	}
17657 
17658 	err = __check_pseudo_btf_id(env, insn, aux, btf);
17659 	if (err) {
17660 		btf_put(btf);
17661 		return err;
17662 	}
17663 
17664 	return __add_used_btf(env, btf);
17665 }
17666 
17667 static bool is_tracing_prog_type(enum bpf_prog_type type)
17668 {
17669 	switch (type) {
17670 	case BPF_PROG_TYPE_KPROBE:
17671 	case BPF_PROG_TYPE_TRACEPOINT:
17672 	case BPF_PROG_TYPE_PERF_EVENT:
17673 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
17674 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
17675 		return true;
17676 	default:
17677 		return false;
17678 	}
17679 }
17680 
17681 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
17682 {
17683 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
17684 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
17685 }
17686 
17687 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
17688 					struct bpf_map *map,
17689 					struct bpf_prog *prog)
17690 
17691 {
17692 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
17693 
17694 	if (map->excl_prog_sha &&
17695 	    memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
17696 		verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
17697 		return -EACCES;
17698 	}
17699 
17700 	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
17701 	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
17702 		if (is_tracing_prog_type(prog_type)) {
17703 			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
17704 			return -EINVAL;
17705 		}
17706 	}
17707 
17708 	if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
17709 		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
17710 			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
17711 			return -EINVAL;
17712 		}
17713 
17714 		if (is_tracing_prog_type(prog_type)) {
17715 			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
17716 			return -EINVAL;
17717 		}
17718 	}
17719 
17720 	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
17721 	    !bpf_offload_prog_map_match(prog, map)) {
17722 		verbose(env, "offload device mismatch between prog and map\n");
17723 		return -EINVAL;
17724 	}
17725 
17726 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
17727 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
17728 		return -EINVAL;
17729 	}
17730 
17731 	if (prog->sleepable)
17732 		switch (map->map_type) {
17733 		case BPF_MAP_TYPE_HASH:
17734 		case BPF_MAP_TYPE_RHASH:
17735 		case BPF_MAP_TYPE_LRU_HASH:
17736 		case BPF_MAP_TYPE_ARRAY:
17737 		case BPF_MAP_TYPE_PERCPU_HASH:
17738 		case BPF_MAP_TYPE_PERCPU_ARRAY:
17739 		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
17740 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
17741 		case BPF_MAP_TYPE_HASH_OF_MAPS:
17742 		case BPF_MAP_TYPE_RINGBUF:
17743 		case BPF_MAP_TYPE_USER_RINGBUF:
17744 		case BPF_MAP_TYPE_INODE_STORAGE:
17745 		case BPF_MAP_TYPE_SK_STORAGE:
17746 		case BPF_MAP_TYPE_TASK_STORAGE:
17747 		case BPF_MAP_TYPE_CGRP_STORAGE:
17748 		case BPF_MAP_TYPE_QUEUE:
17749 		case BPF_MAP_TYPE_STACK:
17750 		case BPF_MAP_TYPE_ARENA:
17751 		case BPF_MAP_TYPE_INSN_ARRAY:
17752 		case BPF_MAP_TYPE_PROG_ARRAY:
17753 			break;
17754 		default:
17755 			verbose(env,
17756 				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
17757 			return -EINVAL;
17758 		}
17759 
17760 	if (bpf_map_is_cgroup_storage(map) &&
17761 	    bpf_cgroup_storage_assign(env->prog->aux, map)) {
17762 		verbose(env, "only one cgroup storage of each type is allowed\n");
17763 		return -EBUSY;
17764 	}
17765 
17766 	if (map->map_type == BPF_MAP_TYPE_ARENA) {
17767 		if (env->prog->aux->arena) {
17768 			verbose(env, "Only one arena per program\n");
17769 			return -EBUSY;
17770 		}
17771 		if (!env->allow_ptr_leaks || !env->bpf_capable) {
17772 			verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
17773 			return -EPERM;
17774 		}
17775 		if (!env->prog->jit_requested) {
17776 			verbose(env, "JIT is required to use arena\n");
17777 			return -EOPNOTSUPP;
17778 		}
17779 		if (!bpf_jit_supports_arena()) {
17780 			verbose(env, "JIT doesn't support arena\n");
17781 			return -EOPNOTSUPP;
17782 		}
17783 		env->prog->aux->arena = (void *)map;
17784 		if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
17785 			verbose(env, "arena's user address must be set via map_extra or mmap()\n");
17786 			return -EINVAL;
17787 		}
17788 	}
17789 
17790 	return 0;
17791 }
17792 
17793 static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
17794 {
17795 	int i, err;
17796 
17797 	/* check whether we recorded this map already */
17798 	for (i = 0; i < env->used_map_cnt; i++)
17799 		if (env->used_maps[i] == map)
17800 			return i;
17801 
17802 	if (env->used_map_cnt >= MAX_USED_MAPS) {
17803 		verbose(env, "The total number of maps per program has reached the limit of %u\n",
17804 			MAX_USED_MAPS);
17805 		return -E2BIG;
17806 	}
17807 
17808 	err = check_map_prog_compatibility(env, map, env->prog);
17809 	if (err)
17810 		return err;
17811 
17812 	if (env->prog->sleepable)
17813 		atomic64_inc(&map->sleepable_refcnt);
17814 
17815 	/* hold the map. If the program is rejected by verifier,
17816 	 * the map will be released by release_maps() or it
17817 	 * will be used by the valid program until it's unloaded
17818 	 * and all maps are released in bpf_free_used_maps()
17819 	 */
17820 	bpf_map_inc(map);
17821 
17822 	env->used_maps[env->used_map_cnt++] = map;
17823 
17824 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
17825 		err = bpf_insn_array_init(map, env->prog);
17826 		if (err) {
17827 			verbose(env, "Failed to properly initialize insn array\n");
17828 			return err;
17829 		}
17830 		env->insn_array_maps[env->insn_array_map_cnt++] = map;
17831 	}
17832 
17833 	return env->used_map_cnt - 1;
17834 }
17835 
17836 /* Add map behind fd to used maps list, if it's not already there, and return
17837  * its index.
17838  * Returns <0 on error, or >= 0 index, on success.
17839  */
17840 static int add_used_map(struct bpf_verifier_env *env, int fd)
17841 {
17842 	struct bpf_map *map;
17843 	CLASS(fd, f)(fd);
17844 
17845 	map = __bpf_map_get(f);
17846 	if (IS_ERR(map)) {
17847 		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
17848 		return PTR_ERR(map);
17849 	}
17850 
17851 	return __add_used_map(env, map);
17852 }
17853 
17854 static int check_alu_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17855 {
17856 	u8 class = BPF_CLASS(insn->code);
17857 	u8 opcode = BPF_OP(insn->code);
17858 
17859 	switch (opcode) {
17860 	case BPF_NEG:
17861 		if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 ||
17862 		    insn->off != 0 || insn->imm != 0) {
17863 			verbose(env, "BPF_NEG uses reserved fields\n");
17864 			return -EINVAL;
17865 		}
17866 		return 0;
17867 	case BPF_END:
17868 		if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
17869 		    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
17870 		    (class == BPF_ALU64 && BPF_SRC(insn->code) != BPF_TO_LE)) {
17871 			verbose(env, "BPF_END uses reserved fields\n");
17872 			return -EINVAL;
17873 		}
17874 		return 0;
17875 	case BPF_MOV:
17876 		if (BPF_SRC(insn->code) == BPF_X) {
17877 			if (class == BPF_ALU) {
17878 				if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
17879 				    insn->imm) {
17880 					verbose(env, "BPF_MOV uses reserved fields\n");
17881 					return -EINVAL;
17882 				}
17883 			} else if (insn->off == BPF_ADDR_SPACE_CAST) {
17884 				if (insn->imm != 1 && insn->imm != 1u << 16) {
17885 					verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
17886 					return -EINVAL;
17887 				}
17888 			} else if ((insn->off != 0 && insn->off != 8 &&
17889 				    insn->off != 16 && insn->off != 32) || insn->imm) {
17890 				verbose(env, "BPF_MOV uses reserved fields\n");
17891 				return -EINVAL;
17892 			}
17893 		} else if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
17894 			verbose(env, "BPF_MOV uses reserved fields\n");
17895 			return -EINVAL;
17896 		}
17897 		return 0;
17898 	case BPF_ADD:
17899 	case BPF_SUB:
17900 	case BPF_AND:
17901 	case BPF_OR:
17902 	case BPF_XOR:
17903 	case BPF_LSH:
17904 	case BPF_RSH:
17905 	case BPF_ARSH:
17906 	case BPF_MUL:
17907 	case BPF_DIV:
17908 	case BPF_MOD:
17909 		if (BPF_SRC(insn->code) == BPF_X) {
17910 			if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
17911 			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
17912 				verbose(env, "BPF_ALU uses reserved fields\n");
17913 				return -EINVAL;
17914 			}
17915 		} else if (insn->src_reg != BPF_REG_0 ||
17916 			   (insn->off != 0 && insn->off != 1) ||
17917 			   (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
17918 			verbose(env, "BPF_ALU uses reserved fields\n");
17919 			return -EINVAL;
17920 		}
17921 		return 0;
17922 	default:
17923 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17924 		return -EINVAL;
17925 	}
17926 }
17927 
17928 static int check_jmp_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17929 {
17930 	u8 class = BPF_CLASS(insn->code);
17931 	u8 opcode = BPF_OP(insn->code);
17932 
17933 	switch (opcode) {
17934 	case BPF_CALL:
17935 		if (BPF_SRC(insn->code) != BPF_K ||
17936 		    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && insn->off != 0) ||
17937 		    (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL &&
17938 		     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
17939 		    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
17940 			verbose(env, "BPF_CALL uses reserved fields\n");
17941 			return -EINVAL;
17942 		}
17943 		return 0;
17944 	case BPF_JA:
17945 		if (BPF_SRC(insn->code) == BPF_X) {
17946 			if (insn->src_reg != BPF_REG_0 || insn->imm != 0 || insn->off != 0) {
17947 				verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
17948 				return -EINVAL;
17949 			}
17950 		} else if (insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
17951 			   (class == BPF_JMP && insn->imm != 0) ||
17952 			   (class == BPF_JMP32 && insn->off != 0)) {
17953 			verbose(env, "BPF_JA uses reserved fields\n");
17954 			return -EINVAL;
17955 		}
17956 		return 0;
17957 	case BPF_EXIT:
17958 		if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 ||
17959 		    insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
17960 		    class == BPF_JMP32) {
17961 			verbose(env, "BPF_EXIT uses reserved fields\n");
17962 			return -EINVAL;
17963 		}
17964 		return 0;
17965 	case BPF_JCOND:
17966 		if (insn->code != (BPF_JMP | BPF_JCOND) || insn->src_reg != BPF_MAY_GOTO ||
17967 		    insn->dst_reg || insn->imm) {
17968 			verbose(env, "invalid may_goto imm %d\n", insn->imm);
17969 			return -EINVAL;
17970 		}
17971 		return 0;
17972 	default:
17973 		if (BPF_SRC(insn->code) == BPF_X) {
17974 			if (insn->imm != 0) {
17975 				verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17976 				return -EINVAL;
17977 			}
17978 		} else if (insn->src_reg != BPF_REG_0) {
17979 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17980 			return -EINVAL;
17981 		}
17982 		return 0;
17983 	}
17984 }
17985 
17986 static int check_insn_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17987 {
17988 	switch (BPF_CLASS(insn->code)) {
17989 	case BPF_ALU:
17990 	case BPF_ALU64:
17991 		return check_alu_fields(env, insn);
17992 	case BPF_LDX:
17993 		if ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
17994 		    insn->imm != 0) {
17995 			verbose(env, "BPF_LDX uses reserved fields\n");
17996 			return -EINVAL;
17997 		}
17998 		return 0;
17999 	case BPF_STX:
18000 		if (BPF_MODE(insn->code) == BPF_ATOMIC)
18001 			return 0;
18002 		if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
18003 			verbose(env, "BPF_STX uses reserved fields\n");
18004 			return -EINVAL;
18005 		}
18006 		return 0;
18007 	case BPF_ST:
18008 		if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
18009 			verbose(env, "BPF_ST uses reserved fields\n");
18010 			return -EINVAL;
18011 		}
18012 		return 0;
18013 	case BPF_JMP:
18014 	case BPF_JMP32:
18015 		return check_jmp_fields(env, insn);
18016 	case BPF_LD: {
18017 		u8 mode = BPF_MODE(insn->code);
18018 
18019 		if (mode == BPF_ABS || mode == BPF_IND) {
18020 			if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
18021 			    BPF_SIZE(insn->code) == BPF_DW ||
18022 			    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
18023 				verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
18024 				return -EINVAL;
18025 			}
18026 		} else if (mode != BPF_IMM) {
18027 			verbose(env, "invalid BPF_LD mode\n");
18028 			return -EINVAL;
18029 		}
18030 		return 0;
18031 	}
18032 	default:
18033 		verbose(env, "unknown insn class %d\n", BPF_CLASS(insn->code));
18034 		return -EINVAL;
18035 	}
18036 }
18037 
/*
 * Check that insns are sane and rewrite pseudo imm in ld_imm64 instructions:
 *
 * 1. if it accesses map FD, replace it with actual map pointer.
 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
 *
 * Every instruction gets its register numbers range-checked. A ld_imm64
 * (which occupies two instruction slots) is validated and resolved here;
 * every other instruction must be in the opcode table and pass
 * check_insn_fields().
 *
 * Returns 0 on success or a negative error; the first failing instruction
 * stops the walk.
 *
 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
 */
static int check_and_resolve_insns(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i, err;

	err = bpf_prog_calc_tag(env->prog);
	if (err)
		return err;

	for (i = 0; i < insn_cnt; i++, insn++) {
		/* out-of-range register numbers are tolerated only for the
		 * stack-argument load/store forms
		 */
		if (insn->dst_reg >= MAX_BPF_REG &&
		    !is_stack_arg_st(insn) && !is_stack_arg_stx(insn)) {
			verbose(env, "R%d is invalid\n", insn->dst_reg);
			return -EINVAL;
		}
		if (insn->src_reg >= MAX_BPF_REG && !is_stack_arg_ldx(insn)) {
			verbose(env, "R%d is invalid\n", insn->src_reg);
			return -EINVAL;
		}
		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
			struct bpf_insn_aux_data *aux;
			struct bpf_map *map;
			int map_idx;
			u64 addr;
			u32 fd;

			/* the second slot must exist and carry nothing but imm */
			if (i == insn_cnt - 1 || insn[1].code != 0 ||
			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
			    insn[1].off != 0) {
				verbose(env, "invalid bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			if (insn[0].off != 0) {
				verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
				return -EINVAL;
			}

			if (insn[0].src_reg == 0)
				/* valid generic load 64-bit imm */
				goto next_insn;

			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
				aux = &env->insn_aux_data[i];
				err = check_pseudo_btf_id(env, insn, aux);
				if (err)
					return err;
				goto next_insn;
			}

			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
				aux = &env->insn_aux_data[i];
				aux->ptr_type = PTR_TO_FUNC;
				goto next_insn;
			}

			/* In final convert_pseudo_ld_imm64() step, this is
			 * converted into regular 64-bit imm load insn.
			 */
			switch (insn[0].src_reg) {
			case BPF_PSEUDO_MAP_VALUE:
			case BPF_PSEUDO_MAP_IDX_VALUE:
				break;
			case BPF_PSEUDO_MAP_FD:
			case BPF_PSEUDO_MAP_IDX:
				/* a plain map reference must leave insn[1].imm zero */
				if (insn[1].imm == 0)
					break;
				fallthrough;
			default:
				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			/* obtain the map fd: either directly from imm, or by
			 * using imm as an index into env->fd_array
			 */
			switch (insn[0].src_reg) {
			case BPF_PSEUDO_MAP_IDX_VALUE:
			case BPF_PSEUDO_MAP_IDX:
				if (bpfptr_is_null(env->fd_array)) {
					verbose(env, "fd_idx without fd_array is invalid\n");
					return -EPROTO;
				}
				if (copy_from_bpfptr_offset(&fd, env->fd_array,
							    insn[0].imm * sizeof(fd),
							    sizeof(fd)))
					return -EFAULT;
				break;
			default:
				fd = insn[0].imm;
				break;
			}

			map_idx = add_used_map(env, fd);
			if (map_idx < 0)
				return map_idx;
			map = env->used_maps[map_idx];

			aux = &env->insn_aux_data[i];
			aux->map_index = map_idx;

			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
				/* map reference: load the map pointer itself */
				addr = (unsigned long)map;
			} else {
				/* map value reference: insn[1].imm is the offset
				 * into the value
				 */
				u32 off = insn[1].imm;

				if (!map->ops->map_direct_value_addr) {
					verbose(env, "no direct value access support for this map type\n");
					return -EINVAL;
				}

				err = map->ops->map_direct_value_addr(map, &addr, off);
				if (err) {
					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
						map->value_size, off);
					return err;
				}

				aux->map_off = off;
				addr += off;
			}

			/* store the 64-bit address split over both slots */
			insn[0].imm = (u32)addr;
			insn[1].imm = addr >> 32;

			/* skip the second slot of the ld_imm64; the loop header
			 * then steps past it to the next real instruction
			 */
next_insn:
			insn++;
			i++;
			continue;
		}

		/* Basic sanity check before we invest more work here. */
		if (!bpf_opcode_in_insntable(insn->code)) {
			verbose(env, "unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}

		err = check_insn_fields(env, insn);
		if (err)
			return err;
	}

	/* now all pseudo BPF_LD_IMM64 instructions load valid
	 * 'struct bpf_map *' into a register instead of user map_fd.
	 * These pointers will be used later by verifier to validate map access.
	 */
	return 0;
}
18193 
/* Drop refcnt of maps used by the rejected program: hands the first
 * env->used_map_cnt entries of env->used_maps to __bpf_free_used_maps().
 */
static void release_maps(struct bpf_verifier_env *env)
{
	__bpf_free_used_maps(env->prog->aux, env->used_maps,
			     env->used_map_cnt);
}
18200 
/* Drop refcnt of BTFs used by the rejected program: hands the first
 * env->used_btf_cnt entries of env->used_btfs to __bpf_free_used_btfs().
 */
static void release_btfs(struct bpf_verifier_env *env)
{
	__bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
}
18206 
18207 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
18208 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18209 {
18210 	struct bpf_insn *insn = env->prog->insnsi;
18211 	int insn_cnt = env->prog->len;
18212 	int i;
18213 
18214 	for (i = 0; i < insn_cnt; i++, insn++) {
18215 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
18216 			continue;
18217 		if (insn->src_reg == BPF_PSEUDO_FUNC)
18218 			continue;
18219 		insn->src_reg = 0;
18220 	}
18221 }
18222 
18223 static void release_insn_arrays(struct bpf_verifier_env *env)
18224 {
18225 	int i;
18226 
18227 	for (i = 0; i < env->insn_array_map_cnt; i++)
18228 		bpf_insn_array_release(env->insn_array_maps[i]);
18229 }
18230 
18231 
18232 
18233 /* The verifier does more data flow analysis than llvm and will not
18234  * explore branches that are dead at run time. Malicious programs can
18235  * have dead code too. Therefore replace all dead at-run-time code
18236  * with 'ja -1'.
18237  *
18238  * Just nops are not optimal, e.g. if they would sit at the end of the
18239  * program and through another bug we would manage to jump there, then
18240  * we'd execute beyond program memory otherwise. Returning exception
18241  * code also wouldn't work since we can have subprogs where the dead
18242  * code could be located.
18243  */
18244 static void sanitize_dead_code(struct bpf_verifier_env *env)
18245 {
18246 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18247 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
18248 	struct bpf_insn *insn = env->prog->insnsi;
18249 	const int insn_cnt = env->prog->len;
18250 	int i;
18251 
18252 	for (i = 0; i < insn_cnt; i++) {
18253 		if (aux_data[i].seen)
18254 			continue;
18255 		memcpy(insn + i, &trap, sizeof(trap));
18256 		aux_data[i].zext_dst = false;
18257 	}
18258 }
18259 
18260 
18261 
18262 static void free_states(struct bpf_verifier_env *env)
18263 {
18264 	struct bpf_verifier_state_list *sl;
18265 	struct list_head *head, *pos, *tmp;
18266 	struct bpf_scc_info *info;
18267 	int i, j;
18268 
18269 	bpf_free_verifier_state(env->cur_state, true);
18270 	env->cur_state = NULL;
18271 	while (!pop_stack(env, NULL, NULL, false));
18272 
18273 	list_for_each_safe(pos, tmp, &env->free_list) {
18274 		sl = container_of(pos, struct bpf_verifier_state_list, node);
18275 		bpf_free_verifier_state(&sl->state, false);
18276 		kfree(sl);
18277 	}
18278 	INIT_LIST_HEAD(&env->free_list);
18279 
18280 	for (i = 0; i < env->scc_cnt; ++i) {
18281 		info = env->scc_info[i];
18282 		if (!info)
18283 			continue;
18284 		for (j = 0; j < info->num_visits; j++)
18285 			bpf_free_backedges(&info->visits[j]);
18286 		kvfree(info);
18287 		env->scc_info[i] = NULL;
18288 	}
18289 
18290 	if (!env->explored_states)
18291 		return;
18292 
18293 	for (i = 0; i < state_htab_size(env); i++) {
18294 		head = &env->explored_states[i];
18295 
18296 		list_for_each_safe(pos, tmp, head) {
18297 			sl = container_of(pos, struct bpf_verifier_state_list, node);
18298 			bpf_free_verifier_state(&sl->state, false);
18299 			kfree(sl);
18300 		}
18301 		INIT_LIST_HEAD(&env->explored_states[i]);
18302 	}
18303 }
18304 
18305 static int do_check_common(struct bpf_verifier_env *env, int subprog)
18306 {
18307 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
18308 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
18309 	struct bpf_prog_aux *aux = env->prog->aux;
18310 	struct bpf_verifier_state *state;
18311 	struct bpf_reg_state *regs;
18312 	int ret, i;
18313 
18314 	env->prev_linfo = NULL;
18315 	env->pass_cnt++;
18316 
18317 	state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT);
18318 	if (!state)
18319 		return -ENOMEM;
18320 	state->curframe = 0;
18321 	state->speculative = false;
18322 	state->branches = 1;
18323 	state->in_sleepable = env->prog->sleepable;
18324 	state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT);
18325 	if (!state->frame[0]) {
18326 		kfree(state);
18327 		return -ENOMEM;
18328 	}
18329 	env->cur_state = state;
18330 	init_func_state(env, state->frame[0],
18331 			BPF_MAIN_FUNC /* callsite */,
18332 			0 /* frameno */,
18333 			subprog);
18334 	state->first_insn_idx = env->subprog_info[subprog].start;
18335 	state->last_insn_idx = -1;
18336 
18337 	regs = state->frame[state->curframe]->regs;
18338 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
18339 		const char *sub_name = subprog_name(env, subprog);
18340 		struct bpf_subprog_arg_info *arg;
18341 		struct bpf_reg_state *reg;
18342 
18343 		if (env->log.level & BPF_LOG_LEVEL)
18344 			verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
18345 		ret = btf_prepare_func_args(env, subprog);
18346 		if (ret)
18347 			goto out;
18348 
18349 		if (subprog_is_exc_cb(env, subprog)) {
18350 			state->frame[0]->in_exception_callback_fn = true;
18351 
18352 			/*
18353 			 * Global functions are scalar or void, make sure
18354 			 * we return a scalar.
18355 			 */
18356 			if (subprog_returns_void(env, subprog)) {
18357 				verbose(env, "exception cb cannot return void\n");
18358 				ret = -EINVAL;
18359 				goto out;
18360 			}
18361 
18362 			/* Also ensure the callback only has a single scalar argument. */
18363 			if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
18364 				verbose(env, "exception cb only supports single integer argument\n");
18365 				ret = -EINVAL;
18366 				goto out;
18367 			}
18368 		}
18369 		for (i = BPF_REG_1; i <= min_t(u32, sub->arg_cnt, MAX_BPF_FUNC_REG_ARGS); i++) {
18370 			arg = &sub->args[i - BPF_REG_1];
18371 			reg = &regs[i];
18372 
18373 			if (arg->arg_type == ARG_PTR_TO_CTX) {
18374 				reg->type = PTR_TO_CTX;
18375 				mark_reg_known_zero(env, regs, i);
18376 			} else if (arg->arg_type == ARG_ANYTHING) {
18377 				reg->type = SCALAR_VALUE;
18378 				mark_reg_unknown(env, regs, i);
18379 			} else if (arg->arg_type == ARG_PTR_TO_DYNPTR) {
18380 				/* assume unspecial LOCAL dynptr type */
18381 				__mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen, 0);
18382 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
18383 				reg->type = PTR_TO_MEM;
18384 				reg->type |= arg->arg_type &
18385 					     (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY);
18386 				mark_reg_known_zero(env, regs, i);
18387 				reg->mem_size = arg->mem_size;
18388 				if (arg->arg_type & PTR_MAYBE_NULL)
18389 					reg->id = ++env->id_gen;
18390 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
18391 				reg->type = PTR_TO_BTF_ID;
18392 				if (arg->arg_type & PTR_MAYBE_NULL)
18393 					reg->type |= PTR_MAYBE_NULL;
18394 				if (arg->arg_type & PTR_UNTRUSTED)
18395 					reg->type |= PTR_UNTRUSTED;
18396 				if (arg->arg_type & PTR_TRUSTED)
18397 					reg->type |= PTR_TRUSTED;
18398 				mark_reg_known_zero(env, regs, i);
18399 				reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
18400 				reg->btf_id = arg->btf_id;
18401 				reg->id = ++env->id_gen;
18402 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
18403 				/* caller can pass either PTR_TO_ARENA or SCALAR */
18404 				mark_reg_unknown(env, regs, i);
18405 			} else {
18406 				verifier_bug(env, "unhandled arg#%d type %d",
18407 					     i - BPF_REG_1 + 1, arg->arg_type);
18408 				ret = -EFAULT;
18409 				goto out;
18410 			}
18411 		}
18412 		if (env->prog->type == BPF_PROG_TYPE_EXT && sub->arg_cnt > MAX_BPF_FUNC_REG_ARGS) {
18413 			verbose(env, "freplace programs with >%d args not supported yet\n",
18414 				MAX_BPF_FUNC_REG_ARGS);
18415 			ret = -EINVAL;
18416 			goto out;
18417 		}
18418 	} else {
18419 		/* if main BPF program has associated BTF info, validate that
18420 		 * it's matching expected signature, and otherwise mark BTF
18421 		 * info for main program as unreliable
18422 		 */
18423 		if (env->prog->aux->func_info_aux) {
18424 			ret = btf_prepare_func_args(env, 0);
18425 			if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) {
18426 				env->prog->aux->func_info_aux[0].unreliable = true;
18427 				sub->arg_cnt = 1;
18428 				sub->stack_arg_cnt = 0;
18429 			}
18430 		}
18431 
18432 		/* 1st arg to a function */
18433 		regs[BPF_REG_1].type = PTR_TO_CTX;
18434 		mark_reg_known_zero(env, regs, BPF_REG_1);
18435 	}
18436 
18437 	/* Acquire references for struct_ops program arguments tagged with "__ref" */
18438 	if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
18439 		for (i = 0; i < aux->ctx_arg_info_size; i++) {
18440 			ret = aux->ctx_arg_info[i].refcounted ? acquire_reference(env, 0, 0) : 0;
18441 			if (ret < 0)
18442 				goto out;
18443 
18444 			aux->ctx_arg_info[i].ref_id = ret;
18445 		}
18446 	}
18447 
18448 	ret = do_check(env);
18449 out:
18450 	if (!ret && pop_log)
18451 		bpf_vlog_reset(&env->log, 0);
18452 	free_states(env);
18453 	return ret;
18454 }
18455 
18456 /* Lazily verify all global functions based on their BTF, if they are called
18457  * from main BPF program or any of subprograms transitively.
18458  * BPF global subprogs called from dead code are not validated.
18459  * All callable global functions must pass verification.
18460  * Otherwise the whole program is rejected.
18461  * Consider:
18462  * int bar(int);
18463  * int foo(int f)
18464  * {
18465  *    return bar(f);
18466  * }
18467  * int bar(int b)
18468  * {
18469  *    ...
18470  * }
18471  * foo() will be verified first for R1=any_scalar_value. During verification it
18472  * will be assumed that bar() already verified successfully and call to bar()
18473  * from foo() will be checked for type match only. Later bar() will be verified
18474  * independently to check that it's safe for R1=any_scalar_value.
18475  */
18476 static int do_check_subprogs(struct bpf_verifier_env *env)
18477 {
18478 	struct bpf_prog_aux *aux = env->prog->aux;
18479 	struct bpf_func_info_aux *sub_aux;
18480 	int i, ret, new_cnt;
18481 	u32 insn_processed;
18482 
18483 	if (!aux->func_info)
18484 		return 0;
18485 
18486 	/* exception callback is presumed to be always called */
18487 	if (env->exception_callback_subprog)
18488 		subprog_aux(env, env->exception_callback_subprog)->called = true;
18489 
18490 again:
18491 	new_cnt = 0;
18492 	for (i = 1; i < env->subprog_cnt; i++) {
18493 		if (!bpf_subprog_is_global(env, i))
18494 			continue;
18495 
18496 		insn_processed = env->insn_processed;
18497 
18498 		sub_aux = subprog_aux(env, i);
18499 		if (!sub_aux->called || sub_aux->verified)
18500 			continue;
18501 
18502 		env->insn_idx = env->subprog_info[i].start;
18503 		WARN_ON_ONCE(env->insn_idx == 0);
18504 		ret = do_check_common(env, i);
18505 		env->subprog_info[i].insn_processed = env->insn_processed - insn_processed;
18506 		if (ret) {
18507 			return ret;
18508 		} else if (env->log.level & BPF_LOG_LEVEL) {
18509 			verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
18510 				i, subprog_name(env, i));
18511 		}
18512 
18513 		/* We verified new global subprog, it might have called some
18514 		 * more global subprogs that we haven't verified yet, so we
18515 		 * need to do another pass over subprogs to verify those.
18516 		 */
18517 		sub_aux->verified = true;
18518 		new_cnt++;
18519 	}
18520 
18521 	/* We can't loop forever as we verify at least one global subprog on
18522 	 * each pass.
18523 	 */
18524 	if (new_cnt)
18525 		goto again;
18526 
18527 	return 0;
18528 }
18529 
18530 static int do_check_main(struct bpf_verifier_env *env)
18531 {
18532 	u32 insn_processed = env->insn_processed;
18533 	int ret;
18534 
18535 	env->insn_idx = 0;
18536 	ret = do_check_common(env, 0);
18537 	env->subprog_info[0].insn_processed = env->insn_processed - insn_processed;
18538 	if (!ret)
18539 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18540 	return ret;
18541 }
18542 
18543 
18544 static void print_verification_stats(struct bpf_verifier_env *env)
18545 {
18546 	/* Skip over hidden subprogs which are not verified. */
18547 	int i, subprog_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
18548 
18549 	if (env->log.level & BPF_LOG_STATS) {
18550 		verbose(env, "verification time %lld usec\n",
18551 			div_u64(env->verification_time, 1000));
18552 		verbose(env, "stack depth %d", env->subprog_info[0].stack_depth);
18553 		for (i = 1; i < subprog_cnt; i++)
18554 			verbose(env, "+%d", env->subprog_info[i].stack_depth);
18555 		verbose(env, " max %d\n", env->max_stack_depth);
18556 		verbose(env, "insns processed %d", env->subprog_info[0].insn_processed);
18557 		for (i = 1; i < subprog_cnt; i++)
18558 			if (bpf_subprog_is_global(env, i))
18559 				verbose(env, "+%d", env->subprog_info[i].insn_processed);
18560 		verbose(env, "\n");
18561 	}
18562 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
18563 		"total_states %d peak_states %d mark_read %d\n",
18564 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
18565 		env->max_states_per_insn, env->total_states,
18566 		env->peak_states, env->longest_mark_read_walk);
18567 }
18568 
18569 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
18570 			       const struct bpf_ctx_arg_aux *info, u32 cnt)
18571 {
18572 	prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
18573 	prog->aux->ctx_arg_info_size = cnt;
18574 
18575 	return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
18576 }
18577 
18578 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
18579 {
18580 	const struct btf_type *t, *func_proto;
18581 	const struct bpf_struct_ops_desc *st_ops_desc;
18582 	const struct bpf_struct_ops *st_ops;
18583 	const struct btf_member *member;
18584 	struct bpf_prog *prog = env->prog;
18585 	bool has_refcounted_arg = false;
18586 	u32 btf_id, member_idx, member_off;
18587 	struct btf *btf;
18588 	const char *mname;
18589 	int i, err;
18590 
18591 	if (!prog->gpl_compatible) {
18592 		verbose(env, "struct ops programs must have a GPL compatible license\n");
18593 		return -EINVAL;
18594 	}
18595 
18596 	if (!prog->aux->attach_btf_id)
18597 		return -ENOTSUPP;
18598 
18599 	btf = prog->aux->attach_btf;
18600 	if (btf_is_module(btf)) {
18601 		/* Make sure st_ops is valid through the lifetime of env */
18602 		env->attach_btf_mod = btf_try_get_module(btf);
18603 		if (!env->attach_btf_mod) {
18604 			verbose(env, "struct_ops module %s is not found\n",
18605 				btf_get_name(btf));
18606 			return -ENOTSUPP;
18607 		}
18608 	}
18609 
18610 	btf_id = prog->aux->attach_btf_id;
18611 	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
18612 	if (!st_ops_desc) {
18613 		verbose(env, "attach_btf_id %u is not a supported struct\n",
18614 			btf_id);
18615 		return -ENOTSUPP;
18616 	}
18617 	st_ops = st_ops_desc->st_ops;
18618 
18619 	t = st_ops_desc->type;
18620 	member_idx = prog->expected_attach_type;
18621 	if (member_idx >= btf_type_vlen(t)) {
18622 		verbose(env, "attach to invalid member idx %u of struct %s\n",
18623 			member_idx, st_ops->name);
18624 		return -EINVAL;
18625 	}
18626 
18627 	member = &btf_type_member(t)[member_idx];
18628 	mname = btf_name_by_offset(btf, member->name_off);
18629 	func_proto = btf_type_resolve_func_ptr(btf, member->type,
18630 					       NULL);
18631 	if (!func_proto) {
18632 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
18633 			mname, member_idx, st_ops->name);
18634 		return -EINVAL;
18635 	}
18636 
18637 	member_off = __btf_member_bit_offset(t, member) / 8;
18638 	err = bpf_struct_ops_supported(st_ops, member_off);
18639 	if (err) {
18640 		verbose(env, "attach to unsupported member %s of struct %s\n",
18641 			mname, st_ops->name);
18642 		return err;
18643 	}
18644 
18645 	if (st_ops->check_member) {
18646 		err = st_ops->check_member(t, member, prog);
18647 
18648 		if (err) {
18649 			verbose(env, "attach to unsupported member %s of struct %s\n",
18650 				mname, st_ops->name);
18651 			return err;
18652 		}
18653 	}
18654 
18655 	if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
18656 		verbose(env, "Private stack not supported by jit\n");
18657 		return -EACCES;
18658 	}
18659 
18660 	for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
18661 		if (st_ops_desc->arg_info[member_idx].info[i].refcounted) {
18662 			has_refcounted_arg = true;
18663 			break;
18664 		}
18665 	}
18666 
18667 	/* Tail call is not allowed for programs with refcounted arguments since we
18668 	 * cannot guarantee that valid refcounted kptrs will be passed to the callee.
18669 	 */
18670 	for (i = 0; i < env->subprog_cnt; i++) {
18671 		if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
18672 			verbose(env, "program with __ref argument cannot tail call\n");
18673 			return -EINVAL;
18674 		}
18675 	}
18676 
18677 	prog->aux->st_ops = st_ops;
18678 	prog->aux->attach_st_ops_member_off = member_off;
18679 
18680 	prog->aux->attach_func_proto = func_proto;
18681 	prog->aux->attach_func_name = mname;
18682 	env->ops = st_ops->verifier_ops;
18683 
18684 	return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
18685 					  st_ops_desc->arg_info[member_idx].cnt);
18686 }
18687 #define SECURITY_PREFIX "security_"
18688 
18689 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
18690 
/* Set of functions that are on the ALLOW_ERROR_INJECTION list but must
 * nevertheless be treated as non-sleepable. It is consulted by
 * check_non_sleepable_error_inject().
 */
BTF_SET_START(btf_non_sleepable_error_inject)
/* The functions below can be called from sleepable and non-sleepable context.
 * Assume non-sleepable from bpf safety point of view.
 * Two of the three entries are only present when their config option is set.
 */
BTF_ID(func, __filemap_add_folio)
#ifdef CONFIG_FAIL_PAGE_ALLOC
BTF_ID(func, should_fail_alloc_page)
#endif
#ifdef CONFIG_FAILSLAB
BTF_ID(func, should_failslab)
#endif
BTF_SET_END(btf_non_sleepable_error_inject)
18706 
18707 static int check_non_sleepable_error_inject(u32 btf_id)
18708 {
18709 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
18710 }
18711 
18712 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
18713 {
18714 	/* fentry/fexit/fmod_ret progs can be sleepable if they are
18715 	 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
18716 	 */
18717 	if (!check_non_sleepable_error_inject(btf_id) &&
18718 	    within_error_injection_list(addr))
18719 		return 0;
18720 
18721 	return -EINVAL;
18722 }
18723 
18724 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18725 {
18726 	if (within_error_injection_list(addr) ||
18727 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18728 		return 0;
18729 
18730 	return -EINVAL;
18731 }
18732 
18733 #else
18734 
/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
 * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
 * but that just compares two concrete function names.
 *
 * Returns true when @func_name starts with the syscall prefix of the
 * architecture being compiled for; always false on other architectures.
 */
static bool has_arch_syscall_prefix(const char *func_name)
{
	const char *prefix;

#if defined(__x86_64__)
	prefix = "__x64_";
#elif defined(__i386__)
	prefix = "__ia32_";
#elif defined(__s390x__)
	prefix = "__s390x_";
#elif defined(__aarch64__)
	prefix = "__arm64_";
#elif defined(__riscv)
	prefix = "__riscv_";
#elif defined(__powerpc__) || defined(__powerpc64__)
	prefix = "sys_";
#elif defined(__loongarch__)
	prefix = "sys_";
#else
	prefix = NULL;
#endif

	if (!prefix)
		return false;

	return strncmp(func_name, prefix, strlen(prefix)) == 0;
}
18759 
18760 /* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
18761 
18762 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
18763 {
18764 	if (has_arch_syscall_prefix(func_name))
18765 		return 0;
18766 
18767 	return -EINVAL;
18768 }
18769 
18770 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18771 {
18772 	if (has_arch_syscall_prefix(func_name) ||
18773 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18774 		return 0;
18775 
18776 	return -EINVAL;
18777 }
18778 
18779 #endif /* CONFIG_FUNCTION_ERROR_INJECTION */
18780 
18781 static bool is_tracing_multi_id(const struct bpf_prog *prog, u32 btf_id)
18782 {
18783 	return is_tracing_multi(prog->expected_attach_type) && bpf_multi_func_btf_id[0] == btf_id;
18784 }
18785 
18786 static int btf_id_allow_sleepable(u32 btf_id, unsigned long addr, const struct bpf_prog *prog,
18787 				  const struct btf *btf)
18788 {
18789 	const struct btf_type *t;
18790 	const char *tname;
18791 
18792 	switch (prog->type) {
18793 	case BPF_PROG_TYPE_TRACING:
18794 		t = btf_type_by_id(btf, btf_id);
18795 		if (!t)
18796 			return -EINVAL;
18797 		tname = btf_name_by_offset(btf, t->name_off);
18798 		if (!tname)
18799 			return -EINVAL;
18800 
18801 		/*
18802 		 * *.multi sleepable programs will pass initial sleepable check,
18803 		 * the actual attached btf ids are checked later during the link
18804 		 * attachment.
18805 		 */
18806 		if (is_tracing_multi_id(prog, btf_id))
18807 			return 0;
18808 		if (!check_attach_sleepable(btf_id, addr, tname))
18809 			return 0;
18810 		/*
18811 		 * fentry/fexit/fmod_ret progs can also be sleepable if they are
18812 		 * in the fmodret id set with the KF_SLEEPABLE flag.
18813 		 */
18814 		else {
18815 			u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, prog);
18816 
18817 			if (flags && (*flags & KF_SLEEPABLE))
18818 				return 0;
18819 		}
18820 		break;
18821 	case BPF_PROG_TYPE_LSM:
18822 		/*
18823 		 * LSM progs check that they are attached to bpf_lsm_*() funcs.
18824 		 * Only some of them are sleepable.
18825 		 */
18826 		if (bpf_lsm_is_sleepable_hook(btf_id))
18827 			return 0;
18828 		break;
18829 	default:
18830 		break;
18831 	}
18832 	return -EINVAL;
18833 }
18834 
18835 int bpf_check_attach_target(struct bpf_verifier_log *log,
18836 			    const struct bpf_prog *prog,
18837 			    const struct bpf_prog *tgt_prog,
18838 			    u32 btf_id,
18839 			    struct bpf_attach_target_info *tgt_info)
18840 {
18841 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
18842 	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
18843 	char trace_symbol[KSYM_SYMBOL_LEN];
18844 	const char prefix[] = "btf_trace_";
18845 	struct bpf_raw_event_map *btp;
18846 	int ret = 0, subprog = -1, i;
18847 	const struct btf_type *t;
18848 	bool conservative = true;
18849 	const char *tname, *fname;
18850 	struct btf *btf;
18851 	long addr = 0;
18852 	struct module *mod = NULL;
18853 
18854 	if (!btf_id) {
18855 		bpf_log(log, "Tracing programs must provide btf_id\n");
18856 		return -EINVAL;
18857 	}
18858 	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
18859 	if (!btf) {
18860 		bpf_log(log,
18861 			"Tracing program can only be attached to another program annotated with BTF\n");
18862 		return -EINVAL;
18863 	}
18864 	t = btf_type_by_id(btf, btf_id);
18865 	if (!t) {
18866 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
18867 		return -EINVAL;
18868 	}
18869 	tname = btf_name_by_offset(btf, t->name_off);
18870 	if (!tname) {
18871 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
18872 		return -EINVAL;
18873 	}
18874 	if (tgt_prog) {
18875 		struct bpf_prog_aux *aux = tgt_prog->aux;
18876 		bool tgt_changes_pkt_data;
18877 		bool tgt_might_sleep;
18878 
18879 		if (bpf_prog_is_dev_bound(prog->aux) &&
18880 		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
18881 			bpf_log(log, "Target program bound device mismatch");
18882 			return -EINVAL;
18883 		}
18884 
18885 		for (i = 0; i < aux->func_info_cnt; i++)
18886 			if (aux->func_info[i].type_id == btf_id) {
18887 				subprog = i;
18888 				break;
18889 			}
18890 		if (subprog == -1) {
18891 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
18892 			return -EINVAL;
18893 		}
18894 		if (aux->func && aux->func[subprog]->aux->exception_cb) {
18895 			bpf_log(log,
18896 				"%s programs cannot attach to exception callback\n",
18897 				prog_extension ? "Extension" : "Tracing");
18898 			return -EINVAL;
18899 		}
18900 		conservative = aux->func_info_aux[subprog].unreliable;
18901 		if (prog_extension) {
18902 			if (conservative) {
18903 				bpf_log(log,
18904 					"Cannot replace static functions\n");
18905 				return -EINVAL;
18906 			}
18907 			if (!prog->jit_requested) {
18908 				bpf_log(log,
18909 					"Extension programs should be JITed\n");
18910 				return -EINVAL;
18911 			}
18912 			tgt_changes_pkt_data = aux->func
18913 					       ? aux->func[subprog]->aux->changes_pkt_data
18914 					       : aux->changes_pkt_data;
18915 			if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
18916 				bpf_log(log,
18917 					"Extension program changes packet data, while original does not\n");
18918 				return -EINVAL;
18919 			}
18920 
18921 			tgt_might_sleep = aux->func
18922 					  ? aux->func[subprog]->aux->might_sleep
18923 					  : aux->might_sleep;
18924 			if (prog->aux->might_sleep && !tgt_might_sleep) {
18925 				bpf_log(log,
18926 					"Extension program may sleep, while original does not\n");
18927 				return -EINVAL;
18928 			}
18929 		}
18930 		if (!tgt_prog->jited) {
18931 			bpf_log(log, "Can attach to only JITed progs\n");
18932 			return -EINVAL;
18933 		}
18934 		if (prog_tracing) {
18935 			if (aux->attach_tracing_prog) {
18936 				/*
18937 				 * Target program is an fentry/fexit which is already attached
18938 				 * to another tracing program. More levels of nesting
18939 				 * attachment are not allowed.
18940 				 */
18941 				bpf_log(log, "Cannot nest tracing program attach more than once\n");
18942 				return -EINVAL;
18943 			}
18944 		} else if (tgt_prog->type == prog->type) {
18945 			/*
18946 			 * To avoid potential call chain cycles, prevent attaching of a
18947 			 * program extension to another extension. It's ok to attach
18948 			 * fentry/fexit to extension program.
18949 			 */
18950 			bpf_log(log, "Cannot recursively attach\n");
18951 			return -EINVAL;
18952 		}
18953 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
18954 		    prog_extension &&
18955 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
18956 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
18957 		     tgt_prog->expected_attach_type == BPF_TRACE_FENTRY_MULTI ||
18958 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI ||
18959 		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION ||
18960 		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
18961 			/* Program extensions can extend all program types
18962 			 * except fentry/fexit. The reason is the following.
18963 			 * The fentry/fexit programs are used for performance
18964 			 * analysis, stats and can be attached to any program
18965 			 * type. When extension program is replacing XDP function
18966 			 * it is necessary to allow performance analysis of all
18967 			 * functions. Both original XDP program and its program
18968 			 * extension. Hence attaching fentry/fexit to
18969 			 * BPF_PROG_TYPE_EXT is allowed. If extending of
18970 			 * fentry/fexit was allowed it would be possible to create
18971 			 * long call chain fentry->extension->fentry->extension
18972 			 * beyond reasonable stack size. Hence extending fentry
18973 			 * is not allowed.
18974 			 */
18975 			bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
18976 			return -EINVAL;
18977 		}
18978 	} else {
18979 		if (prog_extension) {
18980 			bpf_log(log, "Cannot replace kernel functions\n");
18981 			return -EINVAL;
18982 		}
18983 	}
18984 
18985 	switch (prog->expected_attach_type) {
18986 	case BPF_TRACE_RAW_TP:
18987 		if (tgt_prog) {
18988 			bpf_log(log,
18989 				"Only FENTRY/FEXIT/FSESSION progs are attachable to another BPF prog\n");
18990 			return -EINVAL;
18991 		}
18992 		if (!btf_type_is_typedef(t)) {
18993 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
18994 				btf_id);
18995 			return -EINVAL;
18996 		}
18997 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
18998 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
18999 				btf_id, tname);
19000 			return -EINVAL;
19001 		}
19002 		tname += sizeof(prefix) - 1;
19003 
19004 		/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
19005 		 * names. Thus using bpf_raw_event_map to get argument names.
19006 		 */
19007 		btp = bpf_get_raw_tracepoint(tname);
19008 		if (!btp)
19009 			return -EINVAL;
19010 		if (prog->sleepable && !tracepoint_is_faultable(btp->tp)) {
19011 			bpf_log(log, "Sleepable program cannot attach to non-faultable tracepoint %s\n",
19012 				tname);
19013 			bpf_put_raw_tracepoint(btp);
19014 			return -EINVAL;
19015 		}
19016 		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
19017 					trace_symbol);
19018 		bpf_put_raw_tracepoint(btp);
19019 
19020 		if (fname)
19021 			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
19022 
19023 		if (!fname || ret < 0) {
19024 			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
19025 				prefix, tname);
19026 			t = btf_type_by_id(btf, t->type);
19027 			if (!btf_type_is_ptr(t))
19028 				/* should never happen in valid vmlinux build */
19029 				return -EINVAL;
19030 		} else {
19031 			t = btf_type_by_id(btf, ret);
19032 			if (!btf_type_is_func(t))
19033 				/* should never happen in valid vmlinux build */
19034 				return -EINVAL;
19035 		}
19036 
19037 		t = btf_type_by_id(btf, t->type);
19038 		if (!btf_type_is_func_proto(t))
19039 			/* should never happen in valid vmlinux build */
19040 			return -EINVAL;
19041 
19042 		break;
19043 	case BPF_TRACE_ITER:
19044 		if (!btf_type_is_func(t)) {
19045 			bpf_log(log, "attach_btf_id %u is not a function\n",
19046 				btf_id);
19047 			return -EINVAL;
19048 		}
19049 		t = btf_type_by_id(btf, t->type);
19050 		if (!btf_type_is_func_proto(t))
19051 			return -EINVAL;
19052 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19053 		if (ret)
19054 			return ret;
19055 		break;
19056 	default:
19057 		if (!prog_extension)
19058 			return -EINVAL;
19059 		fallthrough;
19060 	case BPF_MODIFY_RETURN:
19061 	case BPF_LSM_MAC:
19062 	case BPF_LSM_CGROUP:
19063 	case BPF_TRACE_FENTRY:
19064 	case BPF_TRACE_FEXIT:
19065 	case BPF_TRACE_FSESSION:
19066 	case BPF_TRACE_FSESSION_MULTI:
19067 	case BPF_TRACE_FENTRY_MULTI:
19068 	case BPF_TRACE_FEXIT_MULTI:
19069 		if ((prog->expected_attach_type == BPF_TRACE_FSESSION ||
19070 		    prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) &&
19071 		    !bpf_jit_supports_fsession()) {
19072 			bpf_log(log, "JIT does not support fsession\n");
19073 			return -EOPNOTSUPP;
19074 		}
19075 		if (!btf_type_is_func(t)) {
19076 			bpf_log(log, "attach_btf_id %u is not a function\n",
19077 				btf_id);
19078 			return -EINVAL;
19079 		}
19080 		if (prog_extension &&
19081 		    btf_check_type_match(log, prog, btf, t))
19082 			return -EINVAL;
19083 		t = btf_type_by_id(btf, t->type);
19084 		if (!btf_type_is_func_proto(t))
19085 			return -EINVAL;
19086 
19087 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
19088 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
19089 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
19090 			return -EINVAL;
19091 
19092 		if (tgt_prog && conservative)
19093 			t = NULL;
19094 
19095 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19096 		if (ret < 0)
19097 			return ret;
19098 
19099 		/*
19100 		 * *.multi programs don't need an address during program
19101 		 * verification, we just take the module ref if needed.
19102 		 */
19103 		if (is_tracing_multi_id(prog, btf_id)) {
19104 			if (btf_is_module(btf)) {
19105 				mod = btf_try_get_module(btf);
19106 				if (!mod)
19107 					return -ENOENT;
19108 			}
19109 			addr = 0;
19110 		} else if (tgt_prog) {
19111 			if (subprog == 0)
19112 				addr = (long) tgt_prog->bpf_func;
19113 			else
19114 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
19115 		} else {
19116 			if (btf_is_module(btf)) {
19117 				mod = btf_try_get_module(btf);
19118 				if (mod)
19119 					addr = find_kallsyms_symbol_value(mod, tname);
19120 				else
19121 					addr = 0;
19122 			} else {
19123 				addr = kallsyms_lookup_name(tname);
19124 			}
19125 			if (!addr) {
19126 				module_put(mod);
19127 				bpf_log(log,
19128 					"The address of function %s cannot be found\n",
19129 					tname);
19130 				return -ENOENT;
19131 			}
19132 		}
19133 
19134 		if (prog->sleepable) {
19135 			ret = btf_id_allow_sleepable(btf_id, addr, prog, btf);
19136 			if (ret) {
19137 				module_put(mod);
19138 				bpf_log(log, "%s is not sleepable\n", tname);
19139 				return ret;
19140 			}
19141 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
19142 			if (tgt_prog) {
19143 				module_put(mod);
19144 				bpf_log(log, "can't modify return codes of BPF programs\n");
19145 				return -EINVAL;
19146 			}
19147 			ret = -EINVAL;
19148 			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
19149 			    !check_attach_modify_return(addr, tname))
19150 				ret = 0;
19151 			if (ret) {
19152 				module_put(mod);
19153 				bpf_log(log, "%s() is not modifiable\n", tname);
19154 				return ret;
19155 			}
19156 		}
19157 
19158 		break;
19159 	}
19160 	tgt_info->tgt_addr = addr;
19161 	tgt_info->tgt_name = tname;
19162 	tgt_info->tgt_type = t;
19163 	tgt_info->tgt_mod = mod;
19164 	return 0;
19165 }
19166 
/*
 * BTF ids of kernel functions that tracing programs are refused attachment
 * to: check_attach_btf_id() rejects BPF_PROG_TYPE_TRACING programs whose
 * target is in this set, and bpf_check_attach_btf_id_multi() rejects any
 * target found here. Most entries are only present for particular kernel
 * configurations (see the #if guards).
 */
BTF_SET_START(btf_id_deny)
BTF_ID_UNUSED
#ifdef CONFIG_SMP
BTF_ID(func, ___migrate_enable)
BTF_ID(func, migrate_disable)
BTF_ID(func, migrate_enable)
#endif
#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
BTF_ID(func, rcu_read_unlock_strict)
#endif
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
BTF_ID(func, preempt_count_add)
BTF_ID(func, preempt_count_sub)
#endif
#ifdef CONFIG_PREEMPT_RCU
BTF_ID(func, __rcu_read_lock)
BTF_ID(func, __rcu_read_unlock)
#endif
BTF_SET_END(btf_id_deny)
19186 
/* fexit, fsession and fmod_ret programs (including the multi variants of
 * fexit and fsession) can't be used to attach to __noreturn functions.
 * Currently, we must manually list all __noreturn functions here. Once a more
 * robust solution is implemented, this workaround can be removed.
 */
BTF_SET_START(noreturn_deny)
#ifdef CONFIG_IA32_EMULATION
BTF_ID(func, __ia32_sys_exit)
BTF_ID(func, __ia32_sys_exit_group)
#endif
#ifdef CONFIG_KUNIT
BTF_ID(func, __kunit_abort)
BTF_ID(func, kunit_try_catch_throw)
#endif
#ifdef CONFIG_MODULES
BTF_ID(func, __module_put_and_kthread_exit)
#endif
#ifdef CONFIG_X86_64
BTF_ID(func, __x64_sys_exit)
BTF_ID(func, __x64_sys_exit_group)
#endif
BTF_ID(func, do_exit)
BTF_ID(func, do_group_exit)
BTF_ID(func, kthread_complete_and_exit)
BTF_ID(func, make_task_dead)
BTF_SET_END(noreturn_deny)
19212 
19213 static bool can_be_sleepable(struct bpf_prog *prog)
19214 {
19215 	if (prog->type == BPF_PROG_TYPE_TRACING) {
19216 		switch (prog->expected_attach_type) {
19217 		case BPF_TRACE_FENTRY:
19218 		case BPF_TRACE_FEXIT:
19219 		case BPF_MODIFY_RETURN:
19220 		case BPF_TRACE_ITER:
19221 		case BPF_TRACE_FSESSION:
19222 		case BPF_TRACE_RAW_TP:
19223 		case BPF_TRACE_FENTRY_MULTI:
19224 		case BPF_TRACE_FEXIT_MULTI:
19225 		case BPF_TRACE_FSESSION_MULTI:
19226 			return true;
19227 		default:
19228 			return false;
19229 		}
19230 	}
19231 	if (prog->type == BPF_PROG_TYPE_LSM)
19232 		return prog->expected_attach_type != BPF_LSM_CGROUP;
19233 
19234 	return prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
19235 	       prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
19236 	       prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT ||
19237 	       prog->type == BPF_PROG_TYPE_TRACEPOINT;
19238 }
19239 
19240 static int check_attach_btf_id(struct bpf_verifier_env *env)
19241 {
19242 	struct bpf_prog *prog = env->prog;
19243 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
19244 	struct bpf_attach_target_info tgt_info = {};
19245 	u32 btf_id = prog->aux->attach_btf_id;
19246 	struct bpf_trampoline *tr;
19247 	int ret;
19248 	u64 key;
19249 
19250 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
19251 		if (prog->sleepable)
19252 			/* attach_btf_id checked to be zero already */
19253 			return 0;
19254 		verbose(env, "Syscall programs can only be sleepable\n");
19255 		return -EINVAL;
19256 	}
19257 
19258 	if (prog->sleepable && !can_be_sleepable(prog)) {
19259 		verbose(env, "Program of this type cannot be sleepable\n");
19260 		return -EINVAL;
19261 	}
19262 
19263 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
19264 		return check_struct_ops_btf_id(env);
19265 
19266 	if (prog->type != BPF_PROG_TYPE_TRACING &&
19267 	    prog->type != BPF_PROG_TYPE_LSM &&
19268 	    prog->type != BPF_PROG_TYPE_EXT)
19269 		return 0;
19270 
19271 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
19272 	if (ret)
19273 		return ret;
19274 
19275 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
19276 		/* to make freplace equivalent to their targets, they need to
19277 		 * inherit env->ops and expected_attach_type for the rest of the
19278 		 * verification
19279 		 */
19280 		env->ops = bpf_verifier_ops[tgt_prog->type];
19281 		prog->expected_attach_type = tgt_prog->expected_attach_type;
19282 	}
19283 
19284 	/* store info about the attachment target that will be used later */
19285 	prog->aux->attach_func_proto = tgt_info.tgt_type;
19286 	prog->aux->attach_func_name = tgt_info.tgt_name;
19287 	prog->aux->mod = tgt_info.tgt_mod;
19288 
19289 	if (tgt_prog) {
19290 		prog->aux->saved_dst_prog_type = tgt_prog->type;
19291 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
19292 	}
19293 
19294 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
19295 		prog->aux->attach_btf_trace = true;
19296 		return 0;
19297 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
19298 		return bpf_iter_prog_supported(prog);
19299 	}
19300 
19301 	if (prog->type == BPF_PROG_TYPE_LSM) {
19302 		ret = bpf_lsm_verify_prog(&env->log, prog);
19303 		if (ret < 0)
19304 			return ret;
19305 	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
19306 		   btf_id_set_contains(&btf_id_deny, btf_id)) {
19307 		verbose(env, "Attaching tracing programs to function '%s' is rejected.\n",
19308 			tgt_info.tgt_name);
19309 		return -EINVAL;
19310 	} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
19311 		   prog->expected_attach_type == BPF_TRACE_FSESSION ||
19312 		   prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI ||
19313 		   prog->expected_attach_type == BPF_MODIFY_RETURN) &&
19314 		   btf_id_set_contains(&noreturn_deny, btf_id)) {
19315 		verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
19316 			tgt_info.tgt_name);
19317 		return -EINVAL;
19318 	}
19319 
19320 	/*
19321 	 * We don't get trampoline for tracing_multi programs at this point,
19322 	 * it's done when tracing_multi link is created.
19323 	 */
19324 	if (prog->type == BPF_PROG_TYPE_TRACING &&
19325 	    is_tracing_multi(prog->expected_attach_type))
19326 		return 0;
19327 
19328 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
19329 	tr = bpf_trampoline_get(key, &tgt_info);
19330 	if (!tr)
19331 		return -ENOMEM;
19332 
19333 	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
19334 		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
19335 
19336 	prog->aux->dst_trampoline = tr;
19337 	return 0;
19338 }
19339 
19340 int bpf_check_attach_btf_id_multi(struct btf *btf, struct bpf_prog *prog, u32 btf_id,
19341 				  struct bpf_attach_target_info *tgt_info)
19342 {
19343 	const struct btf_type *t;
19344 	unsigned long addr;
19345 	const char *tname;
19346 	int err;
19347 
19348 	if (!btf_id || !btf)
19349 		return -EINVAL;
19350 
19351 	/* Check noreturn attachment. */
19352 	if ((prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI ||
19353 	     prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) &&
19354 	     btf_id_set_contains(&noreturn_deny, btf_id))
19355 		return -EINVAL;
19356 	/* Check denied attachment. */
19357 	if (btf_id_set_contains(&btf_id_deny, btf_id))
19358 		return -EINVAL;
19359 
19360 	/* Check and get function target data. */
19361 	t = btf_type_by_id(btf, btf_id);
19362 	if (!t)
19363 		return -EINVAL;
19364 	tname = btf_name_by_offset(btf, t->name_off);
19365 	if (!tname)
19366 		return -EINVAL;
19367 	if (!btf_type_is_func(t))
19368 		return -EINVAL;
19369 	t = btf_type_by_id(btf, t->type);
19370 	if (!btf_type_is_func_proto(t))
19371 		return -EINVAL;
19372 	err = btf_distill_func_proto(NULL, btf, t, tname, &tgt_info->fmodel);
19373 	if (err < 0)
19374 		return err;
19375 	if (btf_is_module(btf)) {
19376 		/* The bpf program already holds reference to module. */
19377 		if (WARN_ON_ONCE(!prog->aux->mod))
19378 			return -EINVAL;
19379 		addr = find_kallsyms_symbol_value(prog->aux->mod, tname);
19380 	} else {
19381 		addr = kallsyms_lookup_name(tname);
19382 	}
19383 	if (!addr || !ftrace_location(addr))
19384 		return -ENOENT;
19385 
19386 	/* Check sleepable program attachment. */
19387 	if (prog->sleepable) {
19388 		err = btf_id_allow_sleepable(btf_id, addr, prog, btf);
19389 		if (err)
19390 			return err;
19391 	}
19392 	tgt_info->tgt_addr = addr;
19393 	return 0;
19394 }
19395 
19396 struct btf *bpf_get_btf_vmlinux(void)
19397 {
19398 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
19399 		mutex_lock(&bpf_verifier_lock);
19400 		if (!btf_vmlinux)
19401 			btf_vmlinux = btf_parse_vmlinux();
19402 		mutex_unlock(&bpf_verifier_lock);
19403 	}
19404 	return btf_vmlinux;
19405 }
19406 
19407 /*
19408  * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
19409  * this case expect that every file descriptor in the array is either a map or
19410  * a BTF. Everything else is considered to be trash.
19411  */
19412 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
19413 {
19414 	struct bpf_map *map;
19415 	struct btf *btf;
19416 	CLASS(fd, f)(fd);
19417 	int err;
19418 
19419 	map = __bpf_map_get(f);
19420 	if (!IS_ERR(map)) {
19421 		err = __add_used_map(env, map);
19422 		if (err < 0)
19423 			return err;
19424 		return 0;
19425 	}
19426 
19427 	btf = __btf_get_by_fd(f);
19428 	if (!IS_ERR(btf)) {
19429 		btf_get(btf);
19430 		return __add_used_btf(env, btf);
19431 	}
19432 
19433 	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
19434 	return PTR_ERR(map);
19435 }
19436 
19437 static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
19438 {
19439 	size_t size = sizeof(int);
19440 	int ret;
19441 	int fd;
19442 	u32 i;
19443 
19444 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
19445 
19446 	/*
19447 	 * The only difference between old (no fd_array_cnt is given) and new
19448 	 * APIs is that in the latter case the fd_array is expected to be
19449 	 * continuous and is scanned for map fds right away
19450 	 */
19451 	if (!attr->fd_array_cnt)
19452 		return 0;
19453 
19454 	/* Check for integer overflow */
19455 	if (attr->fd_array_cnt >= (U32_MAX / size)) {
19456 		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
19457 		return -EINVAL;
19458 	}
19459 
19460 	for (i = 0; i < attr->fd_array_cnt; i++) {
19461 		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
19462 			return -EFAULT;
19463 
19464 		ret = add_fd_from_fd_array(env, fd);
19465 		if (ret)
19466 			return ret;
19467 	}
19468 
19469 	return 0;
19470 }
19471 
19472 /* replace a generic kfunc with a specialized version if necessary */
19473 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
19474 {
19475 	struct bpf_prog *prog = env->prog;
19476 	bool seen_direct_write;
19477 	void *xdp_kfunc;
19478 	bool is_rdonly;
19479 	u32 func_id = desc->func_id;
19480 	u16 offset = desc->offset;
19481 	unsigned long addr = desc->addr;
19482 
19483 	if (offset) /* return if module BTF is used */
19484 		return 0;
19485 
19486 	if (bpf_dev_bound_kfunc_id(func_id)) {
19487 		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
19488 		if (xdp_kfunc)
19489 			addr = (unsigned long)xdp_kfunc;
19490 		/* fallback to default kfunc when not supported by netdev */
19491 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
19492 		seen_direct_write = env->seen_direct_write;
19493 		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
19494 
19495 		if (is_rdonly)
19496 			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
19497 
19498 		/* restore env->seen_direct_write to its original value, since
19499 		 * may_access_direct_pkt_data mutates it
19500 		 */
19501 		env->seen_direct_write = seen_direct_write;
19502 	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
19503 		if (bpf_lsm_has_d_inode_locked(prog))
19504 			addr = (unsigned long)bpf_set_dentry_xattr_locked;
19505 	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
19506 		if (bpf_lsm_has_d_inode_locked(prog))
19507 			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
19508 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
19509 		if (!env->insn_aux_data[insn_idx].non_sleepable)
19510 			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
19511 	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
19512 		if (env->insn_aux_data[insn_idx].non_sleepable)
19513 			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
19514 	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
19515 		if (env->insn_aux_data[insn_idx].non_sleepable)
19516 			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
19517 	}
19518 	desc->addr = addr;
19519 	return 0;
19520 }
19521 
19522 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
19523 					    u16 struct_meta_reg,
19524 					    u16 node_offset_reg,
19525 					    struct bpf_insn *insn,
19526 					    struct bpf_insn *insn_buf,
19527 					    int *cnt)
19528 {
19529 	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
19530 	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
19531 
19532 	insn_buf[0] = addr[0];
19533 	insn_buf[1] = addr[1];
19534 	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
19535 	insn_buf[3] = *insn;
19536 	*cnt = 4;
19537 }
19538 
19539 int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
19540 		     struct bpf_insn *insn_buf, int insn_idx, int *cnt)
19541 {
19542 	struct bpf_kfunc_desc *desc;
19543 	int err;
19544 
19545 	if (!insn->imm) {
19546 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
19547 		return -EINVAL;
19548 	}
19549 
19550 	*cnt = 0;
19551 
19552 	/* insn->imm has the btf func_id. Replace it with an offset relative to
19553 	 * __bpf_call_base, unless the JIT needs to call functions that are
19554 	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
19555 	 */
19556 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
19557 	if (!desc) {
19558 		verifier_bug(env, "kernel function descriptor not found for func_id %u",
19559 			     insn->imm);
19560 		return -EFAULT;
19561 	}
19562 
19563 	err = specialize_kfunc(env, desc, insn_idx);
19564 	if (err)
19565 		return err;
19566 
19567 	if (!bpf_jit_supports_far_kfunc_call())
19568 		insn->imm = BPF_CALL_IMM(desc->addr);
19569 
19570 	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
19571 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19572 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19573 		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
19574 
19575 		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
19576 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
19577 				     insn_idx);
19578 			return -EFAULT;
19579 		}
19580 
19581 		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
19582 		insn_buf[1] = addr[0];
19583 		insn_buf[2] = addr[1];
19584 		insn_buf[3] = *insn;
19585 		*cnt = 4;
19586 	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
19587 		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
19588 		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
19589 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19590 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19591 
19592 		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
19593 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
19594 				     insn_idx);
19595 			return -EFAULT;
19596 		}
19597 
19598 		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
19599 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
19600 				     insn_idx);
19601 			return -EFAULT;
19602 		}
19603 
19604 		insn_buf[0] = addr[0];
19605 		insn_buf[1] = addr[1];
19606 		insn_buf[2] = *insn;
19607 		*cnt = 3;
19608 	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
19609 		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
19610 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19611 		int struct_meta_reg = BPF_REG_3;
19612 		int node_offset_reg = BPF_REG_4;
19613 
19614 		/* list_add/rbtree_add have an extra arg (prev/less),
19615 		 * so args-to-fixup are in diff regs.
19616 		 */
19617 		if (desc->func_id == special_kfunc_list[KF_bpf_list_add] ||
19618 		    is_bpf_rbtree_add_kfunc(desc->func_id)) {
19619 			struct_meta_reg = BPF_REG_4;
19620 			node_offset_reg = BPF_REG_5;
19621 		}
19622 
19623 		if (!kptr_struct_meta) {
19624 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
19625 				     insn_idx);
19626 			return -EFAULT;
19627 		}
19628 
19629 		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
19630 						node_offset_reg, insn, insn_buf, cnt);
19631 	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
19632 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
19633 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
19634 		*cnt = 1;
19635 	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
19636 		   (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
19637 		    env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
19638 
19639 		/*
19640 		 * inline the bpf_session_is_return() for fsession:
19641 		 *   bool bpf_session_is_return(void *ctx)
19642 		 *   {
19643 		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
19644 		 *   }
19645 		 */
19646 		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19647 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
19648 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
19649 		*cnt = 3;
19650 	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
19651 		   (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
19652 		    env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
19653 		/*
19654 		 * inline bpf_session_cookie() for fsession:
19655 		 *   __u64 *bpf_session_cookie(void *ctx)
19656 		 *   {
19657 		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
19658 		 *       return &((u64 *)ctx)[-off];
19659 		 *   }
19660 		 */
19661 		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19662 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
19663 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
19664 		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
19665 		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
19666 		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
19667 		*cnt = 6;
19668 	}
19669 
19670 	if (env->insn_aux_data[insn_idx].arg_prog) {
19671 		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
19672 		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
19673 		int idx = *cnt;
19674 
19675 		insn_buf[idx++] = ld_addrs[0];
19676 		insn_buf[idx++] = ld_addrs[1];
19677 		insn_buf[idx++] = *insn;
19678 		*cnt = idx;
19679 	}
19680 	return 0;
19681 }
19682 
19683 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr,
19684 	      struct bpf_log_attr *attr_log)
19685 {
19686 	u64 start_time = ktime_get_ns();
19687 	struct bpf_verifier_env *env;
19688 	int i, len, ret = -EINVAL, err;
19689 	bool is_priv;
19690 
19691 	BTF_TYPE_EMIT(enum bpf_features);
19692 
19693 	/* no program is valid */
19694 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
19695 		return -EINVAL;
19696 
19697 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
19698 	 * allocate/free it every time bpf_check() is called
19699 	 */
19700 	env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT);
19701 	if (!env)
19702 		return -ENOMEM;
19703 
19704 	env->bt.env = env;
19705 
19706 	len = (*prog)->len;
19707 	env->insn_aux_data =
19708 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
19709 	ret = -ENOMEM;
19710 	if (!env->insn_aux_data)
19711 		goto err_free_env;
19712 	for (i = 0; i < len; i++)
19713 		env->insn_aux_data[i].orig_idx = i;
19714 	env->succ = bpf_iarray_realloc(NULL, 2);
19715 	if (!env->succ)
19716 		goto err_free_env;
19717 	env->prog = *prog;
19718 	env->ops = bpf_verifier_ops[env->prog->type];
19719 
19720 	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
19721 	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
19722 	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
19723 	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
19724 	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
19725 
19726 	bpf_get_btf_vmlinux();
19727 
19728 	/* grab the mutex to protect few globals used by verifier */
19729 	if (!is_priv)
19730 		mutex_lock(&bpf_verifier_lock);
19731 
19732 	/* user could have requested verbose verifier output
19733 	 * and supplied buffer to store the verification trace
19734 	 */
19735 	ret = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size);
19736 	if (ret)
19737 		goto err_unlock;
19738 
19739 	ret = process_fd_array(env, attr, uattr);
19740 	if (ret)
19741 		goto skip_full_check;
19742 
19743 	mark_verifier_state_clean(env);
19744 
19745 	if (IS_ERR(btf_vmlinux)) {
19746 		/* Either gcc or pahole or kernel are broken. */
19747 		verbose(env, "in-kernel BTF is malformed\n");
19748 		ret = PTR_ERR(btf_vmlinux);
19749 		goto skip_full_check;
19750 	}
19751 
19752 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
19753 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
19754 		env->strict_alignment = true;
19755 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
19756 		env->strict_alignment = false;
19757 
19758 	if (is_priv)
19759 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
19760 	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
19761 
19762 	env->explored_states = kvzalloc_objs(struct list_head,
19763 					     state_htab_size(env),
19764 					     GFP_KERNEL_ACCOUNT);
19765 	ret = -ENOMEM;
19766 	if (!env->explored_states)
19767 		goto skip_full_check;
19768 
19769 	for (i = 0; i < state_htab_size(env); i++)
19770 		INIT_LIST_HEAD(&env->explored_states[i]);
19771 	INIT_LIST_HEAD(&env->free_list);
19772 
19773 	ret = bpf_check_btf_info_early(env, attr, uattr);
19774 	if (ret < 0)
19775 		goto skip_full_check;
19776 
19777 	ret = add_subprog_and_kfunc(env);
19778 	if (ret < 0)
19779 		goto skip_full_check;
19780 
19781 	ret = check_subprogs(env);
19782 	if (ret < 0)
19783 		goto skip_full_check;
19784 
19785 	ret = bpf_check_btf_info(env, attr, uattr);
19786 	if (ret < 0)
19787 		goto skip_full_check;
19788 
19789 	ret = check_and_resolve_insns(env);
19790 	if (ret < 0)
19791 		goto skip_full_check;
19792 
19793 	if (bpf_prog_is_offloaded(env->prog->aux)) {
19794 		ret = bpf_prog_offload_verifier_prep(env->prog);
19795 		if (ret)
19796 			goto skip_full_check;
19797 	}
19798 
19799 	ret = bpf_check_cfg(env);
19800 	if (ret < 0)
19801 		goto skip_full_check;
19802 
19803 	ret = bpf_compute_postorder(env);
19804 	if (ret < 0)
19805 		goto skip_full_check;
19806 
19807 	ret = bpf_stack_liveness_init(env);
19808 	if (ret)
19809 		goto skip_full_check;
19810 
19811 	ret = check_attach_btf_id(env);
19812 	if (ret)
19813 		goto skip_full_check;
19814 
19815 	ret = bpf_compute_const_regs(env);
19816 	if (ret < 0)
19817 		goto skip_full_check;
19818 
19819 	ret = bpf_prune_dead_branches(env);
19820 	if (ret < 0)
19821 		goto skip_full_check;
19822 
19823 	ret = sort_subprogs_topo(env);
19824 	if (ret < 0)
19825 		goto skip_full_check;
19826 
19827 	ret = bpf_compute_scc(env);
19828 	if (ret < 0)
19829 		goto skip_full_check;
19830 
19831 	ret = bpf_compute_live_registers(env);
19832 	if (ret < 0)
19833 		goto skip_full_check;
19834 
19835 	ret = mark_fastcall_patterns(env);
19836 	if (ret < 0)
19837 		goto skip_full_check;
19838 
19839 	ret = do_check_main(env);
19840 	ret = ret ?: do_check_subprogs(env);
19841 
19842 	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
19843 		ret = bpf_prog_offload_finalize(env);
19844 
19845 skip_full_check:
19846 	kvfree(env->explored_states);
19847 
19848 	/* might decrease stack depth, keep it before passes that
19849 	 * allocate additional slots.
19850 	 */
19851 	if (ret == 0)
19852 		ret = bpf_remove_fastcall_spills_fills(env);
19853 
19854 	if (ret == 0)
19855 		ret = check_max_stack_depth(env);
19856 
19857 	/* instruction rewrites happen after this point */
19858 	if (ret == 0)
19859 		ret = bpf_optimize_bpf_loop(env);
19860 
19861 	if (is_priv) {
19862 		if (ret == 0)
19863 			bpf_opt_hard_wire_dead_code_branches(env);
19864 		if (ret == 0)
19865 			ret = bpf_opt_remove_dead_code(env);
19866 		if (ret == 0)
19867 			ret = bpf_opt_remove_nops(env);
19868 	} else {
19869 		if (ret == 0)
19870 			sanitize_dead_code(env);
19871 	}
19872 
19873 	if (ret == 0)
19874 		/* program is valid, convert *(u32*)(ctx + off) accesses */
19875 		ret = bpf_convert_ctx_accesses(env);
19876 
19877 	if (ret == 0)
19878 		ret = bpf_do_misc_fixups(env);
19879 
19880 	/* do 32-bit optimization after insn patching has done so those patched
19881 	 * insns could be handled correctly.
19882 	 */
19883 	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
19884 		ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr);
19885 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
19886 								     : false;
19887 	}
19888 
19889 	if (ret == 0)
19890 		ret = bpf_fixup_call_args(env);
19891 
19892 	env->verification_time = ktime_get_ns() - start_time;
19893 	print_verification_stats(env);
19894 	env->prog->aux->verified_insns = env->insn_processed;
19895 
19896 	/* preserve original error even if log finalization is successful */
19897 	err = bpf_log_attr_finalize(attr_log, &env->log);
19898 	if (err)
19899 		ret = err;
19900 
19901 	if (ret)
19902 		goto err_release_maps;
19903 
19904 	if (env->used_map_cnt) {
19905 		/* if program passed verifier, update used_maps in bpf_prog_info */
19906 		env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0],
19907 							 env->used_map_cnt,
19908 							 GFP_KERNEL_ACCOUNT);
19909 
19910 		if (!env->prog->aux->used_maps) {
19911 			ret = -ENOMEM;
19912 			goto err_release_maps;
19913 		}
19914 
19915 		memcpy(env->prog->aux->used_maps, env->used_maps,
19916 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
19917 		env->prog->aux->used_map_cnt = env->used_map_cnt;
19918 	}
19919 	if (env->used_btf_cnt) {
19920 		/* if program passed verifier, update used_btfs in bpf_prog_aux */
19921 		env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0],
19922 							 env->used_btf_cnt,
19923 							 GFP_KERNEL_ACCOUNT);
19924 		if (!env->prog->aux->used_btfs) {
19925 			ret = -ENOMEM;
19926 			goto err_release_maps;
19927 		}
19928 
19929 		memcpy(env->prog->aux->used_btfs, env->used_btfs,
19930 		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
19931 		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
19932 	}
19933 	if (env->used_map_cnt || env->used_btf_cnt) {
19934 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
19935 		 * bpf_ld_imm64 instructions
19936 		 */
19937 		convert_pseudo_ld_imm64(env);
19938 	}
19939 
19940 	adjust_btf_func(env);
19941 
19942 	/* extension progs temporarily inherit the attach_type of their targets
19943 	   for verification purposes, so set it back to zero before returning
19944 	 */
19945 	if (env->prog->type == BPF_PROG_TYPE_EXT)
19946 		env->prog->expected_attach_type = 0;
19947 
19948 	env->prog = __bpf_prog_select_runtime(env, env->prog, &ret);
19949 
19950 err_release_maps:
19951 	if (ret)
19952 		release_insn_arrays(env);
19953 	if (!env->prog->aux->used_maps)
19954 		/* if we didn't copy map pointers into bpf_prog_info, release
19955 		 * them now. Otherwise free_used_maps() will release them.
19956 		 */
19957 		release_maps(env);
19958 	if (!env->prog->aux->used_btfs)
19959 		release_btfs(env);
19960 
19961 	*prog = env->prog;
19962 
19963 	module_put(env->attach_btf_mod);
19964 err_unlock:
19965 	if (!is_priv)
19966 		mutex_unlock(&bpf_verifier_lock);
19967 	bpf_clear_insn_aux_data(env, 0, env->prog->len);
19968 	vfree(env->insn_aux_data);
19969 err_free_env:
19970 	bpf_stack_liveness_free(env);
19971 	kvfree(env->cfg.insn_postorder);
19972 	kvfree(env->scc_info);
19973 	kvfree(env->succ);
19974 	kvfree(env->gotox_tmp_buf);
19975 	kvfree(env);
19976 	return ret;
19977 }
19978