xref: /linux/kernel/bpf/verifier.c (revision 4d0a375887ab4d49e4da1ff10f9606cab8f7c3ad)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/bpf_mem_alloc.h>
30 #include <net/xdp.h>
31 #include <linux/trace_events.h>
32 #include <linux/kallsyms.h>
33 
34 #include "disasm.h"
35 
/* Per-program-type verifier ops table, indexed by bpf_prog_type.
 * The BPF_PROG_TYPE() x-macro from <linux/bpf_types.h> expands to one
 * "[_id] = &<name>_verifier_ops" designated initializer per program type;
 * the MAP/LINK variants are defined empty so only prog types contribute
 * entries here.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
46 
/* Feature bits advertised by the verifier; __MAX_BPF_FEAT is the count
 * sentinel.  NOTE(review): the consumer of these bits is not visible in
 * this chunk — values look like stable identifiers, so do not renumber.
 */
enum bpf_features {
	BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
	BPF_FEAT_STREAMS	     = 1,
	__MAX_BPF_FEAT,
};
52 
/* Shared per-CPU BPF memory allocator; bpf_global_percpu_ma_set records
 * whether it has been initialized.  NOTE(review): the initialization site
 * is not in this chunk — presumably done lazily under bpf_percpu_ma_lock
 * (declared below); confirm against the allocator setup code.
 */
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
55 
56 /* bpf_check() is a static code analyzer that walks eBPF program
57  * instruction by instruction and updates register/stack state.
58  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
59  *
60  * The first pass is depth-first-search to check that the program is a DAG.
61  * It rejects the following programs:
62  * - larger than BPF_MAXINSNS insns
63  * - if loop is present (detected via back-edge)
64  * - unreachable insns exist (shouldn't be a forest. program = one function)
65  * - out of bounds or malformed jumps
66  * The second pass is all possible path descent from the 1st insn.
67  * Since it's analyzing all paths through the program, the length of the
68  * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
70  * Number of 'branches to be analyzed' is limited to 1k
71  *
72  * On entry to each instruction, each register has a type, and the instruction
73  * changes the types of the registers depending on instruction semantics.
74  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
75  * copied to R1.
76  *
77  * All registers are 64-bit.
78  * R0 - return register
79  * R1-R5 argument passing registers
80  * R6-R9 callee saved registers
81  * R10 - frame pointer read-only
82  *
83  * At the start of BPF program the register R1 contains a pointer to bpf_context
84  * and has type PTR_TO_CTX.
85  *
86  * Verifier tracks arithmetic operations on pointers in case:
87  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
88  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
89  * 1st insn copies R10 (which has FRAME_PTR) type into R1
90  * and 2nd arithmetic instruction is pattern matched to recognize
91  * that it wants to construct a pointer to some element within stack.
92  * So after 2nd insn, the register R1 has type PTR_TO_STACK
93  * (and -20 constant is saved for further stack bounds checking).
94  * Meaning that this reg is a pointer to stack plus known immediate constant.
95  *
96  * Most of the time the registers have SCALAR_VALUE type, which
97  * means the register has some value, but it's not a valid pointer.
98  * (like pointer plus pointer becomes SCALAR_VALUE type)
99  *
100  * When verifier sees load or store instructions the type of base register
101  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
102  * four pointer types recognized by check_mem_access() function.
103  *
104  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
105  * and the range of [ptr, ptr + map's value_size) is accessible.
106  *
107  * registers used to pass values to function calls are checked against
108  * function argument constraints.
109  *
110  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
111  * It means that the register type passed to this function must be
112  * PTR_TO_STACK and it will be used inside the function as
113  * 'pointer to map element key'
114  *
115  * For example the argument constraints for bpf_map_lookup_elem():
116  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
117  *   .arg1_type = ARG_CONST_MAP_PTR,
118  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
119  *
120  * ret_type says that this function returns 'pointer to map elem value or null'
121  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
122  * 2nd argument should be a pointer to stack, which will be used inside
123  * the helper function as a pointer to map element key.
124  *
125  * On the kernel side the helper function looks like:
126  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
127  * {
128  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
129  *    void *key = (void *) (unsigned long) r2;
130  *    void *value;
131  *
132  *    here kernel can access 'key' and 'map' pointers safely, knowing that
133  *    [key, key + map->key_size) bytes are valid and were initialized on
134  *    the stack of eBPF program.
135  * }
136  *
137  * Corresponding eBPF program may look like:
138  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
139  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
140  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
141  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
142  * here verifier looks at prototype of map_lookup_elem() and sees:
143  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
144  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
145  *
146  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
147  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
148  * and were initialized prior to this call.
149  * If it's ok, then verifier allows this BPF_CALL insn and looks at
150  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
151  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
152  * returns either pointer to map value or NULL.
153  *
154  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
155  * insn, the register holding that pointer in the true branch changes state to
156  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
157  * branch. See check_cond_jmp_op().
158  *
159  * After the call R0 is set to return type of the function and registers R1-R5
160  * are set to NOT_INIT to indicate that they are no longer readable.
161  *
162  * The following reference types represent a potential reference to a kernel
163  * resource which, after first being allocated, must be checked and freed by
164  * the BPF program:
165  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
166  *
167  * When the verifier sees a helper call return a reference type, it allocates a
168  * pointer id for the reference and stores it in the current function state.
169  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
170  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
171  * passes through a NULL-check conditional. For the branch wherein the state is
172  * changed to CONST_IMM, the verifier releases the reference.
173  *
174  * For each helper function that allocates a reference, such as
175  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
176  * bpf_sk_release(). When a reference type passes into the release function,
177  * the verifier also releases the reference. If any unchecked or unreleased
178  * reference remains at the end of the program, the verifier rejects it.
179  */
180 
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* next pending element of the explored-branch stack */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
194 
/* Complexity/size limits enforced while exploring program states.
 * NOTE(review): exact enforcement sites are outside this chunk; names and
 * the file-header comment ("Number of 'branches to be analyzed' is
 * limited") indicate these bound the pending-branch stack and the number
 * of stored states compared per instruction.
 */
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

/* Largest object servable by the global per-CPU allocator, in bytes. */
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512

/* Minimum size reserved for a private stack, in bytes. */
#define BPF_PRIV_STACK_MIN_SIZE		64
201 
202 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx);
203 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id);
204 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
205 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
206 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
207 static int ref_set_non_owning(struct bpf_verifier_env *env,
208 			      struct bpf_reg_state *reg);
209 static bool is_trusted_reg(const struct bpf_reg_state *reg);
210 static inline bool in_sleepable_context(struct bpf_verifier_env *env);
211 static const char *non_sleepable_context_description(struct bpf_verifier_env *env);
212 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
213 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
214 
215 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
216 			      struct bpf_map *map,
217 			      bool unpriv, bool poison)
218 {
219 	unpriv |= bpf_map_ptr_unpriv(aux);
220 	aux->map_ptr_state.unpriv = unpriv;
221 	aux->map_ptr_state.poison = poison;
222 	aux->map_ptr_state.map_ptr = map;
223 }
224 
225 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
226 {
227 	bool poisoned = bpf_map_key_poisoned(aux);
228 
229 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
230 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
231 }
232 
/* Scratch state accumulated while checking the arguments of one helper
 * call and consumed when modeling the call's effects.
 * NOTE(review): per-field meanings below are inferred from names; confirm
 * against check_func_arg()/check_helper_call() in the rest of this file.
 */
struct bpf_call_arg_meta {
	struct bpf_map_desc map;
	bool raw_mode;		/* buffer may be written, not just read */
	bool pkt_access;
	u8 release_regno;	/* regno whose reference the call releases */
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int ref_obj_id;
	int dynptr_id;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
	struct btf_field *kptr_field;
	s64 const_map_key;	/* constant key value, if statically known */
};
253 
/* Identity of a kfunc call target: its defining BTF, the FUNC_PROTO type,
 * name, flags pointer from the kfunc id set, and BTF type id.
 */
struct bpf_kfunc_meta {
	struct btf *btf;
	const struct btf_type *proto;
	const char *name;
	const u32 *flags;
	s32 id;
};
261 
262 struct btf *btf_vmlinux;
263 
264 static const char *btf_type_name(const struct btf *btf, u32 id)
265 {
266 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
267 }
268 
269 static DEFINE_MUTEX(bpf_verifier_lock);
270 static DEFINE_MUTEX(bpf_percpu_ma_lock);
271 
272 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
273 {
274 	struct bpf_verifier_env *env = private_data;
275 	va_list args;
276 
277 	if (!bpf_verifier_log_needed(&env->log))
278 		return;
279 
280 	va_start(args, fmt);
281 	bpf_verifier_vlog(&env->log, fmt, args);
282 	va_end(args);
283 }
284 
285 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
286 				   struct bpf_reg_state *reg,
287 				   struct bpf_retval_range range, const char *ctx,
288 				   const char *reg_name)
289 {
290 	bool unknown = true;
291 
292 	verbose(env, "%s the register %s has", ctx, reg_name);
293 	if (reg->smin_value > S64_MIN) {
294 		verbose(env, " smin=%lld", reg->smin_value);
295 		unknown = false;
296 	}
297 	if (reg->smax_value < S64_MAX) {
298 		verbose(env, " smax=%lld", reg->smax_value);
299 		unknown = false;
300 	}
301 	if (unknown)
302 		verbose(env, " unknown scalar value");
303 	verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
304 }
305 
306 static bool reg_not_null(const struct bpf_reg_state *reg)
307 {
308 	enum bpf_reg_type type;
309 
310 	type = reg->type;
311 	if (type_may_be_null(type))
312 		return false;
313 
314 	type = base_type(type);
315 	return type == PTR_TO_SOCKET ||
316 		type == PTR_TO_TCP_SOCK ||
317 		type == PTR_TO_MAP_VALUE ||
318 		type == PTR_TO_MAP_KEY ||
319 		type == PTR_TO_SOCK_COMMON ||
320 		(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
321 		(type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) ||
322 		type == CONST_PTR_TO_MAP;
323 }
324 
325 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
326 {
327 	struct btf_record *rec = NULL;
328 	struct btf_struct_meta *meta;
329 
330 	if (reg->type == PTR_TO_MAP_VALUE) {
331 		rec = reg->map_ptr->record;
332 	} else if (type_is_ptr_alloc_obj(reg->type)) {
333 		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
334 		if (meta)
335 			rec = meta->record;
336 	}
337 	return rec;
338 }
339 
340 bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog)
341 {
342 	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
343 
344 	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
345 }
346 
347 static bool subprog_returns_void(struct bpf_verifier_env *env, int subprog)
348 {
349 	const struct btf_type *type, *func, *func_proto;
350 	const struct btf *btf = env->prog->aux->btf;
351 	u32 btf_id;
352 
353 	btf_id = env->prog->aux->func_info[subprog].type_id;
354 
355 	func = btf_type_by_id(btf, btf_id);
356 	if (verifier_bug_if(!func, env, "btf_id %u not found", btf_id))
357 		return false;
358 
359 	func_proto = btf_type_by_id(btf, func->type);
360 	if (!func_proto)
361 		return false;
362 
363 	type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
364 	if (!type)
365 		return false;
366 
367 	return btf_type_is_void(type);
368 }
369 
370 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
371 {
372 	struct bpf_func_info *info;
373 
374 	if (!env->prog->aux->func_info)
375 		return "";
376 
377 	info = &env->prog->aux->func_info[subprog];
378 	return btf_type_name(env->prog->aux->btf, info->type_id);
379 }
380 
381 void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
382 {
383 	struct bpf_subprog_info *info = subprog_info(env, subprog);
384 
385 	info->is_cb = true;
386 	info->is_async_cb = true;
387 	info->is_exception_cb = true;
388 }
389 
390 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
391 {
392 	return subprog_info(env, subprog)->is_exception_cb;
393 }
394 
395 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
396 {
397 	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK);
398 }
399 
400 static bool type_is_rdonly_mem(u32 type)
401 {
402 	return type & MEM_RDONLY;
403 }
404 
405 static bool is_acquire_function(enum bpf_func_id func_id,
406 				const struct bpf_map *map)
407 {
408 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
409 
410 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
411 	    func_id == BPF_FUNC_sk_lookup_udp ||
412 	    func_id == BPF_FUNC_skc_lookup_tcp ||
413 	    func_id == BPF_FUNC_ringbuf_reserve ||
414 	    func_id == BPF_FUNC_kptr_xchg)
415 		return true;
416 
417 	if (func_id == BPF_FUNC_map_lookup_elem &&
418 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
419 	     map_type == BPF_MAP_TYPE_SOCKHASH))
420 		return true;
421 
422 	return false;
423 }
424 
425 static bool is_ptr_cast_function(enum bpf_func_id func_id)
426 {
427 	return func_id == BPF_FUNC_tcp_sock ||
428 		func_id == BPF_FUNC_sk_fullsock ||
429 		func_id == BPF_FUNC_skc_to_tcp_sock ||
430 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
431 		func_id == BPF_FUNC_skc_to_udp6_sock ||
432 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
433 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
434 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
435 }
436 
/* Helpers that return a pointer into a dynptr's underlying memory. */
static bool is_dynptr_ref_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_dynptr_data;
}
441 
442 static bool is_sync_callback_calling_kfunc(u32 btf_id);
443 static bool is_async_callback_calling_kfunc(u32 btf_id);
444 static bool is_callback_calling_kfunc(u32 btf_id);
445 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
446 
447 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id);
448 static bool is_task_work_add_kfunc(u32 func_id);
449 
450 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
451 {
452 	return func_id == BPF_FUNC_for_each_map_elem ||
453 	       func_id == BPF_FUNC_find_vma ||
454 	       func_id == BPF_FUNC_loop ||
455 	       func_id == BPF_FUNC_user_ringbuf_drain;
456 }
457 
/* Among helpers, only bpf_timer_set_callback() schedules its callback
 * asynchronously; kfunc-based async callbacks are classified separately
 * via is_async_callback_calling_kfunc().
 */
static bool is_async_callback_calling_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_timer_set_callback;
}
462 
463 static bool is_callback_calling_function(enum bpf_func_id func_id)
464 {
465 	return is_sync_callback_calling_function(func_id) ||
466 	       is_async_callback_calling_function(func_id);
467 }
468 
469 bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn)
470 {
471 	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
472 	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
473 }
474 
475 bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn)
476 {
477 	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
478 	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
479 }
480 
/* Decide whether the async callback registered by 'insn' will run in a
 * sleepable context.  Timer callbacks never may sleep; wq and task_work
 * callbacks always may.  Reaching the fallthrough means a new async
 * callback source was added without updating this function.
 */
static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	/* bpf_timer callbacks are never sleepable. */
	if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback)
		return false;

	/* bpf_wq and bpf_task_work callbacks are always sleepable. */
	if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
	    (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
		return true;

	verifier_bug(env, "unhandled async callback in is_async_cb_sleepable");
	return false;
}
495 
496 bool bpf_is_may_goto_insn(struct bpf_insn *insn)
497 {
498 	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
499 }
500 
501 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
502 					const struct bpf_map *map)
503 {
504 	int ref_obj_uses = 0;
505 
506 	if (is_ptr_cast_function(func_id))
507 		ref_obj_uses++;
508 	if (is_acquire_function(func_id, map))
509 		ref_obj_uses++;
510 	if (is_dynptr_ref_function(func_id))
511 		ref_obj_uses++;
512 
513 	return ref_obj_uses > 1;
514 }
515 
516 
517 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
518 {
519        int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
520 
521        /* We need to check that slots between [spi - nr_slots + 1, spi] are
522 	* within [0, allocated_stack).
523 	*
524 	* Please note that the spi grows downwards. For example, a dynptr
525 	* takes the size of two stack slots; the first slot will be at
526 	* spi and the second slot will be at spi - 1.
527 	*/
528        return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
529 }
530 
531 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
532 			          const char *obj_kind, int nr_slots)
533 {
534 	int off, spi;
535 
536 	if (!tnum_is_const(reg->var_off)) {
537 		verbose(env, "%s has to be at a constant offset\n", obj_kind);
538 		return -EINVAL;
539 	}
540 
541 	off = reg->var_off.value;
542 	if (off % BPF_REG_SIZE) {
543 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
544 		return -EINVAL;
545 	}
546 
547 	spi = bpf_get_spi(off);
548 	if (spi + 1 < nr_slots) {
549 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
550 		return -EINVAL;
551 	}
552 
553 	if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots))
554 		return -ERANGE;
555 	return spi;
556 }
557 
/* Convenience wrappers around stack_slot_obj_get_spi() for each kind of
 * verifier-tracked stack object (a dynptr spans BPF_DYNPTR_NR_SLOTS
 * slots, an irq_flag spans one, iterators are caller-sized).
 */
static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
}

static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
}

static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	return stack_slot_obj_get_spi(env, reg, "irq_flag", 1);
}
572 
/* Translate the DYNPTR_TYPE_* flag embedded in an argument type to the
 * internal bpf_dynptr_type; BPF_DYNPTR_TYPE_INVALID when no single known
 * dynptr type flag is set.  Inverse of get_dynptr_type_flag() below.
 */
static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
{
	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
	case DYNPTR_TYPE_LOCAL:
		return BPF_DYNPTR_TYPE_LOCAL;
	case DYNPTR_TYPE_RINGBUF:
		return BPF_DYNPTR_TYPE_RINGBUF;
	case DYNPTR_TYPE_SKB:
		return BPF_DYNPTR_TYPE_SKB;
	case DYNPTR_TYPE_XDP:
		return BPF_DYNPTR_TYPE_XDP;
	case DYNPTR_TYPE_SKB_META:
		return BPF_DYNPTR_TYPE_SKB_META;
	case DYNPTR_TYPE_FILE:
		return BPF_DYNPTR_TYPE_FILE;
	default:
		return BPF_DYNPTR_TYPE_INVALID;
	}
}
592 
/* Translate an internal bpf_dynptr_type back to its DYNPTR_TYPE_* type
 * flag; 0 for unknown types.  Inverse of arg_to_dynptr_type() above.
 */
static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
{
	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
		return DYNPTR_TYPE_LOCAL;
	case BPF_DYNPTR_TYPE_RINGBUF:
		return DYNPTR_TYPE_RINGBUF;
	case BPF_DYNPTR_TYPE_SKB:
		return DYNPTR_TYPE_SKB;
	case BPF_DYNPTR_TYPE_XDP:
		return DYNPTR_TYPE_XDP;
	case BPF_DYNPTR_TYPE_SKB_META:
		return DYNPTR_TYPE_SKB_META;
	case BPF_DYNPTR_TYPE_FILE:
		return DYNPTR_TYPE_FILE;
	default:
		return 0;
	}
}
612 
613 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
614 {
615 	return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE;
616 }
617 
618 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
619 			      enum bpf_dynptr_type type,
620 			      bool first_slot, int dynptr_id);
621 
622 
623 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
624 				   struct bpf_reg_state *sreg1,
625 				   struct bpf_reg_state *sreg2,
626 				   enum bpf_dynptr_type type)
627 {
628 	int id = ++env->id_gen;
629 
630 	__mark_dynptr_reg(sreg1, type, true, id);
631 	__mark_dynptr_reg(sreg2, type, false, id);
632 }
633 
634 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
635 			       struct bpf_reg_state *reg,
636 			       enum bpf_dynptr_type type)
637 {
638 	__mark_dynptr_reg(reg, type, true, ++env->id_gen);
639 }
640 
641 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
642 				        struct bpf_func_state *state, int spi);
643 
/* Initialize the two stack slots pointed to by 'reg' as a dynptr of the
 * type implied by 'arg_type'.  Any pre-existing dynptrs overlapping those
 * slots are destroyed first (an error if they are referenced with no
 * remaining clone).  For refcounted dynptr types a reference is acquired,
 * or inherited when cloning (clone_ref_obj_id != 0).  Returns 0 on
 * success or a negative error.
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	enum bpf_dynptr_type type;
	int spi, i, err;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
	 * to ensure that for the following example:
	 *	[d1][d1][d2][d2]
	 * spi    3   2   1   0
	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
	 * case they do belong to same dynptr, second call won't see slot_type
	 * as STACK_DYNPTR and will simply skip destruction.
	 */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
	if (err)
		return err;

	/* mark all bytes of both slots as STACK_DYNPTR */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
			       &state->stack[spi - 1].spilled_ptr, type);

	if (dynptr_type_refcounted(type)) {
		/* The id is used to track proper releasing */
		int id;

		if (clone_ref_obj_id)
			id = clone_ref_obj_id;
		else
			id = acquire_reference(env, insn_idx);

		if (id < 0)
			return id;

		/* both slots carry the same ref_obj_id */
		state->stack[spi].spilled_ptr.ref_obj_id = id;
		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
	}

	return 0;
}
701 
702 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
703 {
704 	int i;
705 
706 	for (i = 0; i < BPF_REG_SIZE; i++) {
707 		state->stack[spi].slot_type[i] = STACK_INVALID;
708 		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
709 	}
710 
711 	bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
712 	bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
713 }
714 
/* Tear down the dynptr living in the stack slots pointed to by 'reg' when
 * it is released by a helper.  Unreferenced dynptrs are simply wiped;
 * referenced ones additionally invalidate all slices and all stack
 * clones sharing the same ref_obj_id.  Returns 0 or -EFAULT on a
 * verifier-internal inconsistency.
 */
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, ref_obj_id, i;

	/*
	 * This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
	 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
	 * is safe to do directly.
	 */
	if (reg->type == CONST_PTR_TO_DYNPTR) {
		verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released");
		return -EFAULT;
	}
	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
		invalidate_dynptr(env, state, spi);
		return 0;
	}

	ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;

	/* If the dynptr has a ref_obj_id, then we need to invalidate
	 * two things:
	 *
	 * 1) Any dynptrs with a matching ref_obj_id (clones)
	 * 2) Any slices derived from this dynptr.
	 */

	/* Invalidate any slices associated with this dynptr */
	WARN_ON_ONCE(release_reference(env, ref_obj_id));

	/* Invalidate any dynptr clones */
	for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
			continue;

		/* it should always be the case that if the ref obj id
		 * matches then the stack slot also belongs to a
		 * dynptr
		 */
		if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
			verifier_bug(env, "misconfigured ref_obj_id");
			return -EFAULT;
		}
		/* only wipe once per clone, from its first slot */
		if (state->stack[i].spilled_ptr.dynptr.first_slot)
			invalidate_dynptr(env, state, i);
	}

	return 0;
}
769 
770 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
771 			       struct bpf_reg_state *reg);
772 
773 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
774 {
775 	if (!env->allow_ptr_leaks)
776 		bpf_mark_reg_not_init(env, reg);
777 	else
778 		__mark_reg_unknown(env, reg);
779 }
780 
/* If stack slot 'spi' is part of a dynptr, destroy that dynptr (both of
 * its slots) because the program is about to overwrite it.  Overwriting a
 * referenced dynptr is only allowed when at least one other dynptr shares
 * its ref_obj_id, so the reference can still be released.  All register
 * slices derived from the dynptr are invalidated.  Returns 0 (including
 * when the slot is not a dynptr) or -EINVAL.
 */
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
				        struct bpf_func_state *state, int spi)
{
	struct bpf_func_state *fstate;
	struct bpf_reg_state *dreg;
	int i, dynptr_id;

	/* We always ensure that STACK_DYNPTR is never set partially,
	 * hence just checking for slot_type[0] is enough. This is
	 * different for STACK_SPILL, where it may be only set for
	 * 1 byte, so code has to use is_spilled_reg.
	 */
	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
		return 0;

	/* Reposition spi to first slot */
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		spi = spi + 1;

	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
		int ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
		int ref_cnt = 0;

		/*
		 * A referenced dynptr can be overwritten only if there is at
		 * least one other dynptr sharing the same ref_obj_id,
		 * ensuring the reference can still be properly released.
		 */
		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
			if (state->stack[i].slot_type[0] != STACK_DYNPTR)
				continue;
			if (!state->stack[i].spilled_ptr.dynptr.first_slot)
				continue;
			if (state->stack[i].spilled_ptr.ref_obj_id == ref_obj_id)
				ref_cnt++;
		}

		/* ref_cnt includes this dynptr itself, hence <= 1 means no clone */
		if (ref_cnt <= 1) {
			verbose(env, "cannot overwrite referenced dynptr\n");
			return -EINVAL;
		}
	}

	mark_stack_slot_scratched(env, spi);
	mark_stack_slot_scratched(env, spi - 1);

	/* Writing partially to one dynptr stack slot destroys both. */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_INVALID;
		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
	}

	dynptr_id = state->stack[spi].spilled_ptr.id;
	/* Invalidate any slices associated with this dynptr */
	bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
		/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
		if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
			continue;
		if (dreg->dynptr_id == dynptr_id)
			mark_reg_invalid(env, dreg);
	}));

	/* Do not release reference state, we are destroying dynptr on stack,
	 * not using some helper to release it. Just reset register.
	 */
	bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
	bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);

	return 0;
}
851 
/* Return true when 'reg' may serve as the destination for initializing a
 * new dynptr: a PTR_TO_STACK at a valid (or not-yet-grown) slot.
 * CONST_PTR_TO_DYNPTR can never be (re)initialized.
 */
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi;

	if (reg->type == CONST_PTR_TO_DYNPTR)
		return false;

	spi = dynptr_get_spi(env, reg);

	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
	 * error because this just means the stack state hasn't been updated yet.
	 * We will do check_mem_access to check and update stack bounds later.
	 */
	if (spi < 0 && spi != -ERANGE)
		return false;

	/* We don't need to check if the stack slots are marked by previous
	 * dynptr initializations because we allow overwriting existing unreferenced
	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
	 * touching are completely destructed before we reinitialize them for a new
	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
	 * instead of delaying it until the end where the user will get "Unreleased
	 * reference" error.
	 */
	return true;
}
879 
/* Check whether @reg refers to a fully initialized dynptr: either a
 * CONST_PTR_TO_DYNPTR, or a stack dynptr whose first-slot flag is set and
 * whose two stack slots are entirely marked STACK_DYNPTR.
 */
static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int i, spi;

	/* This already represents first slot of initialized bpf_dynptr.
	 *
	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
	 * check_func_arg_reg_off's logic, so we don't need to check its
	 * offset and alignment.
	 */
	if (reg->type == CONST_PTR_TO_DYNPTR)
		return true;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return false;
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		return false;

	/* Every byte of both slots backing the dynptr must be STACK_DYNPTR. */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
			return false;
	}

	return true;
}
908 
909 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
910 				    enum bpf_arg_type arg_type)
911 {
912 	struct bpf_func_state *state = bpf_func(env, reg);
913 	enum bpf_dynptr_type dynptr_type;
914 	int spi;
915 
916 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
917 	if (arg_type == ARG_PTR_TO_DYNPTR)
918 		return true;
919 
920 	dynptr_type = arg_to_dynptr_type(arg_type);
921 	if (reg->type == CONST_PTR_TO_DYNPTR) {
922 		return reg->dynptr.type == dynptr_type;
923 	} else {
924 		spi = dynptr_get_spi(env, reg);
925 		if (spi < 0)
926 			return false;
927 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
928 	}
929 }
930 
931 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
932 
933 static bool in_rcu_cs(struct bpf_verifier_env *env);
934 
935 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
936 
/* Mark @nr_slots stack slots at @reg as holding an initialized iterator of
 * BTF type (@btf, @btf_id), acquiring a new reference for it. Only the
 * first (main) slot carries the ref_obj_id. For RCU-protected iterator
 * kfuncs, the slot type is additionally tagged MEM_RCU when inside an RCU
 * critical section, or PTR_UNTRUSTED when outside of one.
 * Returns 0 on success, a negative error otherwise.
 */
static int mark_stack_slots_iter(struct bpf_verifier_env *env,
				 struct bpf_kfunc_call_arg_meta *meta,
				 struct bpf_reg_state *reg, int insn_idx,
				 struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j, id;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	id = acquire_reference(env, insn_idx);
	if (id < 0)
		return id;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		__mark_reg_known_zero(st);
		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
		if (is_kfunc_rcu_protected(meta)) {
			if (in_rcu_cs(env))
				st->type |= MEM_RCU;
			else
				st->type |= PTR_UNTRUSTED;
		}
		/* only the main (first) slot records the acquired reference */
		st->ref_obj_id = i == 0 ? id : 0;
		st->iter.btf = btf;
		st->iter.btf_id = btf_id;
		st->iter.state = BPF_ITER_STATE_ACTIVE;
		st->iter.depth = 0;

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_ITER;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}
979 
/* Undo mark_stack_slots_iter(): release the iterator's reference (held by
 * the first slot only) and reset all @nr_slots slots back to STACK_INVALID.
 * Returns 0 on success, a negative error if the spi cannot be determined.
 */
static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		if (i == 0)
			WARN_ON_ONCE(release_reference(env, st->ref_obj_id));

		bpf_mark_reg_not_init(env, st);

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_INVALID;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}
1007 
1008 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1009 				     struct bpf_reg_state *reg, int nr_slots)
1010 {
1011 	struct bpf_func_state *state = bpf_func(env, reg);
1012 	int spi, i, j;
1013 
1014 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1015 	 * will do check_mem_access to check and update stack bounds later, so
1016 	 * return true for that case.
1017 	 */
1018 	spi = iter_get_spi(env, reg, nr_slots);
1019 	if (spi == -ERANGE)
1020 		return true;
1021 	if (spi < 0)
1022 		return false;
1023 
1024 	for (i = 0; i < nr_slots; i++) {
1025 		struct bpf_stack_state *slot = &state->stack[spi - i];
1026 
1027 		for (j = 0; j < BPF_REG_SIZE; j++)
1028 			if (slot->slot_type[j] == STACK_ITER)
1029 				return false;
1030 	}
1031 
1032 	return true;
1033 }
1034 
/* Check that @reg points at stack slots holding an initialized iterator of
 * the expected BTF type (@btf, @btf_id).
 * Returns 0 on success, -EPROTO if the iterator slot was tagged
 * PTR_UNTRUSTED (RCU-protected iterator created outside an RCU CS, see
 * mark_stack_slots_iter), and -EINVAL for any other mismatch.
 */
static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return -EINVAL;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		if (st->type & PTR_UNTRUSTED)
			return -EPROTO;
		/* only main (first) slot has ref_obj_id set */
		if (i == 0 && !st->ref_obj_id)
			return -EINVAL;
		if (i != 0 && st->ref_obj_id)
			return -EINVAL;
		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
			return -EINVAL;

		for (j = 0; j < BPF_REG_SIZE; j++)
			if (slot->slot_type[j] != STACK_ITER)
				return -EINVAL;
	}

	return 0;
}
1066 
1067 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1068 static int release_irq_state(struct bpf_verifier_state *state, int id);
1069 
/* Mark the single stack slot at @reg as holding saved IRQ flags: acquire an
 * IRQ reference for it, record which kfunc class (@kfunc_class) saved the
 * flags, and set every byte of the slot to STACK_IRQ_FLAG.
 * Returns 0 on success, a negative error otherwise.
 */
static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
				     struct bpf_kfunc_call_arg_meta *meta,
				     struct bpf_reg_state *reg, int insn_idx,
				     int kfunc_class)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	struct bpf_stack_state *slot;
	struct bpf_reg_state *st;
	int spi, i, id;

	spi = irq_flag_get_spi(env, reg);
	if (spi < 0)
		return spi;

	id = acquire_irq_state(env, insn_idx);
	if (id < 0)
		return id;

	slot = &state->stack[spi];
	st = &slot->spilled_ptr;

	__mark_reg_known_zero(st);
	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
	st->ref_obj_id = id;
	st->irq.kfunc_class = kfunc_class;

	for (i = 0; i < BPF_REG_SIZE; i++)
		slot->slot_type[i] = STACK_IRQ_FLAG;

	mark_stack_slot_scratched(env, spi);
	return 0;
}
1102 
/* Validate and undo mark_stack_slot_irq_flag(): the restoring kfunc must
 * belong to the same class that saved the flags, and IRQ states must be
 * restored in LIFO order (enforced by release_irq_state()). On success the
 * slot is reset to STACK_INVALID.
 * Returns 0 on success, a negative error otherwise.
 */
static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				      int kfunc_class)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	struct bpf_stack_state *slot;
	struct bpf_reg_state *st;
	int spi, i, err;

	spi = irq_flag_get_spi(env, reg);
	if (spi < 0)
		return spi;

	slot = &state->stack[spi];
	st = &slot->spilled_ptr;

	if (st->irq.kfunc_class != kfunc_class) {
		const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
		const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";

		verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
			flag_kfunc, used_kfunc);
		return -EINVAL;
	}

	/* Only -EACCES (out-of-order restore) is expected here; report it with
	 * the insn that acquired the currently-active IRQ state.
	 */
	err = release_irq_state(env->cur_state, st->ref_obj_id);
	WARN_ON_ONCE(err && err != -EACCES);
	if (err) {
		int insn_idx = 0;

		/* NOTE(review): this inner 'int i' shadows the function-scope
		 * 'i' declared above; harmless, but worth cleaning up.
		 */
		for (int i = 0; i < env->cur_state->acquired_refs; i++) {
			if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
				insn_idx = env->cur_state->refs[i].insn_idx;
				break;
			}
		}

		verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
			env->cur_state->active_irq_id, insn_idx);
		return err;
	}

	bpf_mark_reg_not_init(env, st);

	for (i = 0; i < BPF_REG_SIZE; i++)
		slot->slot_type[i] = STACK_INVALID;

	mark_stack_slot_scratched(env, spi);
	return 0;
}
1152 
1153 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1154 {
1155 	struct bpf_func_state *state = bpf_func(env, reg);
1156 	struct bpf_stack_state *slot;
1157 	int spi, i;
1158 
1159 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1160 	 * will do check_mem_access to check and update stack bounds later, so
1161 	 * return true for that case.
1162 	 */
1163 	spi = irq_flag_get_spi(env, reg);
1164 	if (spi == -ERANGE)
1165 		return true;
1166 	if (spi < 0)
1167 		return false;
1168 
1169 	slot = &state->stack[spi];
1170 
1171 	for (i = 0; i < BPF_REG_SIZE; i++)
1172 		if (slot->slot_type[i] == STACK_IRQ_FLAG)
1173 			return false;
1174 	return true;
1175 }
1176 
1177 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1178 {
1179 	struct bpf_func_state *state = bpf_func(env, reg);
1180 	struct bpf_stack_state *slot;
1181 	struct bpf_reg_state *st;
1182 	int spi, i;
1183 
1184 	spi = irq_flag_get_spi(env, reg);
1185 	if (spi < 0)
1186 		return -EINVAL;
1187 
1188 	slot = &state->stack[spi];
1189 	st = &slot->spilled_ptr;
1190 
1191 	if (!st->ref_obj_id)
1192 		return -EINVAL;
1193 
1194 	for (i = 0; i < BPF_REG_SIZE; i++)
1195 		if (slot->slot_type[i] != STACK_IRQ_FLAG)
1196 			return -EINVAL;
1197 	return 0;
1198 }
1199 
1200 /* Check if given stack slot is "special":
1201  *   - spilled register state (STACK_SPILL);
1202  *   - dynptr state (STACK_DYNPTR);
1203  *   - iter state (STACK_ITER).
1204  *   - irq flag state (STACK_IRQ_FLAG)
1205  */
1206 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1207 {
1208 	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1209 
1210 	switch (type) {
1211 	case STACK_SPILL:
1212 	case STACK_DYNPTR:
1213 	case STACK_ITER:
1214 	case STACK_IRQ_FLAG:
1215 		return true;
1216 	case STACK_INVALID:
1217 	case STACK_POISON:
1218 	case STACK_MISC:
1219 	case STACK_ZERO:
1220 		return false;
1221 	default:
1222 		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1223 		return true;
1224 	}
1225 }
1226 
1227 /* The reg state of a pointer or a bounded scalar was saved when
1228  * it was spilled to the stack.
1229  */
1230 
1231 /*
1232  * Mark stack slot as STACK_MISC, unless it is already:
1233  * - STACK_INVALID, in which case they are equivalent.
1234  * - STACK_ZERO, in which case we preserve more precise STACK_ZERO.
1235  * - STACK_POISON, which truly forbids access to the slot.
1236  * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1237  * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
1238  * unnecessary as both are considered equivalent when loading data and pruning,
1239  * in case of unprivileged mode it will be incorrect to allow reads of invalid
1240  * slots.
1241  */
1242 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1243 {
1244 	if (*stype == STACK_ZERO)
1245 		return;
1246 	if (*stype == STACK_INVALID || *stype == STACK_POISON)
1247 		return;
1248 	*stype = STACK_MISC;
1249 }
1250 
1251 static void scrub_spilled_slot(u8 *stype)
1252 {
1253 	if (*stype != STACK_INVALID && *stype != STACK_POISON)
1254 		*stype = STACK_MISC;
1255 }
1256 
/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
 * small to hold src. This is different from krealloc since we don't want to preserve
 * the contents of dst.
 *
 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
 * not be allocated.
 */
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
	size_t alloc_bytes;
	void *orig = dst;
	size_t bytes;

	if (ZERO_OR_NULL_PTR(src))
		goto out;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	/* Reuse dst's existing allocation when it is already big enough;
	 * ksize(NULL) is 0, so a fresh allocation is requested otherwise.
	 */
	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
	dst = krealloc(orig, alloc_bytes, flags);
	if (!dst) {
		/* krealloc failure leaves orig intact; free it here since the
		 * caller treats a NULL return as total loss of the array.
		 */
		kfree(orig);
		return NULL;
	}

	memcpy(dst, src, bytes);
out:
	/* dst may still be NULL when src was empty; hand back the
	 * ZERO_SIZE_PTR sentinel instead so callers can NULL-check.
	 */
	return dst ? dst : ZERO_SIZE_PTR;
}
1287 
/* resize an array from old_n items to new_n items. the array is reallocated if it's too
 * small to hold new_n items. new items are zeroed out if the array grows.
 *
 * Contrary to krealloc_array, does not free arr if new_n is zero.
 */
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
	size_t alloc_size;
	void *new_arr;

	if (!new_n || old_n == new_n)
		goto out;

	/* size_mul saturates on overflow rather than wrapping */
	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
	new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
	if (!new_arr) {
		/* krealloc failure leaves arr intact; free it since callers
		 * treat a NULL return as complete loss of the array.
		 */
		kfree(arr);
		return NULL;
	}
	arr = new_arr;

	/* zero the newly exposed tail when growing */
	if (new_n > old_n)
		memset(arr + old_n * size, 0, (new_n - old_n) * size);

out:
	return arr ? arr : ZERO_SIZE_PTR;
}
1315 
/* Copy acquired-reference bookkeeping from @src to @dst: the refs array
 * plus the active lock/preempt/RCU/IRQ summary fields.
 * Returns 0 on success, -ENOMEM if the refs array cannot be (re)allocated.
 */
static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
{
	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
			       sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
	if (!dst->refs)
		return -ENOMEM;

	dst->acquired_refs = src->acquired_refs;
	dst->active_locks = src->active_locks;
	dst->active_preempt_locks = src->active_preempt_locks;
	dst->active_rcu_locks = src->active_rcu_locks;
	dst->active_irq_id = src->active_irq_id;
	dst->active_lock_id = src->active_lock_id;
	dst->active_lock_ptr = src->active_lock_ptr;
	return 0;
}
1332 
/* Copy the stack slot array from @src to @dst, growing dst->stack if
 * needed. Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
{
	size_t n = src->allocated_stack / BPF_REG_SIZE;

	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
				GFP_KERNEL_ACCOUNT);
	if (!dst->stack)
		return -ENOMEM;

	dst->allocated_stack = src->allocated_stack;
	return 0;
}
1345 
/* Resize state->refs to hold @n entries (new tail entries are zeroed by
 * realloc_array) and update acquired_refs accordingly.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
{
	state->refs = realloc_array(state->refs, state->acquired_refs, n,
				    sizeof(struct bpf_reference_state));
	if (!state->refs)
		return -ENOMEM;

	state->acquired_refs = n;
	return 0;
}
1356 
/* Possibly update state->allocated_stack to be at least size bytes. Also
 * possibly update the function's high-water mark in its bpf_subprog_info.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
{
	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;

	/* The stack size is always a multiple of BPF_REG_SIZE. */
	size = round_up(size, BPF_REG_SIZE);
	n = size / BPF_REG_SIZE;

	if (old_n >= n)
		return 0;

	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
	if (!state->stack)
		return -ENOMEM;

	state->allocated_stack = size;

	/* update known max for given subprogram */
	if (env->subprog_info[state->subprogno].stack_depth < size)
		env->subprog_info[state->subprogno].stack_depth = size;

	return 0;
}
1383 
/* Grow state->refs by one entry and return a pointer to the fresh
 * bpf_reference_state slot with its insn_idx recorded; the caller fills in
 * type/id (see acquire_reference() and friends).
 * Returns NULL on allocation failure.
 */
static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	int new_ofs = state->acquired_refs;
	int err;

	err = resize_reference_state(state, state->acquired_refs + 1);
	if (err)
		return NULL;
	state->refs[new_ofs].insn_idx = insn_idx;

	return &state->refs[new_ofs];
}
1402 
/* Acquire a plain pointer reference (REF_TYPE_PTR) with a freshly
 * generated id. Returns the new id on success, -ENOMEM on failure.
 */
static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_reference_state *s;

	s = acquire_reference_state(env, insn_idx);
	if (!s)
		return -ENOMEM;
	s->type = REF_TYPE_PTR;
	s->id = ++env->id_gen;
	return s->id;
}
1414 
/* Record acquisition of a lock-like resource identified by (@type, @id,
 * @ptr) and make it the innermost active lock in the state summary.
 * Returns 0 on success, -ENOMEM on failure.
 */
static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
			      int id, void *ptr)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reference_state *s;

	s = acquire_reference_state(env, insn_idx);
	if (!s)
		return -ENOMEM;
	s->type = type;
	s->id = id;
	s->ptr = ptr;

	state->active_locks++;
	state->active_lock_id = id;
	state->active_lock_ptr = ptr;
	return 0;
}
1433 
/* Record that IRQ flags were saved at @insn_idx and make the new reference
 * the active IRQ state. Returns the new id on success, -ENOMEM on failure.
 */
static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reference_state *s;

	s = acquire_reference_state(env, insn_idx);
	if (!s)
		return -ENOMEM;
	s->type = REF_TYPE_IRQ;
	s->id = ++env->id_gen;

	state->active_irq_id = s->id;
	return s->id;
}
1448 
1449 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1450 {
1451 	int last_idx;
1452 	size_t rem;
1453 
1454 	/* IRQ state requires the relative ordering of elements remaining the
1455 	 * same, since it relies on the refs array to behave as a stack, so that
1456 	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1457 	 * the array instead of swapping the final element into the deleted idx.
1458 	 */
1459 	last_idx = state->acquired_refs - 1;
1460 	rem = state->acquired_refs - idx - 1;
1461 	if (last_idx && idx != last_idx)
1462 		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1463 	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1464 	state->acquired_refs--;
1465 	return;
1466 }
1467 
1468 static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
1469 {
1470 	int i;
1471 
1472 	for (i = 0; i < state->acquired_refs; i++)
1473 		if (state->refs[i].id == ptr_id)
1474 			return true;
1475 
1476 	return false;
1477 }
1478 
/* Release the lock entry matching (@type, @id, @ptr), then reassign the
 * active lock summary to the most recently acquired lock that precedes it
 * in the refs array (or to zero/NULL if none remains).
 * Returns 0 on success, -EINVAL if no matching entry exists.
 */
static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
{
	void *prev_ptr = NULL;
	u32 prev_id = 0;
	int i;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type == type && state->refs[i].id == id &&
		    state->refs[i].ptr == ptr) {
			release_reference_state(state, i);
			state->active_locks--;
			/* Reassign active lock (id, ptr). */
			state->active_lock_id = prev_id;
			state->active_lock_ptr = prev_ptr;
			return 0;
		}
		/* remember the last lock-class entry seen before the match */
		if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
			prev_id = state->refs[i].id;
			prev_ptr = state->refs[i].ptr;
		}
	}
	return -EINVAL;
}
1502 
/* Release the IRQ reference @id. Only the most recently acquired IRQ state
 * (active_irq_id) may be released, enforcing LIFO restore order; releasing
 * any other id fails with -EACCES. On success the previous IRQ entry, if
 * any, becomes the active one.
 */
static int release_irq_state(struct bpf_verifier_state *state, int id)
{
	u32 prev_id = 0;
	int i;

	if (id != state->active_irq_id)
		return -EACCES;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type != REF_TYPE_IRQ)
			continue;
		if (state->refs[i].id == id) {
			release_reference_state(state, i);
			state->active_irq_id = prev_id;
			return 0;
		} else {
			prev_id = state->refs[i].id;
		}
	}
	return -EINVAL;
}
1524 
1525 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1526 						   int id, void *ptr)
1527 {
1528 	int i;
1529 
1530 	for (i = 0; i < state->acquired_refs; i++) {
1531 		struct bpf_reference_state *s = &state->refs[i];
1532 
1533 		if (!(s->type & type))
1534 			continue;
1535 
1536 		if (s->id == id && s->ptr == ptr)
1537 			return s;
1538 	}
1539 	return NULL;
1540 }
1541 
/* Free a single frame's state, including its stack slot array. NULL-safe. */
static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->stack);
	kfree(state);
}
1549 
/* Free and reset the state's jump-history log. */
void bpf_clear_jmp_history(struct bpf_verifier_state *state)
{
	kfree(state->jmp_history);
	state->jmp_history = NULL;
	state->jmp_history_cnt = 0;
}
1556 
/* Free all frames, acquired references and jump history owned by @state;
 * free the state object itself only when @free_self is true (it may be
 * embedded in a larger structure).
 */
void bpf_free_verifier_state(struct bpf_verifier_state *state,
			    bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	kfree(state->refs);
	bpf_clear_jmp_history(state);
	if (free_self)
		kfree(state);
}
1571 
/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	/* shallow-copy every field up to 'stack'; the stack array itself is
	 * deep-copied by copy_stack_state()
	 */
	memcpy(dst, src, offsetof(struct bpf_func_state, stack));
	return copy_stack_state(dst, src);
}
1581 
/* Deep-copy verifier state @src into @dst_state, reusing dst's existing
 * allocations where possible and freeing dst frames beyond src->curframe.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
			   const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
					  src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
					  GFP_KERNEL_ACCOUNT);
	if (!dst_state->jmp_history)
		return -ENOMEM;
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames then src frame, free them, this is also
	 * necessary in case of exceptional exits using bpf_throw.
	 */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	err = copy_reference_state(dst_state, src);
	if (err)
		return err;
	dst_state->speculative = src->speculative;
	dst_state->in_sleepable = src->in_sleepable;
	dst_state->curframe = src->curframe;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	dst_state->dfs_depth = src->dfs_depth;
	dst_state->callback_unroll_depth = src->callback_unroll_depth;
	dst_state->may_goto_depth = src->may_goto_depth;
	dst_state->equal_state = src->equal_state;
	/* copy frames, allocating any that dst does not have yet */
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}
1630 
/* Number of buckets in the explored-states table: one per program insn. */
static u32 state_htab_size(struct bpf_verifier_env *env)
{
	return env->prog->len;
}
1635 
/* Return the explored-states bucket for insn @idx, keyed also by the
 * current frame's callsite so states from different call chains hash to
 * different buckets.
 */
struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_func_state *state = cur->frame[cur->curframe];

	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
}
1643 
1644 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1645 {
1646 	int fr;
1647 
1648 	if (a->curframe != b->curframe)
1649 		return false;
1650 
1651 	for (fr = a->curframe; fr >= 0; fr--)
1652 		if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1653 			return false;
1654 
1655 	return true;
1656 }
1657 
1658 
1659 void bpf_free_backedges(struct bpf_scc_visit *visit)
1660 {
1661 	struct bpf_scc_backedge *backedge, *next;
1662 
1663 	for (backedge = visit->backedges; backedge; backedge = next) {
1664 		bpf_free_verifier_state(&backedge->state, false);
1665 		next = backedge->next;
1666 		kfree(backedge);
1667 	}
1668 	visit->backedges = NULL;
1669 }
1670 
/* Pop the top element of the verification stack: copy its state into
 * env->cur_state (unless cur_state is NULL), optionally rewind the log to
 * the element's position, and report the element's insn indices.
 * Returns 0 on success, -ENOENT when the stack is empty, or a negative
 * error if the state copy fails.
 */
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx, bool pop_log)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = bpf_copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (pop_log)
		bpf_vlog_reset(&env->log, head->log_pos);
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	bpf_free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}
1699 
static bool error_recoverable_with_nospec(int err)
{
	/* Should only return true for non-fatal errors that are allowed to
	 * occur during speculative verification. For these we can insert a
	 * nospec and the program might still be accepted. Do not include
	 * something like ENOMEM because it is likely to re-occur for the next
	 * architectural path once it has been recovered-from in all speculative
	 * paths.
	 */
	switch (err) {
	case -EPERM:
	case -EACCES:
	case -EINVAL:
		return true;
	default:
		return false;
	}
}
1711 
/* Push a copy of the current state onto the verification stack, to be
 * explored later starting at @insn_idx. Returns the pushed element's state
 * on success, or an ERR_PTR: -ENOMEM on allocation/copy failure, -E2BIG if
 * the jump-sequence complexity limit is exceeded.
 */
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
	if (!elem)
		return ERR_PTR(-ENOMEM);

	/* the element is linked in before the copy so that error paths can
	 * clean it up via the regular pop_stack() machinery
	 */
	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	env->head = elem;
	env->stack_size++;
	err = bpf_copy_verifier_state(&elem->st, cur);
	if (err)
		return ERR_PTR(-ENOMEM);
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		return ERR_PTR(-E2BIG);
	}
	if (elem->st.parent) {
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but
		 * 1. speculative states will bump 'branches' for non-branch
		 * instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 * a new state for a sequence of branches and all such current
		 * and cloned states will be pointing to a single parent state
		 * which might have large 'branches' count.
		 */
	}
	return &elem->st;
}
1753 
/* Registers treated as caller-saved across calls: R0-R5. */
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
1757 
/* Set all 64-bit and 32-bit bounds and the tnum of @reg to the exact
 * constant @imm. This helper doesn't clear reg->id.
 */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;

	reg->s32_min_value = (s32)imm;
	reg->s32_max_value = (s32)imm;
	reg->u32_min_value = (u32)imm;
	reg->u32_max_value = (u32)imm;
}
1772 
/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear off and union(map_ptr, range): zero everything between the
	 * 'type' and 'var_off' members in one go
	 */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	/* id and ref_obj_id are cleared explicitly as well */
	reg->id = 0;
	reg->ref_obj_id = 0;
	___mark_reg_known(reg, imm);
}
1785 
/* Mark only the lower 32 bits of @reg as the known constant @imm, leaving
 * the 64-bit bounds untouched.
 */
static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const_subreg(reg->var_off, imm);
	reg->s32_min_value = (s32)imm;
	reg->s32_max_value = (s32)imm;
	reg->u32_min_value = (u32)imm;
	reg->u32_max_value = (u32)imm;
}

/* Mark the 'variable offset' part of a register as zero.  This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

/* Mark @reg as the known scalar constant 0. */
static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
	/* all scalars are assumed imprecise initially (unless unprivileged,
	 * in which case everything is forced to be precise)
	 */
	reg->precise = !env->bpf_capable;
}

/* Convenience wrapper: mark regs[regno] as known zero. */
static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	__mark_reg_known_zero(regs + regno);
}
1818 
/* Initialize @reg as a dynptr register of @type, with unique @dynptr_id
 * used to associate slices with it.
 */
static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
			      bool first_slot, int dynptr_id)
{
	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
	 */
	__mark_reg_known_zero(reg);
	reg->type = CONST_PTR_TO_DYNPTR;
	/* Give each dynptr a unique id to uniquely associate slices to it. */
	reg->id = dynptr_id;
	reg->dynptr.type = type;
	reg->dynptr.first_slot = first_slot;
}
1833 
/* Convert a PTR_MAYBE_NULL register that was just null-checked into its
 * definitely-non-NULL form. Map-value pointers from special map types
 * (map-in-map, sockmap/sockhash, xskmap) are refined to the concrete
 * pointer type the lookup actually yields.
 */
static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
{
	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
		const struct bpf_map *map = reg->map_ptr;

		if (map->inner_map_meta) {
			reg->type = CONST_PTR_TO_MAP;
			reg->map_ptr = map->inner_map_meta;
			/* transfer reg's id which is unique for every map_lookup_elem
			 * as UID of the inner map.
			 */
			if (btf_record_has_field(map->inner_map_meta->record,
						 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
				reg->map_uid = reg->id;
			}
		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
			reg->type = PTR_TO_XDP_SOCK;
		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
			reg->type = PTR_TO_SOCKET;
		} else {
			reg->type = PTR_TO_MAP_VALUE;
		}
		return;
	}

	/* any other pointer type: simply drop the maybe-NULL flag */
	reg->type &= ~PTR_MAYBE_NULL;
}
1862 
/* Mark regs[regno] as a trusted allocated-object pointer to the graph node
 * described by @ds_head, with the node's offset as the known value.
 */
static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
				struct btf_field_graph_root *ds_head)
{
	__mark_reg_known(&regs[regno], ds_head->node_offset);
	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
	regs[regno].btf = ds_head->btf;
	regs[regno].btf_id = ds_head->value_btf_id;
}
1871 
/* Register-level wrapper around type_is_pkt_pointer(). */
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

/* Like reg_is_pkt_pointer(), but also accepts PTR_TO_PACKET_END. */
static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* True for PTR_TO_MEM slices derived from skb/xdp/skb_meta dynptrs. */
static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
{
	return base_type(reg->type) == PTR_TO_MEM &&
	       (reg->type &
		(DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}
1902 
/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;

	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}

/* Reset only the 64-bit min/max bounds. */
static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

/* Reset only the 32-bit (subreg) min/max bounds. */
static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
{
	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}

/* Reset 64-bit bounds and mark var_off fully unknown. */
static void reset_reg64_and_tnum(struct bpf_reg_state *reg)
{
	__mark_reg64_unbounded(reg);
	reg->var_off = tnum_unknown;
}

/* Reset 32-bit bounds and mark var_off fully unknown. */
static void reset_reg32_and_tnum(struct bpf_reg_state *reg)
{
	__mark_reg32_unbounded(reg);
	reg->var_off = tnum_unknown;
}
1944 
1945 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1946 {
1947 	struct tnum var32_off = tnum_subreg(reg->var_off);
1948 
1949 	/* min signed is max(sign bit) | min(other bits) */
1950 	reg->s32_min_value = max_t(s32, reg->s32_min_value,
1951 			var32_off.value | (var32_off.mask & S32_MIN));
1952 	/* max signed is min(sign bit) | max(other bits) */
1953 	reg->s32_max_value = min_t(s32, reg->s32_max_value,
1954 			var32_off.value | (var32_off.mask & S32_MAX));
1955 	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1956 	reg->u32_max_value = min(reg->u32_max_value,
1957 				 (u32)(var32_off.value | var32_off.mask));
1958 }
1959 
/* Tighten the 64-bit bounds of @reg using its var_off (known bits), and
 * collapse the register to a known constant when the resulting u64 range
 * and the tnum overlap in exactly one representable value.
 */
static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
	/* tnum_next: next var_off-representable value after umin; tmax: the
	 * largest value representable by var_off.
	 * NOTE(review): relies on tnum_step() returning the next value the
	 * tnum can represent — confirm against its definition in tnum.c.
	 */
	u64 tnum_next, tmax;
	bool umin_in_tnum;

	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);

	/* Check if u64 and tnum overlap in a single value */
	tnum_next = tnum_step(reg->var_off, reg->umin_value);
	umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value;
	tmax = reg->var_off.value | reg->var_off.mask;
	if (umin_in_tnum && tnum_next > reg->umax_value) {
		/* The u64 range and the tnum only overlap in umin.
		 * u64:  ---[xxxxxx]-----
		 * tnum: --xx----------x-
		 */
		___mark_reg_known(reg, reg->umin_value);
	} else if (!umin_in_tnum && tnum_next == tmax) {
		/* The u64 range and the tnum only overlap in the maximum value
		 * represented by the tnum, called tmax.
		 * u64:  ---[xxxxxx]-----
		 * tnum: xx-----x--------
		 */
		___mark_reg_known(reg, tmax);
	} else if (!umin_in_tnum && tnum_next <= reg->umax_value &&
		   tnum_step(reg->var_off, tnum_next) > reg->umax_value) {
		/* The u64 range and the tnum only overlap in between umin
		 * (excluded) and umax.
		 * u64:  ---[xxxxxx]-----
		 * tnum: xx----x-------x-
		 */
		___mark_reg_known(reg, tnum_next);
	}
}
2002 
/* Refresh both the 32-bit and 64-bit range bounds from var_off knowledge.
 * 32-bit pass runs first; the 64-bit pass may collapse the register to a
 * known constant via ___mark_reg_known() (see the overlap checks there).
 */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	__update_reg32_bounds(reg);
	__update_reg64_bounds(reg);
}
2008 
/* Derive 32-bit sub-register bounds (u32/s32) from the 64-bit bounds. */
static void deduce_bounds_32_from_64(struct bpf_reg_state *reg)
{
	/* If upper 32 bits of u64/s64 range don't change, we can use lower 32
	 * bits to improve our u32/s32 boundaries.
	 *
	 * E.g., the case where we have upper 32 bits as zero ([10, 20] in
	 * u64) is pretty trivial, it's obvious that in u32 we'll also have
	 * [10, 20] range. But this property holds for any 64-bit range as
	 * long as upper 32 bits in that entire range of values stay the same.
	 *
	 * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311]
	 * in decimal) has the same upper 32 bits throughout all the values in
	 * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15])
	 * range.
	 *
	 * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32,
	 * following the rules outlined below about u64/s64 correspondence
	 * (which equally applies to u32 vs s32 correspondence). In general it
	 * depends on actual hexadecimal values of 32-bit range. They can form
	 * only valid u32, or only valid s32 ranges in some cases.
	 *
	 * So we use all these insights to derive bounds for subregisters here.
	 */
	if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) {
		/* u64 to u32 casting preserves validity of low 32 bits as
		 * a range, if upper 32 bits are the same
		 */
		reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value);
		reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value);

		if ((s32)reg->umin_value <= (s32)reg->umax_value) {
			reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
			reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
		}
	}
	if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) {
		/* low 32 bits should form a proper u32 range */
		if ((u32)reg->smin_value <= (u32)reg->smax_value) {
			reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value);
			reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value);
		}
		/* low 32 bits should form a proper s32 range */
		if ((s32)reg->smin_value <= (s32)reg->smax_value) {
			reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
			reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
		}
	}
	/* Special case where upper bits form a small sequence of two
	 * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
	 * 0x00000000 is also valid), while lower bits form a proper s32 range
	 * going from negative numbers to positive numbers. E.g., let's say we
	 * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]).
	 * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff,
	 * 0x0000000000000000, 0x00000000000001}). Ignoring upper 32 bits,
	 * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]).
	 * Note that it doesn't have to be 0xffffffff going to 0x00000000 in
	 * upper 32 bits. As a random example, s64 range
	 * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range
	 * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister.
	 */
	if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) &&
	    (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
	}
	if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) &&
	    (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
	}
}
2081 
/* Cross-derive between u32 and s32 bounds of the 32-bit sub-register. */
static void deduce_bounds_32_from_32(struct bpf_reg_state *reg)
{
	/* if u32 range forms a valid s32 range (due to matching sign bit),
	 * try to learn from that
	 */
	if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value);
	}
	/* If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
		reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
	} else {
		if (reg->u32_max_value < (u32)reg->s32_min_value) {
			/* See deduce_bounds_64_from_64() for detailed explanation.
			 * Refine ranges in the following situation:
			 *
			 * 0                                                   U32_MAX
			 * |  [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx]              |
			 * |----------------------------|----------------------------|
			 * |xxxxx s32 range xxxxxxxxx]                       [xxxxxxx|
			 * 0                     S32_MAX S32_MIN                    -1
			 */
			reg->s32_min_value = (s32)reg->u32_min_value;
			reg->u32_max_value = min_t(u32, reg->u32_max_value, reg->s32_max_value);
		} else if ((u32)reg->s32_max_value < reg->u32_min_value) {
			/*
			 * 0                                                   U32_MAX
			 * |              [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx]  |
			 * |----------------------------|----------------------------|
			 * |xxxxxxxxx]                       [xxxxxxxxxxxx s32 range |
			 * 0                     S32_MAX S32_MIN                    -1
			 */
			reg->s32_max_value = (s32)reg->u32_max_value;
			reg->u32_min_value = max_t(u32, reg->u32_min_value, reg->s32_min_value);
		}
	}
}
2124 
/* Cross-derive between u64 and s64 bounds of the full 64-bit register. */
static void deduce_bounds_64_from_64(struct bpf_reg_state *reg)
{
	/* If u64 range forms a valid s64 range (due to matching sign bit),
	 * try to learn from that. Let's do a bit of ASCII art to see when
	 * this is happening. Let's take u64 range first:
	 *
	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
	 * |-------------------------------|--------------------------------|
	 *
	 * Valid u64 range is formed when umin and umax are anywhere in the
	 * range [0, U64_MAX], and umin <= umax. u64 case is simple and
	 * straightforward. Let's see how s64 range maps onto the same range
	 * of values, annotated below the line for comparison:
	 *
	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
	 * |-------------------------------|--------------------------------|
	 * 0                        S64_MAX S64_MIN                        -1
	 *
	 * So s64 values basically start in the middle and they are logically
	 * contiguous to the right of it, wrapping around from -1 to 0, and
	 * then finishing as S64_MAX (0x7fffffffffffffff) right before
	 * S64_MIN. We can try drawing the continuity of u64 vs s64 values
	 * more visually as mapped to sign-agnostic range of hex values.
	 *
	 *  u64 start                                               u64 end
	 *  _______________________________________________________________
	 * /                                                               \
	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
	 * |-------------------------------|--------------------------------|
	 * 0                        S64_MAX S64_MIN                        -1
	 *                                / \
	 * >------------------------------   ------------------------------->
	 * s64 continues...        s64 end   s64 start          s64 "midpoint"
	 *
	 * What this means is that, in general, we can't always derive
	 * something new about u64 from any random s64 range, and vice versa.
	 *
	 * But we can do that in two particular cases. One is when entire
	 * u64/s64 range is *entirely* contained within left half of the above
	 * diagram or when it is *entirely* contained in the right half. I.e.:
	 *
	 * |-------------------------------|--------------------------------|
	 *     ^                   ^            ^                 ^
	 *     A                   B            C                 D
	 *
	 * [A, B] and [C, D] are contained entirely in their respective halves
	 * and form valid contiguous ranges as both u64 and s64 values. [A, B]
	 * will be non-negative both as u64 and s64 (and in fact it will be
	 * identical ranges no matter the signedness). [C, D] treated as s64
	 * will be a range of negative values, while in u64 it will be
	 * non-negative range of values larger than 0x8000000000000000.
	 *
	 * Now, any other range here can't be represented in both u64 and s64
	 * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid
	 * contiguous u64 ranges, but they are discontinuous in s64. [B, C]
	 * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX],
	 * for example. Similarly, valid s64 range [D, A] (going from negative
	 * to positive values), would be two separate [D, U64_MAX] and [0, A]
	 * ranges as u64. Currently reg_state can't represent two segments per
	 * numeric domain, so in such situations we can only derive maximal
	 * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64).
	 *
	 * So we use these facts to derive umin/umax from smin/smax and vice
	 * versa only if they stay within the same "half". This is equivalent
	 * to checking sign bit: lower half will have sign bit as zero, upper
	 * half have sign bit 1. Below in code we simplify this by just
	 * casting umin/umax as smin/smax and checking if they form valid
	 * range, and vice versa. Those are equivalent checks.
	 */
	if ((s64)reg->umin_value <= (s64)reg->umax_value) {
		reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value);
		reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value);
	}
	/* If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if ((u64)reg->smin_value <= (u64)reg->smax_value) {
		reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
		reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
	} else {
		/* If the s64 range crosses the sign boundary, then it's split
		 * between the beginning and end of the U64 domain. In that
		 * case, we can derive new bounds if the u64 range overlaps
		 * with only one end of the s64 range.
		 *
		 * In the following example, the u64 range overlaps only with
		 * positive portion of the s64 range.
		 *
		 * 0                                                   U64_MAX
		 * |  [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx]              |
		 * |----------------------------|----------------------------|
		 * |xxxxx s64 range xxxxxxxxx]                       [xxxxxxx|
		 * 0                     S64_MAX S64_MIN                    -1
		 *
		 * We can thus derive the following new s64 and u64 ranges.
		 *
		 * 0                                                   U64_MAX
		 * |  [xxxxxx u64 range xxxxx]                               |
		 * |----------------------------|----------------------------|
		 * |  [xxxxxx s64 range xxxxx]                               |
		 * 0                     S64_MAX S64_MIN                    -1
		 *
		 * If they overlap in two places, we can't derive anything
		 * because reg_state can't represent two ranges per numeric
		 * domain.
		 *
		 * 0                                                   U64_MAX
		 * |  [xxxxxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxxxxx]        |
		 * |----------------------------|----------------------------|
		 * |xxxxx s64 range xxxxxxxxx]                    [xxxxxxxxxx|
		 * 0                     S64_MAX S64_MIN                    -1
		 *
		 * The first condition below corresponds to the first diagram
		 * above.
		 */
		if (reg->umax_value < (u64)reg->smin_value) {
			reg->smin_value = (s64)reg->umin_value;
			reg->umax_value = min_t(u64, reg->umax_value, reg->smax_value);
		} else if ((u64)reg->smax_value < reg->umin_value) {
			/* This second condition considers the case where the u64 range
			 * overlaps with the negative portion of the s64 range:
			 *
			 * 0                                                   U64_MAX
			 * |              [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx]  |
			 * |----------------------------|----------------------------|
			 * |xxxxxxxxx]                       [xxxxxxxxxxxx s64 range |
			 * 0                     S64_MAX S64_MIN                    -1
			 */
			reg->smax_value = (s64)reg->umax_value;
			reg->umin_value = max_t(u64, reg->umin_value, reg->smin_value);
		}
	}
}
2259 
/* Derive tighter 64-bit bounds from the 32-bit sub-register bounds. */
static void deduce_bounds_64_from_32(struct bpf_reg_state *reg)
{
	/* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit
	 * values on both sides of 64-bit range in hope to have tighter range.
	 * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from
	 * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff].
	 * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound
	 * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of
	 * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a
	 * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff].
	 * We just need to make sure that derived bounds we are intersecting
	 * with are well-formed ranges in respective s64 or u64 domain, just
	 * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments.
	 */
	__u64 new_umin, new_umax;
	__s64 new_smin, new_smax;

	/* u32 -> u64 tightening, it's always well-formed */
	new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value;
	new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value;
	reg->umin_value = max_t(u64, reg->umin_value, new_umin);
	reg->umax_value = min_t(u64, reg->umax_value, new_umax);
	/* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */
	new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value;
	new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value;
	reg->smin_value = max_t(s64, reg->smin_value, new_smin);
	reg->smax_value = min_t(s64, reg->smax_value, new_smax);

	/* Here we would like to handle a special case after sign extending load,
	 * when upper bits for a 64-bit range are all 1s or all 0s.
	 *
	 * Upper bits are all 1s when register is in a range:
	 *   [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
	 * Upper bits are all 0s when register is in a range:
	 *   [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
	 * Together this forms are continuous range:
	 *   [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
	 *
	 * Now, suppose that register range is in fact tighter:
	 *   [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
	 * Also suppose that it's 32-bit range is positive,
	 * meaning that lower 32-bits of the full 64-bit register
	 * are in the range:
	 *   [0x0000_0000, 0x7fff_ffff] (W)
	 *
	 * If this happens, then any value in a range:
	 *   [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
	 * is smaller than a lowest bound of the range (R):
	 *   0xffff_ffff_8000_0000
	 * which means that upper bits of the full 64-bit register
	 * can't be all 1s, when lower bits are in range (W).
	 *
	 * Note that:
	 *  - 0xffff_ffff_8000_0000 == (s64)S32_MIN
	 *  - 0x0000_0000_7fff_ffff == (s64)S32_MAX
	 * These relations are used in the conditions below.
	 */
	if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
		reg->smin_value = reg->s32_min_value;
		reg->smax_value = reg->s32_max_value;
		reg->umin_value = reg->s32_min_value;
		reg->umax_value = reg->s32_max_value;
		reg->var_off = tnum_intersect(reg->var_off,
					      tnum_range(reg->smin_value, reg->smax_value));
	}
}
2326 
/* One pass of cross-domain bounds derivation: s64<->u64 first, then 64->32,
 * then within the 32-bit sub-register, and finally 32->64. The order lets
 * each step feed refinements into the next.
 */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	deduce_bounds_64_from_64(reg);
	deduce_bounds_32_from_64(reg);
	deduce_bounds_32_from_32(reg);
	deduce_bounds_64_from_32(reg);
}
2334 
2335 /* Attempts to improve var_off based on unsigned min/max information */
2336 static void __reg_bound_offset(struct bpf_reg_state *reg)
2337 {
2338 	struct tnum var64_off = tnum_intersect(reg->var_off,
2339 					       tnum_range(reg->umin_value,
2340 							  reg->umax_value));
2341 	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2342 					       tnum_range(reg->u32_min_value,
2343 							  reg->u32_max_value));
2344 
2345 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2346 }
2347 
static bool range_bounds_violation(struct bpf_reg_state *reg);

/* Re-establish consistency between var_off and the four range domains
 * (u64/s64/u32/s32) after any of them has been updated.
 */
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	/* If the input reg_state is invalid, we can exit early */
	if (range_bounds_violation(reg))
		return;
	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(reg);
	/* We might have learned something about the sign bit. */
	__reg_deduce_bounds(reg);
	/* Second deduction pass: refinements made by the first pass in one
	 * domain can enable further derivations in another.
	 * NOTE(review): the repeated call appears deliberate — confirm
	 * against git history before simplifying.
	 */
	__reg_deduce_bounds(reg);
	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(reg);
	/* Intersecting with the old var_off might have improved our bounds
	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(reg);
}
2368 
2369 static bool range_bounds_violation(struct bpf_reg_state *reg)
2370 {
2371 	return (reg->umin_value > reg->umax_value || reg->smin_value > reg->smax_value ||
2372 		reg->u32_min_value > reg->u32_max_value ||
2373 		reg->s32_min_value > reg->s32_max_value);
2374 }
2375 
2376 static bool const_tnum_range_mismatch(struct bpf_reg_state *reg)
2377 {
2378 	u64 uval = reg->var_off.value;
2379 	s64 sval = (s64)uval;
2380 
2381 	if (!tnum_is_const(reg->var_off))
2382 		return false;
2383 
2384 	return reg->umin_value != uval || reg->umax_value != uval ||
2385 	       reg->smin_value != sval || reg->smax_value != sval;
2386 }
2387 
2388 static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg)
2389 {
2390 	u32 uval32 = tnum_subreg(reg->var_off).value;
2391 	s32 sval32 = (s32)uval32;
2392 
2393 	if (!tnum_subreg_is_const(reg->var_off))
2394 		return false;
2395 
2396 	return reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
2397 	       reg->s32_min_value != sval32 || reg->s32_max_value != sval32;
2398 }
2399 
2400 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2401 				   struct bpf_reg_state *reg, const char *ctx)
2402 {
2403 	const char *msg;
2404 
2405 	if (range_bounds_violation(reg)) {
2406 		msg = "range bounds violation";
2407 		goto out;
2408 	}
2409 
2410 	if (const_tnum_range_mismatch(reg)) {
2411 		msg = "const tnum out of sync with range bounds";
2412 		goto out;
2413 	}
2414 
2415 	if (const_tnum_range_mismatch_32(reg)) {
2416 		msg = "const subreg tnum out of sync with range bounds";
2417 		goto out;
2418 	}
2419 
2420 	return 0;
2421 out:
2422 	verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
2423 		     "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)",
2424 		     ctx, msg, reg->umin_value, reg->umax_value,
2425 		     reg->smin_value, reg->smax_value,
2426 		     reg->u32_min_value, reg->u32_max_value,
2427 		     reg->s32_min_value, reg->s32_max_value,
2428 		     reg->var_off.value, reg->var_off.mask);
2429 	if (env->test_reg_invariants)
2430 		return -EFAULT;
2431 	__mark_reg_unbounded(reg);
2432 	return 0;
2433 }
2434 
2435 static bool __reg32_bound_s64(s32 a)
2436 {
2437 	return a >= 0 && a <= S32_MAX;
2438 }
2439 
2440 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2441 {
2442 	reg->umin_value = reg->u32_min_value;
2443 	reg->umax_value = reg->u32_max_value;
2444 
2445 	/* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
2446 	 * be positive otherwise set to worse case bounds and refine later
2447 	 * from tnum.
2448 	 */
2449 	if (__reg32_bound_s64(reg->s32_min_value) &&
2450 	    __reg32_bound_s64(reg->s32_max_value)) {
2451 		reg->smin_value = reg->s32_min_value;
2452 		reg->smax_value = reg->s32_max_value;
2453 	} else {
2454 		reg->smin_value = 0;
2455 		reg->smax_value = U32_MAX;
2456 	}
2457 }
2458 
/* Mark a register as having a completely unknown (scalar) value. */
void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
{
	/*
	 * Clear type, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	/* Fields laid out at or after var_off are not covered by the memset
	 * above and are reset explicitly here.
	 */
	reg->id = 0;
	reg->ref_obj_id = 0;
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	reg->precise = false;
	__mark_reg_unbounded(reg);
}
2475 
2476 /* Mark a register as having a completely unknown (scalar) value,
2477  * initialize .precise as true when not bpf capable.
2478  */
2479 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2480 			       struct bpf_reg_state *reg)
2481 {
2482 	bpf_mark_reg_unknown_imprecise(reg);
2483 	reg->precise = !env->bpf_capable;
2484 }
2485 
2486 static void mark_reg_unknown(struct bpf_verifier_env *env,
2487 			     struct bpf_reg_state *regs, u32 regno)
2488 {
2489 	__mark_reg_unknown(env, regs + regno);
2490 }
2491 
2492 static int __mark_reg_s32_range(struct bpf_verifier_env *env,
2493 				struct bpf_reg_state *regs,
2494 				u32 regno,
2495 				s32 s32_min,
2496 				s32 s32_max)
2497 {
2498 	struct bpf_reg_state *reg = regs + regno;
2499 
2500 	reg->s32_min_value = max_t(s32, reg->s32_min_value, s32_min);
2501 	reg->s32_max_value = min_t(s32, reg->s32_max_value, s32_max);
2502 
2503 	reg->smin_value = max_t(s64, reg->smin_value, s32_min);
2504 	reg->smax_value = min_t(s64, reg->smax_value, s32_max);
2505 
2506 	reg_bounds_sync(reg);
2507 
2508 	return reg_bounds_sanity_check(env, reg, "s32_range");
2509 }
2510 
/* Mark a register as uninitialized: unknown-scalar state with the type
 * overridden to NOT_INIT.
 */
void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
			   struct bpf_reg_state *reg)
{
	__mark_reg_unknown(env, reg);
	/* Must follow __mark_reg_unknown(), which sets type to SCALAR_VALUE. */
	reg->type = NOT_INIT;
}
2517 
2518 static int mark_btf_ld_reg(struct bpf_verifier_env *env,
2519 			   struct bpf_reg_state *regs, u32 regno,
2520 			   enum bpf_reg_type reg_type,
2521 			   struct btf *btf, u32 btf_id,
2522 			   enum bpf_type_flag flag)
2523 {
2524 	switch (reg_type) {
2525 	case SCALAR_VALUE:
2526 		mark_reg_unknown(env, regs, regno);
2527 		return 0;
2528 	case PTR_TO_BTF_ID:
2529 		mark_reg_known_zero(env, regs, regno);
2530 		regs[regno].type = PTR_TO_BTF_ID | flag;
2531 		regs[regno].btf = btf;
2532 		regs[regno].btf_id = btf_id;
2533 		if (type_may_be_null(flag))
2534 			regs[regno].id = ++env->id_gen;
2535 		return 0;
2536 	case PTR_TO_MEM:
2537 		mark_reg_known_zero(env, regs, regno);
2538 		regs[regno].type = PTR_TO_MEM | flag;
2539 		regs[regno].mem_size = 0;
2540 		return 0;
2541 	default:
2542 		verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__);
2543 		return -EFAULT;
2544 	}
2545 }
2546 
2547 #define DEF_NOT_SUBREG	(0)
2548 static void init_reg_state(struct bpf_verifier_env *env,
2549 			   struct bpf_func_state *state)
2550 {
2551 	struct bpf_reg_state *regs = state->regs;
2552 	int i;
2553 
2554 	for (i = 0; i < MAX_BPF_REG; i++) {
2555 		bpf_mark_reg_not_init(env, &regs[i]);
2556 		regs[i].subreg_def = DEF_NOT_SUBREG;
2557 	}
2558 
2559 	/* frame pointer */
2560 	regs[BPF_REG_FP].type = PTR_TO_STACK;
2561 	mark_reg_known_zero(env, regs, BPF_REG_FP);
2562 	regs[BPF_REG_FP].frameno = state->frameno;
2563 }
2564 
2565 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2566 {
2567 	/*
2568 	 * return_32bit is set to false by default and set explicitly
2569 	 * by the caller when necessary.
2570 	 */
2571 	return (struct bpf_retval_range){ minval, maxval, false };
2572 }
2573 
/* Initialize a bpf_func_state for frame @frameno of subprog @subprogno,
 * entered from instruction @callsite.
 */
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	/* frameno must be set before init_reg_state(), which copies it into
	 * the frame pointer register.
	 */
	state->frameno = frameno;
	state->subprogno = subprogno;
	/* default callback return range is [0, 0] */
	state->callback_ret_range = retval_range(0, 0);
	init_reg_state(env, state);
	mark_verifier_state_scratched(env);
}
2585 
/* Similar to push_stack(), but for async callbacks */
static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
						int insn_idx, int prev_insn_idx,
						int subprog, bool is_sleepable)
{
	struct bpf_verifier_stack_elem *elem;
	struct bpf_func_state *frame;

	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
	if (!elem)
		return ERR_PTR(-ENOMEM);

	/* Link the element onto the exploration stack before the depth check;
	 * on the error paths below, elem stays on env->head — presumably
	 * freed by the common stack teardown (confirm before restructuring).
	 */
	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	env->head = elem;
	env->stack_size++;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env,
			"The sequence of %d jumps is too complex for async cb.\n",
			env->stack_size);
		return ERR_PTR(-E2BIG);
	}
	/* Unlike push_stack() do not bpf_copy_verifier_state().
	 * The caller state doesn't matter.
	 * This is async callback. It starts in a fresh stack.
	 * Initialize it similar to do_check_common().
	 */
	elem->st.branches = 1;
	elem->st.in_sleepable = is_sleepable;
	frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT);
	if (!frame)
		return ERR_PTR(-ENOMEM);
	init_func_state(env, frame,
			BPF_MAIN_FUNC /* callsite */,
			0 /* frameno within this callchain */,
			subprog /* subprog number within this prog */);
	elem->st.frame[0] = frame;
	return &elem->st;
}
2627 
2628 
2629 static int cmp_subprogs(const void *a, const void *b)
2630 {
2631 	return ((struct bpf_subprog_info *)a)->start -
2632 	       ((struct bpf_subprog_info *)b)->start;
2633 }
2634 
2635 /* Find subprogram that contains instruction at 'off' */
2636 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
2637 {
2638 	struct bpf_subprog_info *vals = env->subprog_info;
2639 	int l, r, m;
2640 
2641 	if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
2642 		return NULL;
2643 
2644 	l = 0;
2645 	r = env->subprog_cnt - 1;
2646 	while (l < r) {
2647 		m = l + (r - l + 1) / 2;
2648 		if (vals[m].start <= off)
2649 			l = m;
2650 		else
2651 			r = m - 1;
2652 	}
2653 	return &vals[l];
2654 }
2655 
2656 /* Find subprogram that starts exactly at 'off' */
2657 int bpf_find_subprog(struct bpf_verifier_env *env, int off)
2658 {
2659 	struct bpf_subprog_info *p;
2660 
2661 	p = bpf_find_containing_subprog(env, off);
2662 	if (!p || p->start != off)
2663 		return -ENOENT;
2664 	return p - env->subprog_info;
2665 }
2666 
2667 static int add_subprog(struct bpf_verifier_env *env, int off)
2668 {
2669 	int insn_cnt = env->prog->len;
2670 	int ret;
2671 
2672 	if (off >= insn_cnt || off < 0) {
2673 		verbose(env, "call to invalid destination\n");
2674 		return -EINVAL;
2675 	}
2676 	ret = bpf_find_subprog(env, off);
2677 	if (ret >= 0)
2678 		return ret;
2679 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2680 		verbose(env, "too many subprograms\n");
2681 		return -E2BIG;
2682 	}
2683 	/* determine subprog starts. The end is one before the next starts */
2684 	env->subprog_info[env->subprog_cnt++].start = off;
2685 	sort(env->subprog_info, env->subprog_cnt,
2686 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2687 	return env->subprog_cnt - 1;
2688 }
2689 
/* Locate the insn offset of the exception callback subprog, declared by
 * an "exception_callback:<name>" BTF decl tag on the main subprog.
 * Returns the callback's insn_off (> 0), 0 when no tag is present, or a
 * negative error.
 */
static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	struct btf *btf = aux->btf;
	const struct btf_type *t;
	u32 main_btf_id, id;
	const char *name;
	int ret, i;

	/* Non-zero func_info_cnt implies valid btf */
	if (!aux->func_info_cnt)
		return 0;
	main_btf_id = aux->func_info[0].type_id;

	t = btf_type_by_id(btf, main_btf_id);
	if (!t) {
		verbose(env, "invalid btf id for main subprog in func_info\n");
		return -EINVAL;
	}

	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
	if (IS_ERR(name)) {
		ret = PTR_ERR(name);
		/* If there is no tag present, there is no exception callback */
		if (ret == -ENOENT)
			ret = 0;
		else if (ret == -EEXIST)
			verbose(env, "multiple exception callback tags for main subprog\n");
		return ret;
	}

	/* Resolve the tag value to a global BTF FUNC of that name. */
	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
	if (ret < 0) {
		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
		return ret;
	}
	id = ret;
	t = btf_type_by_id(btf, id);
	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
		verbose(env, "exception callback '%s' must have global linkage\n", name);
		return -EINVAL;
	}
	ret = 0;
	/* Map the callback's BTF id back to an insn offset via func_info. */
	for (i = 0; i < aux->func_info_cnt; i++) {
		if (aux->func_info[i].type_id != id)
			continue;
		ret = aux->func_info[i].insn_off;
		/* Further func_info and subprog checks will also happen
		 * later, so assume this is the right insn_off for now.
		 */
		if (!ret) {
			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
			ret = -EINVAL;
		}
	}
	/* The tag named a function no func_info entry refers to. */
	if (!ret) {
		verbose(env, "exception callback type id not found in func_info\n");
		ret = -EINVAL;
	}
	return ret;
}
2751 
/* Upper bound on distinct module BTFs a single program may reference */
#define MAX_KFUNC_BTFS	256

/* One module BTF referenced via a non-zero insn->off in a kfunc call */
struct bpf_kfunc_btf {
	struct btf *btf;
	struct module *module;	/* pinned via btf_try_get_module() while cached */
	u16 offset;		/* index into the user-supplied fd_array */
};

/* Per-program cache of module BTFs, kept sorted by offset for bsearch() */
struct bpf_kfunc_btf_tab {
	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
	u32 nr_descs;
};
2764 
2765 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2766 {
2767 	const struct bpf_kfunc_desc *d0 = a;
2768 	const struct bpf_kfunc_desc *d1 = b;
2769 
2770 	/* func_id is not greater than BTF_MAX_TYPE */
2771 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2772 }
2773 
2774 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2775 {
2776 	const struct bpf_kfunc_btf *d0 = a;
2777 	const struct bpf_kfunc_btf *d1 = b;
2778 
2779 	return d0->offset - d1->offset;
2780 }
2781 
2782 static struct bpf_kfunc_desc *
2783 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2784 {
2785 	struct bpf_kfunc_desc desc = {
2786 		.func_id = func_id,
2787 		.offset = offset,
2788 	};
2789 	struct bpf_kfunc_desc_tab *tab;
2790 
2791 	tab = prog->aux->kfunc_tab;
2792 	return bsearch(&desc, tab->descs, tab->nr_descs,
2793 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2794 }
2795 
2796 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2797 		       u16 btf_fd_idx, u8 **func_addr)
2798 {
2799 	const struct bpf_kfunc_desc *desc;
2800 
2801 	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2802 	if (!desc)
2803 		return -EFAULT;
2804 
2805 	*func_addr = (u8 *)desc->addr;
2806 	return 0;
2807 }
2808 
/* Return the module BTF selected by fd_array[offset], creating and
 * caching a table entry on first use. A new entry pins both the BTF and
 * its owning module; both references are dropped in
 * bpf_free_kfunc_btf_tab(). Returns ERR_PTR() on failure.
 */
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
					 s16 offset)
{
	struct bpf_kfunc_btf kf_btf = { .offset = offset };
	struct bpf_kfunc_btf_tab *tab;
	struct bpf_kfunc_btf *b;
	struct module *mod;
	struct btf *btf;
	int btf_fd;

	/* Fast path: the table is kept sorted by offset (see sort() below) */
	tab = env->prog->aux->kfunc_btf_tab;
	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
	if (!b) {
		if (tab->nr_descs == MAX_KFUNC_BTFS) {
			verbose(env, "too many different module BTFs\n");
			return ERR_PTR(-E2BIG);
		}

		if (bpfptr_is_null(env->fd_array)) {
			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
			return ERR_PTR(-EPROTO);
		}

		/* 'offset' indexes the user-supplied fd_array of BTF fds */
		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
					    offset * sizeof(btf_fd),
					    sizeof(btf_fd)))
			return ERR_PTR(-EFAULT);

		btf = btf_get_by_fd(btf_fd);
		if (IS_ERR(btf)) {
			verbose(env, "invalid module BTF fd specified\n");
			return btf;
		}

		if (!btf_is_module(btf)) {
			verbose(env, "BTF fd for kfunc is not a module BTF\n");
			btf_put(btf);
			return ERR_PTR(-EINVAL);
		}

		/* Pin the owning module while this BTF is cached */
		mod = btf_try_get_module(btf);
		if (!mod) {
			btf_put(btf);
			return ERR_PTR(-ENXIO);
		}

		b = &tab->descs[tab->nr_descs++];
		b->btf = btf;
		b->module = mod;
		b->offset = offset;

		/* sort() reorders entries by value, so b may no longer point
		 * to the right entry after this
		 */
		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
		     kfunc_btf_cmp_by_off, NULL);
	} else {
		btf = b->btf;
	}

	return btf;
}
2872 
2873 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2874 {
2875 	if (!tab)
2876 		return;
2877 
2878 	while (tab->nr_descs--) {
2879 		module_put(tab->descs[tab->nr_descs].module);
2880 		btf_put(tab->descs[tab->nr_descs].btf);
2881 	}
2882 	kfree(tab);
2883 }
2884 
2885 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2886 {
2887 	if (offset) {
2888 		if (offset < 0) {
2889 			/* In the future, this can be allowed to increase limit
2890 			 * of fd index into fd_array, interpreted as u16.
2891 			 */
2892 			verbose(env, "negative offset disallowed for kernel module function call\n");
2893 			return ERR_PTR(-EINVAL);
2894 		}
2895 
2896 		return __find_kfunc_desc_btf(env, offset);
2897 	}
2898 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
2899 }
2900 
2901 #define KF_IMPL_SUFFIX "_impl"
2902 
2903 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env,
2904 						    struct btf *btf,
2905 						    const char *func_name)
2906 {
2907 	char *buf = env->tmp_str_buf;
2908 	const struct btf_type *func;
2909 	s32 impl_id;
2910 	int len;
2911 
2912 	len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX);
2913 	if (len < 0 || len >= TMP_STR_BUF_LEN) {
2914 		verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX);
2915 		return NULL;
2916 	}
2917 
2918 	impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC);
2919 	if (impl_id <= 0) {
2920 		verbose(env, "cannot find function %s in BTF\n", buf);
2921 		return NULL;
2922 	}
2923 
2924 	func = btf_type_by_id(btf, impl_id);
2925 
2926 	return btf_type_by_id(btf, func->type);
2927 }
2928 
/* Fill *kfunc with BTF metadata (btf, id, name, prototype, flags) for
 * the kernel function 'func_id' in the BTF selected by 'offset'.
 * Returns 0 on success or a negative error. kfunc->flags may be NULL on
 * success (see comment below).
 */
static int fetch_kfunc_meta(struct bpf_verifier_env *env,
			    s32 func_id,
			    s16 offset,
			    struct bpf_kfunc_meta *kfunc)
{
	const struct btf_type *func, *func_proto;
	const char *func_name;
	u32 *kfunc_flags;
	struct btf *btf;

	if (func_id <= 0) {
		verbose(env, "invalid kernel function btf_id %d\n", func_id);
		return -EINVAL;
	}

	btf = find_kfunc_desc_btf(env, offset);
	if (IS_ERR(btf)) {
		verbose(env, "failed to find BTF for kernel function\n");
		return PTR_ERR(btf);
	}

	/*
	 * Note that kfunc_flags may be NULL at this point, which
	 * means that we couldn't find func_id in any relevant
	 * kfunc_id_set. This most likely indicates an invalid kfunc
	 * call.  However we don't fail with an error here,
	 * and let the caller decide what to do with NULL kfunc->flags.
	 */
	kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog);

	func = btf_type_by_id(btf, func_id);
	if (!func || !btf_type_is_func(func)) {
		verbose(env, "kernel btf_id %d is not a function\n", func_id);
		return -EINVAL;
	}

	func_name = btf_name_by_offset(btf, func->name_off);

	/*
	 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag
	 * can be found through the counterpart _impl kfunc.
	 */
	if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS))
		func_proto = find_kfunc_impl_proto(env, btf, func_name);
	else
		func_proto = btf_type_by_id(btf, func->type);

	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
		verbose(env, "kernel function btf_id %d does not have a valid func_proto\n",
			func_id);
		return -EINVAL;
	}

	/* All lookups succeeded: publish the result to the caller. */
	memset(kfunc, 0, sizeof(*kfunc));
	kfunc->btf = btf;
	kfunc->id = func_id;
	kfunc->name = func_name;
	kfunc->proto = func_proto;
	kfunc->flags = kfunc_flags;

	return 0;
}
2991 
/* Register the kfunc call (func_id, offset) in the program's kfunc
 * descriptor table: resolve its address via kallsyms and distill its
 * function model for the JIT. Idempotent for already-registered calls.
 */
int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset)
{
	struct bpf_kfunc_btf_tab *btf_tab;
	struct btf_func_model func_model;
	struct bpf_kfunc_desc_tab *tab;
	struct bpf_prog_aux *prog_aux;
	struct bpf_kfunc_meta kfunc;
	struct bpf_kfunc_desc *desc;
	unsigned long addr;
	int err;

	prog_aux = env->prog->aux;
	tab = prog_aux->kfunc_tab;
	btf_tab = prog_aux->kfunc_btf_tab;
	if (!tab) {
		/* First kfunc call in this program: validate the global
		 * preconditions once before allocating the table.
		 */
		if (!btf_vmlinux) {
			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
			return -ENOTSUPP;
		}

		if (!env->prog->jit_requested) {
			verbose(env, "JIT is required for calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!bpf_jit_supports_kfunc_call()) {
			verbose(env, "JIT does not support calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!env->prog->gpl_compatible) {
			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
			return -EINVAL;
		}

		tab = kzalloc_obj(*tab, GFP_KERNEL_ACCOUNT);
		if (!tab)
			return -ENOMEM;
		prog_aux->kfunc_tab = tab;
	}

	/* func_id == 0 is always invalid, but instead of returning an error, be
	 * conservative and wait until the code elimination pass before returning
	 * error, so that invalid calls that get pruned out can be in BPF programs
	 * loaded from userspace.  It is also required that offset be untouched
	 * for such calls.
	 */
	if (!func_id && !offset)
		return 0;

	/* Module kfuncs (offset != 0) additionally need the BTF cache. */
	if (!btf_tab && offset) {
		btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT);
		if (!btf_tab)
			return -ENOMEM;
		prog_aux->kfunc_btf_tab = btf_tab;
	}

	if (find_kfunc_desc(env->prog, func_id, offset))
		return 0;

	if (tab->nr_descs == MAX_KFUNC_DESCS) {
		verbose(env, "too many different kernel function calls\n");
		return -E2BIG;
	}

	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
	if (err)
		return err;

	addr = kallsyms_lookup_name(kfunc.name);
	if (!addr) {
		verbose(env, "cannot find address for kernel function %s\n", kfunc.name);
		return -EINVAL;
	}

	if (bpf_dev_bound_kfunc_id(func_id)) {
		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
		if (err)
			return err;
	}

	err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model);
	if (err)
		return err;

	/* Append the new descriptor and keep descs sorted so that
	 * find_kfunc_desc() can bsearch them.
	 */
	desc = &tab->descs[tab->nr_descs++];
	desc->func_id = func_id;
	desc->offset = offset;
	desc->addr = addr;
	desc->func_model = func_model;
	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
	     kfunc_desc_cmp_by_id_off, NULL);
	return 0;
}
3086 
3087 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
3088 {
3089 	return !!prog->aux->kfunc_tab;
3090 }
3091 
/* First pass over all insns: register every bpf-to-bpf call/callback
 * target as a subprog, every kfunc call in the kfunc table, and the
 * BTF-tagged exception callback if present. Appends a fake 'exit'
 * subprog entry to simplify later iteration.
 */
static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprog = env->subprog_info;
	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
	struct bpf_insn *insn = env->prog->insnsi;

	/* Add entry function. */
	ret = add_subprog(env, 0);
	if (ret)
		return ret;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
		    !bpf_pseudo_kfunc_call(insn))
			continue;

		if (!env->bpf_capable) {
			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
			return -EPERM;
		}

		/* Call/func targets are insn-relative: i + imm + 1 */
		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
			ret = add_subprog(env, i + insn->imm + 1);
		else
			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);

		if (ret < 0)
			return ret;
	}

	ret = bpf_find_exception_callback_insn_off(env);
	if (ret < 0)
		return ret;
	ex_cb_insn = ret;

	/* If ex_cb_insn > 0, this means that the main program has a subprog
	 * marked using BTF decl tag to serve as the exception callback.
	 */
	if (ex_cb_insn) {
		ret = add_subprog(env, ex_cb_insn);
		if (ret < 0)
			return ret;
		/* Find the subprog index (>= 1) matching the callback's start */
		for (i = 1; i < env->subprog_cnt; i++) {
			if (env->subprog_info[i].start != ex_cb_insn)
				continue;
			env->exception_callback_subprog = i;
			bpf_mark_subprog_exc_cb(env, i);
			break;
		}
	}

	/* Add a fake 'exit' subprog which could simplify subprog iteration
	 * logic. 'subprog_cnt' should not be increased.
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

	return 0;
}
3154 
/* Validate subprog boundaries: every jump must land inside its own
 * subprog, and each subprog's last insn must not fall through into the
 * next one. Also records per-subprog properties along the way (tail
 * calls, LD_ABS/LD_IND usage, exit insn index).
 */
static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		/* Helper call to bpf_tail_call (src_reg == 0 means helper) */
		if (code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == 0 &&
		    insn[i].imm == BPF_FUNC_tail_call) {
			subprog[cur_subprog].has_tail_call = true;
			subprog[cur_subprog].tail_call_reachable = true;
		}
		if (BPF_CLASS(code) == BPF_LD &&
		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
			subprog[cur_subprog].has_ld_abs = true;
		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
			goto next;
		if (BPF_OP(code) == BPF_CALL)
			goto next;
		if (BPF_OP(code) == BPF_EXIT) {
			subprog[cur_subprog].exit_idx = i;
			goto next;
		}
		/* Jump target must stay within [subprog_start, subprog_end) */
		off = i + bpf_jmp_offset(&insn[i]) + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back or bpf_throw call
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP32 | BPF_JA) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}
3210 
3211 /*
3212  * Sort subprogs in topological order so that leaf subprogs come first and
3213  * their callers come later. This is a DFS post-order traversal of the call
3214  * graph. Scan only reachable instructions (those in the computed postorder) of
3215  * the current subprog to discover callees (direct subprogs and sync
3216  * callbacks).
3217  */
3218 static int sort_subprogs_topo(struct bpf_verifier_env *env)
3219 {
3220 	struct bpf_subprog_info *si = env->subprog_info;
3221 	int *insn_postorder = env->cfg.insn_postorder;
3222 	struct bpf_insn *insn = env->prog->insnsi;
3223 	int cnt = env->subprog_cnt;
3224 	int *dfs_stack = NULL;
3225 	int top = 0, order = 0;
3226 	int i, ret = 0;
3227 	u8 *color = NULL;
3228 
3229 	color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT);
3230 	dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT);
3231 	if (!color || !dfs_stack) {
3232 		ret = -ENOMEM;
3233 		goto out;
3234 	}
3235 
3236 	/*
3237 	 * DFS post-order traversal.
3238 	 * Color values: 0 = unvisited, 1 = on stack, 2 = done.
3239 	 */
3240 	for (i = 0; i < cnt; i++) {
3241 		if (color[i])
3242 			continue;
3243 		color[i] = 1;
3244 		dfs_stack[top++] = i;
3245 
3246 		while (top > 0) {
3247 			int cur = dfs_stack[top - 1];
3248 			int po_start = si[cur].postorder_start;
3249 			int po_end = si[cur + 1].postorder_start;
3250 			bool pushed = false;
3251 			int j;
3252 
3253 			for (j = po_start; j < po_end; j++) {
3254 				int idx = insn_postorder[j];
3255 				int callee;
3256 
3257 				if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
3258 					continue;
3259 				callee = bpf_find_subprog(env, idx + insn[idx].imm + 1);
3260 				if (callee < 0) {
3261 					ret = -EFAULT;
3262 					goto out;
3263 				}
3264 				if (color[callee] == 2)
3265 					continue;
3266 				if (color[callee] == 1) {
3267 					if (bpf_pseudo_func(&insn[idx]))
3268 						continue;
3269 					verbose(env, "recursive call from %s() to %s()\n",
3270 						subprog_name(env, cur),
3271 						subprog_name(env, callee));
3272 					ret = -EINVAL;
3273 					goto out;
3274 				}
3275 				color[callee] = 1;
3276 				dfs_stack[top++] = callee;
3277 				pushed = true;
3278 				break;
3279 			}
3280 
3281 			if (!pushed) {
3282 				color[cur] = 2;
3283 				env->subprog_topo_order[order++] = cur;
3284 				top--;
3285 			}
3286 		}
3287 	}
3288 
3289 	if (env->log.level & BPF_LOG_LEVEL2)
3290 		for (i = 0; i < cnt; i++)
3291 			verbose(env, "topo_order[%d] = %s\n",
3292 				i, subprog_name(env, env->subprog_topo_order[i]));
3293 out:
3294 	kvfree(dfs_stack);
3295 	kvfree(color);
3296 	return ret;
3297 }
3298 
/* Mark the nr_slots stack slots of an on-stack object (growing down
 * from spi) as scratched. Always succeeds; returns 0.
 */
static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				    int spi, int nr_slots)
{
	int slot;

	for (slot = spi; slot > spi - nr_slots; slot--)
		mark_stack_slot_scratched(env, slot);
	return 0;
}
3308 
3309 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3310 {
3311 	int spi;
3312 
3313 	/* For CONST_PTR_TO_DYNPTR, it must have already been done by
3314 	 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3315 	 * check_kfunc_call.
3316 	 */
3317 	if (reg->type == CONST_PTR_TO_DYNPTR)
3318 		return 0;
3319 	spi = dynptr_get_spi(env, reg);
3320 	if (spi < 0)
3321 		return spi;
3322 	/* Caller ensures dynptr is valid and initialized, which means spi is in
3323 	 * bounds and spi is the first dynptr slot. Simply mark stack slot as
3324 	 * read.
3325 	 */
3326 	return mark_stack_slot_obj_read(env, reg, spi, BPF_DYNPTR_NR_SLOTS);
3327 }
3328 
/* Mark the stack slots of an iterator object as read. */
static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			  int spi, int nr_slots)
{
	/* Iterators delegate directly to the generic stack-object marker. */
	return mark_stack_slot_obj_read(env, reg, spi, nr_slots);
}
3334 
/* Mark the single stack slot of an IRQ-flag object as read. */
static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi = irq_flag_get_spi(env, reg);

	if (spi < 0)
		return spi;
	return mark_stack_slot_obj_read(env, reg, spi, 1);
}
3344 
/* This function is supposed to be used by the following 32-bit optimization
 * code only. It returns TRUE if the source or destination register operates
 * on 64-bit, otherwise return FALSE.
 *
 * The answer is conservative: when in doubt, claim 64-bit so that the
 * zero-extension optimization stays correct.
 */
bool bpf_is_reg64(struct bpf_insn *insn,
	      u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t)
{
	u8 code, class, op;

	code = insn->code;
	class = BPF_CLASS(code);
	op = BPF_OP(code);
	if (class == BPF_JMP) {
		/* BPF_EXIT for "main" will reach here. Return TRUE
		 * conservatively.
		 */
		if (op == BPF_EXIT)
			return true;
		if (op == BPF_CALL) {
			/* BPF to BPF call will reach here because of marking
			 * caller saved clobber with DST_OP_NO_MARK for which we
			 * don't care the register def because they are anyway
			 * marked as NOT_INIT already.
			 */
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return false;
			/* Helper call will reach here because of arg type
			 * check, conservatively return TRUE.
			 */
			if (t == SRC_OP)
				return true;

			return false;
		}
	}

	/* 16/32-bit byte swaps touch only the low subregister */
	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
		return false;

	if (class == BPF_ALU64 || class == BPF_JMP ||
	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
		return true;

	if (class == BPF_ALU || class == BPF_JMP32)
		return false;

	if (class == BPF_LDX) {
		if (t != SRC_OP)
			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
		/* LDX source must be ptr. */
		return true;
	}

	if (class == BPF_STX) {
		/* BPF_STX (including atomic variants) has one or more source
		 * operands, one of which is a ptr. Check whether the caller is
		 * asking about it.
		 */
		if (t == SRC_OP && reg->type != SCALAR_VALUE)
			return true;
		return BPF_SIZE(code) == BPF_DW;
	}

	if (class == BPF_LD) {
		u8 mode = BPF_MODE(code);

		/* LD_IMM64 */
		if (mode == BPF_IMM)
			return true;

		/* Both LD_IND and LD_ABS return 32-bit data. */
		if (t != SRC_OP)
			return  false;

		/* Implicit ctx ptr. */
		if (regno == BPF_REG_6)
			return true;

		/* Explicit source could be any width. */
		return true;
	}

	if (class == BPF_ST)
		/* The only source register for BPF_ST is a ptr. */
		return true;

	/* Conservatively return true at default. */
	return true;
}
3434 
3435 static void mark_insn_zext(struct bpf_verifier_env *env,
3436 			   struct bpf_reg_state *reg)
3437 {
3438 	s32 def_idx = reg->subreg_def;
3439 
3440 	if (def_idx == DEF_NOT_SUBREG)
3441 		return;
3442 
3443 	env->insn_aux_data[def_idx - 1].zext_dst = true;
3444 	/* The dst will be zero extended, so won't be sub-register anymore. */
3445 	reg->subreg_def = DEF_NOT_SUBREG;
3446 }
3447 
3448 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3449 			   enum bpf_reg_arg_type t)
3450 {
3451 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3452 	struct bpf_reg_state *reg;
3453 	bool rw64;
3454 
3455 	mark_reg_scratched(env, regno);
3456 
3457 	reg = &regs[regno];
3458 	rw64 = bpf_is_reg64(insn, regno, reg, t);
3459 	if (t == SRC_OP) {
3460 		/* check whether register used as source operand can be read */
3461 		if (reg->type == NOT_INIT) {
3462 			verbose(env, "R%d !read_ok\n", regno);
3463 			return -EACCES;
3464 		}
3465 		/* We don't need to worry about FP liveness because it's read-only */
3466 		if (regno == BPF_REG_FP)
3467 			return 0;
3468 
3469 		if (rw64)
3470 			mark_insn_zext(env, reg);
3471 
3472 		return 0;
3473 	} else {
3474 		/* check whether register used as dest operand can be written to */
3475 		if (regno == BPF_REG_FP) {
3476 			verbose(env, "frame pointer is read only\n");
3477 			return -EACCES;
3478 		}
3479 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3480 		if (t == DST_OP)
3481 			mark_reg_unknown(env, regs, regno);
3482 	}
3483 	return 0;
3484 }
3485 
3486 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3487 			 enum bpf_reg_arg_type t)
3488 {
3489 	struct bpf_verifier_state *vstate = env->cur_state;
3490 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3491 
3492 	return __check_reg_arg(env, state->regs, regno, t);
3493 }
3494 
3495 static int insn_stack_access_flags(int frameno, int spi)
3496 {
3497 	return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
3498 }
3499 
3500 static void mark_indirect_target(struct bpf_verifier_env *env, int idx)
3501 {
3502 	env->insn_aux_data[idx].indirect_target = true;
3503 }
3504 
/* Bit layout of one packed linked-register entry (LR_ENTRY_BITS = 10):
 * 3-bit frameno, 6-bit spi-or-regno, 1-bit is_reg flag. A 4-bit count
 * sits in the lowest bits of the packed u64 (see linked_regs_pack()).
 */
#define LR_FRAMENO_BITS	3
#define LR_SPI_BITS	6
#define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
#define LR_SIZE_BITS	4
#define LR_FRAMENO_MASK	((1ull << LR_FRAMENO_BITS) - 1)
#define LR_SPI_MASK	((1ull << LR_SPI_BITS)     - 1)
#define LR_SIZE_MASK	((1ull << LR_SIZE_BITS)    - 1)
#define LR_SPI_OFF	LR_FRAMENO_BITS
#define LR_IS_REG_OFF	(LR_SPI_BITS + LR_FRAMENO_BITS)
#define LINKED_REGS_MAX	6

/* One member of a linked-register group: either a register (is_reg) or
 * a stack slot (spi) in frame 'frameno'.
 */
struct linked_reg {
	u8 frameno;
	union {
		u8 spi;
		u8 regno;
	};
	bool is_reg;
};

/* Unpacked set of up to LINKED_REGS_MAX linked registers/slots */
struct linked_regs {
	int cnt;
	struct linked_reg entries[LINKED_REGS_MAX];
};
3529 
3530 static struct linked_reg *linked_regs_push(struct linked_regs *s)
3531 {
3532 	if (s->cnt < LINKED_REGS_MAX)
3533 		return &s->entries[s->cnt++];
3534 
3535 	return NULL;
3536 }
3537 
3538 /* Use u64 as a vector of 6 10-bit values, use first 4-bits to track
3539  * number of elements currently in stack.
3540  * Pack one history entry for linked registers as 10 bits in the following format:
3541  * - 3-bits frameno
3542  * - 6-bits spi_or_reg
3543  * - 1-bit  is_reg
3544  */
3545 static u64 linked_regs_pack(struct linked_regs *s)
3546 {
3547 	u64 val = 0;
3548 	int i;
3549 
3550 	for (i = 0; i < s->cnt; ++i) {
3551 		struct linked_reg *e = &s->entries[i];
3552 		u64 tmp = 0;
3553 
3554 		tmp |= e->frameno;
3555 		tmp |= e->spi << LR_SPI_OFF;
3556 		tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
3557 
3558 		val <<= LR_ENTRY_BITS;
3559 		val |= tmp;
3560 	}
3561 	val <<= LR_SIZE_BITS;
3562 	val |= s->cnt;
3563 	return val;
3564 }
3565 
3566 static void linked_regs_unpack(u64 val, struct linked_regs *s)
3567 {
3568 	int i;
3569 
3570 	s->cnt = val & LR_SIZE_MASK;
3571 	val >>= LR_SIZE_BITS;
3572 
3573 	for (i = 0; i < s->cnt; ++i) {
3574 		struct linked_reg *e = &s->entries[i];
3575 
3576 		e->frameno =  val & LR_FRAMENO_MASK;
3577 		e->spi     = (val >> LR_SPI_OFF) & LR_SPI_MASK;
3578 		e->is_reg  = (val >> LR_IS_REG_OFF) & 0x1;
3579 		val >>= LR_ENTRY_BITS;
3580 	}
3581 }
3582 
3583 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3584 {
3585 	const struct btf_type *func;
3586 	struct btf *desc_btf;
3587 
3588 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3589 		return NULL;
3590 
3591 	desc_btf = find_kfunc_desc_btf(data, insn->off);
3592 	if (IS_ERR(desc_btf))
3593 		return "<error>";
3594 
3595 	func = btf_type_by_id(desc_btf, insn->imm);
3596 	return btf_name_by_offset(desc_btf, func->name_off);
3597 }
3598 
/* Pretty-print a single insn to the verifier log, resolving kfunc call
 * targets to names through their BTF.
 */
void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	const struct bpf_insn_cbs cbs = {
		.cb_call	= disasm_kfunc_name,
		.cb_print	= verbose,
		.private_data	= env,
	};

	print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
}
3609 
3610 /* If any register R in hist->linked_regs is marked as precise in bt,
3611  * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
3612  */
3613 void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
3614 {
3615 	struct linked_regs linked_regs;
3616 	bool some_precise = false;
3617 	int i;
3618 
3619 	if (!hist || hist->linked_regs == 0)
3620 		return;
3621 
3622 	linked_regs_unpack(hist->linked_regs, &linked_regs);
3623 	for (i = 0; i < linked_regs.cnt; ++i) {
3624 		struct linked_reg *e = &linked_regs.entries[i];
3625 
3626 		if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
3627 		    (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
3628 			some_precise = true;
3629 			break;
3630 		}
3631 	}
3632 
3633 	if (!some_precise)
3634 		return;
3635 
3636 	for (i = 0; i < linked_regs.cnt; ++i) {
3637 		struct linked_reg *e = &linked_regs.entries[i];
3638 
3639 		if (e->is_reg)
3640 			bpf_bt_set_frame_reg(bt, e->frameno, e->regno);
3641 		else
3642 			bpf_bt_set_frame_slot(bt, e->frameno, e->spi);
3643 	}
3644 }
3645 
/* Mark the chain of insns that produced 'regno' in the current state as
 * precision-relevant.
 */
int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
	return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
}
3650 
/* mark_chain_precision_batch() assumes that env->bt is set in the caller to
 * desired reg and stack masks across all relevant frames; regno == -1
 * selects those pre-set masks instead of a single register.
 */
static int mark_chain_precision_batch(struct bpf_verifier_env *env,
				      struct bpf_verifier_state *starting_state)
{
	return bpf_mark_chain_precision(env, starting_state, -1, NULL);
}
3659 
/* Register types that keep their meaning when spilled to and filled
 * back from the stack; anything else is not tracked across a spill.
 * Modifier flags are stripped via base_type() before matching.
 */
static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (base_type(type)) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case PTR_TO_FLOW_KEYS:
	case CONST_PTR_TO_MAP:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_XDP_SOCK:
	case PTR_TO_BTF_ID:
	case PTR_TO_BUF:
	case PTR_TO_MEM:
	case PTR_TO_FUNC:
	case PTR_TO_MAP_KEY:
	case PTR_TO_ARENA:
		return true;
	default:
		return false;
	}
}
3686 
3687 
3688 /* check if register is a constant scalar value */
3689 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
3690 {
3691 	return reg->type == SCALAR_VALUE &&
3692 	       tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
3693 }
3694 
3695 /* assuming is_reg_const() is true, return constant value of a register */
3696 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
3697 {
3698 	return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
3699 }
3700 
3701 static bool __is_pointer_value(bool allow_ptr_leaks,
3702 			       const struct bpf_reg_state *reg)
3703 {
3704 	if (allow_ptr_leaks)
3705 		return false;
3706 
3707 	return reg->type != SCALAR_VALUE;
3708 }
3709 
3710 static void clear_scalar_id(struct bpf_reg_state *reg)
3711 {
3712 	reg->id = 0;
3713 	reg->delta = 0;
3714 }
3715 
/* Prepare src_reg's id before an rX = rY move so that the range-propagation
 * link between the two registers can be established on copy.
 */
static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
					struct bpf_reg_state *src_reg)
{
	/* Only scalar registers participate in linked-register tracking. */
	if (src_reg->type != SCALAR_VALUE)
		return;
	/*
	 * The verifier is processing rX = rY insn and
	 * rY->id has special linked register already.
	 * Clear it, since multiple rX += const are not supported.
	 */
	if (src_reg->id & BPF_ADD_CONST)
		clear_scalar_id(src_reg);
	/*
	 * Ensure that src_reg has a valid ID that will be copied to
	 * dst_reg and then will be used by sync_linked_regs() to
	 * propagate min/max range.
	 */
	if (!src_reg->id && !tnum_is_const(src_reg->var_off))
		src_reg->id = ++env->id_gen;
}
3736 
/* Copy the full register state from src into dst.
 * NOTE(review): this is a plain struct assignment; the previous comment
 * claimed dst->parent and dst->live are preserved, but nothing here does so —
 * confirm those fields no longer exist on bpf_reg_state in this tree.
 */
static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
{
	*dst = *src;
}
3742 
/* Record a spill of @reg into stack slot @spi. The whole register state is
 * copied into the slot's spilled_ptr; @size of the slot's 8 per-byte markers
 * become STACK_SPILL and the remaining bytes are marked as misc.
 */
static void save_register_state(struct bpf_verifier_env *env,
				struct bpf_func_state *state,
				int spi, struct bpf_reg_state *reg,
				int size)
{
	int i;

	copy_register_state(&state->stack[spi].spilled_ptr, reg);

	/* mark the @size bytes actually covered by the spill */
	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
		state->stack[spi].slot_type[i - 1] = STACK_SPILL;

	/* size < 8 bytes spill */
	for (; i; i--)
		mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
}
3759 
3760 static bool is_bpf_st_mem(struct bpf_insn *insn)
3761 {
3762 	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3763 }
3764 
/* Number of significant bits in @reg's value: position of the highest bit
 * that may be set according to umax_value (fls64() returns 0 for 0).
 */
static int get_reg_width(struct bpf_reg_state *reg)
{
	return fls64(reg->umax_value);
}
3769 
/* See comment for mark_fastcall_pattern_for_call() */
static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
					  struct bpf_func_state *state, int insn_idx, int off)
{
	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	int i;

	/* Nothing to do when the access is below the fastcall region or is
	 * itself part of a recognized fastcall pattern.
	 */
	if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
		return;
	/* access to the region [max_stack_depth .. fastcall_stack_off)
	 * from something that is not a part of the fastcall pattern,
	 * disable fastcall rewrites for current subprogram by setting
	 * fastcall_stack_off to a value smaller than any possible offset.
	 */
	subprog->fastcall_stack_off = S16_MIN;
	/* reset fastcall aux flags within subprogram,
	 * happens at most once per subprogram
	 */
	for (i = subprog->start; i < (subprog + 1)->start; ++i) {
		aux[i].fastcall_spills_num = 0;
		aux[i].fastcall_pattern = 0;
	}
}
3794 
3795 static void scrub_special_slot(struct bpf_func_state *state, int spi)
3796 {
3797 	int i;
3798 
3799 	/* regular write of data into stack destroys any spilled ptr */
3800 	state->stack[spi].spilled_ptr.type = NOT_INIT;
3801 	/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
3802 	if (is_stack_slot_special(&state->stack[spi]))
3803 		for (i = 0; i < BPF_REG_SIZE; i++)
3804 			scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3805 }
3806 
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 */
static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
				       /* stack frame we're writing to */
				       struct bpf_func_state *state,
				       int off, int size, int value_regno,
				       int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	struct bpf_reg_state *reg = NULL;
	int insn_flags = insn_stack_access_flags(state->frameno, spi);

	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	/* Unprivileged programs may not partially overwrite a spilled
	 * non-scalar (pointer) register, as that could corrupt or leak it.
	 */
	if (!env->allow_ptr_leaks &&
	    bpf_is_spilled_reg(&state->stack[spi]) &&
	    !bpf_is_spilled_scalar_reg(&state->stack[spi]) &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0)
		reg = &cur->regs[value_regno];
	if (!env->bypass_spec_v4) {
		/* Spectre v4 mitigation: flag stores of spillable pointers,
		 * or stores over slots that held anything other than plain
		 * data, so a nospec barrier can be inserted after them.
		 */
		bool sanitize = reg && is_spillable_regtype(reg->type);

		for (i = 0; i < size; i++) {
			u8 type = state->stack[spi].slot_type[i];

			if (type != STACK_MISC && type != STACK_ZERO) {
				sanitize = true;
				break;
			}
		}

		if (sanitize)
			env->insn_aux_data[insn_idx].nospec_result = true;
	}

	/* A write overlapping a dynptr slot invalidates the dynptr. */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;

	check_fastcall_stack_contract(env, state, insn_idx, off);
	mark_stack_slot_scratched(env, spi);
	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
		/* Scalar spill to a slot-aligned offset: keep full state. */
		bool reg_value_fits;

		reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
		/* Make sure that reg had an ID to build a relation on spill. */
		if (reg_value_fits)
			assign_scalar_id_before_mov(env, reg);
		save_register_state(env, state, spi, reg, size);
		/* Break the relation on a narrowing spill. */
		if (!reg_value_fits)
			state->stack[spi].spilled_ptr.id = 0;
	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
		   env->bpf_capable) {
		/* BPF_ST_MEM stores a known immediate: synthesize a constant
		 * scalar in a fake register and spill that.
		 */
		struct bpf_reg_state *tmp_reg = &env->fake_reg[0];

		memset(tmp_reg, 0, sizeof(*tmp_reg));
		__mark_reg_known(tmp_reg, insn->imm);
		tmp_reg->type = SCALAR_VALUE;
		save_register_state(env, state, spi, tmp_reg, size);
	} else if (reg && is_spillable_regtype(reg->type)) {
		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose_linfo(env, insn_idx, "; ");
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		if (state != cur && reg->type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}
		save_register_state(env, state, spi, reg, size);
	} else {
		/* Not a tracked spill: treat the write as misc (or zero) data. */
		u8 type = STACK_MISC;

		scrub_special_slot(state, spi);

		/* when we zero initialize stack slots mark them as such */
		if ((reg && bpf_register_is_null(reg)) ||
		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
			/* STACK_ZERO case happened because register spill
			 * wasn't properly aligned at the stack slot boundary,
			 * so it's not a register spill anymore; force
			 * originating register to be precise to make
			 * STACK_ZERO correct for subsequent states
			 */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
			type = STACK_ZERO;
		}

		/* Mark slots affected by this stack write. */
		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
		insn_flags = 0; /* not a register spill */
	}

	if (insn_flags)
		return bpf_push_jmp_history(env, env->cur_state, insn_flags, 0);
	return 0;
}
3919 
/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
 * known to contain a variable offset.
 * This function checks whether the write is permitted and conservatively
 * tracks the effects of the write, considering that each stack slot in the
 * dynamic range is potentially written to.
 *
 * 'value_regno' can be -1, meaning that an unknown value is being written to
 * the stack.
 *
 * Spilled pointers in range are not marked as written because we don't know
 * what's going to be actually written. This means that read propagation for
 * future reads cannot be terminated by this write.
 *
 * For privileged programs, uninitialized stack slots are considered
 * initialized by this write (even though we don't know exactly what offsets
 * are going to be written to). The idea is that we don't want the verifier to
 * reject future reads that access slots written to through variable offsets.
 */
static int check_stack_write_var_off(struct bpf_verifier_env *env,
				     /* func where register points to */
				     struct bpf_func_state *state,
				     int ptr_regno, int off, int size,
				     int value_regno, int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int min_off, max_off;
	int i, err;
	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	bool writing_zero = false;
	/* set if the fact that we're writing a zero is used to let any
	 * stack slots remain STACK_ZERO
	 */
	bool zero_used = false;

	cur = env->cur_state->frame[env->cur_state->curframe];
	ptr_reg = &cur->regs[ptr_regno];
	/* [min_off, max_off) conservatively covers every byte this write
	 * could touch given the pointer's possible values.
	 */
	min_off = ptr_reg->smin_value + off;
	max_off = ptr_reg->smax_value + off + size;
	if (value_regno >= 0)
		value_reg = &cur->regs[value_regno];
	if ((value_reg && bpf_register_is_null(value_reg)) ||
	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
		writing_zero = true;

	/* A write anywhere in range invalidates any overlapping dynptr. */
	for (i = min_off; i < max_off; i++) {
		int spi;

		spi = bpf_get_spi(i);
		err = destroy_if_dynptr_stack_slot(env, state, spi);
		if (err)
			return err;
	}

	check_fastcall_stack_contract(env, state, insn_idx, min_off);
	/* Variable offset writes destroy any spilled pointers in range. */
	for (i = min_off; i < max_off; i++) {
		u8 new_type, *stype;
		int slot, spi;

		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		mark_stack_slot_scratched(env, spi);

		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
			/* Reject the write if range we may write to has not
			 * been initialized beforehand. If we didn't reject
			 * here, the ptr status would be erased below (even
			 * though not all slots are actually overwritten),
			 * possibly opening the door to leaks.
			 *
			 * We do however catch STACK_INVALID case below, and
			 * only allow reading possibly uninitialized memory
			 * later for CAP_PERFMON, as the write may not happen to
			 * that slot.
			 */
			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
				insn_idx, i);
			return -EINVAL;
		}

		/* If writing_zero and the spi slot contains a spill of value 0,
		 * maintain the spill type.
		 */
		if (writing_zero && *stype == STACK_SPILL &&
		    bpf_is_spilled_scalar_reg(&state->stack[spi])) {
			struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;

			if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
				zero_used = true;
				continue;
			}
		}

		/*
		 * Scrub slots if variable-offset stack write goes over spilled pointers.
		 * Otherwise bpf_is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT
		 * and valid program is rejected by check_stack_read_fixed_off()
		 * with obscure "invalid size of register fill" message.
		 */
		scrub_special_slot(state, spi);

		/* Update the slot type. */
		new_type = STACK_MISC;
		if (writing_zero && *stype == STACK_ZERO) {
			new_type = STACK_ZERO;
			zero_used = true;
		}
		/* If the slot is STACK_INVALID, we check whether it's OK to
		 * pretend that it will be initialized by this write. The slot
		 * might not actually be written to, and so if we mark it as
		 * initialized future reads might leak uninitialized memory.
		 * For privileged programs, we will accept such reads to slots
		 * that may or may not be written because, if we reject
		 * them, the error would be too confusing.
		 * Conservatively, treat STACK_POISON in a similar way.
		 */
		if ((*stype == STACK_INVALID || *stype == STACK_POISON) &&
		    !env->allow_uninit_stack) {
			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
					insn_idx, i);
			return -EINVAL;
		}
		*stype = new_type;
	}
	if (zero_used) {
		/* backtracking doesn't work for STACK_ZERO yet. */
		err = mark_chain_precision(env, value_regno);
		if (err)
			return err;
	}
	return 0;
}
4054 
/* When register 'dst_regno' is assigned some values from stack[min_off,
 * max_off), we set the register's type according to the types of the
 * respective stack slots. If all the stack values are known to be zeros, then
 * so is the destination reg. Otherwise, the register is considered to be
 * SCALAR. This function does not deal with register filling; the caller must
 * ensure that all spilled registers in the stack range have been marked as
 * read.
 */
static void mark_reg_stack_read(struct bpf_verifier_env *env,
				/* func where src register points to */
				struct bpf_func_state *ptr_state,
				int min_off, int max_off, int dst_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot, spi;
	u8 *stype;
	int zeros = 0;

	/* Count STACK_ZERO bytes; stop at the first byte that isn't zero. */
	for (i = min_off; i < max_off; i++) {
		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		mark_stack_slot_scratched(env, spi);
		stype = ptr_state->stack[spi].slot_type;
		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
			break;
		zeros++;
	}
	if (zeros == max_off - min_off) {
		/* Any access_size read into register is zero extended,
		 * so the whole register == const_zero.
		 */
		__mark_reg_const_zero(env, &state->regs[dst_regno]);
	} else {
		/* have read misc data from the stack */
		mark_reg_unknown(env, state->regs, dst_regno);
	}
}
4093 
/* Read the stack at 'off' and put the results into the register indicated by
 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
 * spilled reg.
 *
 * 'dst_regno' can be -1, meaning that the read value is not going to a
 * register.
 *
 * The access is assumed to be within the current stack bounds.
 */
static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
				      /* func where src register points to */
				      struct bpf_func_state *reg_state,
				      int off, int size, int dst_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	struct bpf_reg_state *reg;
	u8 *stype, type;
	int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);

	stype = reg_state->stack[spi].slot_type;
	reg = &reg_state->stack[spi].spilled_ptr;

	mark_stack_slot_scratched(env, spi);
	check_fastcall_stack_contract(env, state, env->insn_idx, off);

	if (bpf_is_spilled_reg(&reg_state->stack[spi])) {
		u8 spill_size = 1;

		/* Measure how many bytes of the slot the spill covers. */
		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
			spill_size++;

		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
			/* Partial fills are only tracked for scalars; a partial
			 * fill of a spilled pointer is rejected.
			 */
			if (reg->type != SCALAR_VALUE) {
				verbose_linfo(env, env->insn_idx, "; ");
				verbose(env, "invalid size of register fill\n");
				return -EACCES;
			}

			if (dst_regno < 0)
				return 0;

			if (size <= spill_size &&
			    bpf_stack_narrow_access_ok(off, size, spill_size)) {
				/* The earlier check_reg_arg() has decided the
				 * subreg_def for this insn.  Save it first.
				 */
				s32 subreg_def = state->regs[dst_regno].subreg_def;

				if (env->bpf_capable && size == 4 && spill_size == 4 &&
				    get_reg_width(reg) <= 32)
					/* Ensure stack slot has an ID to build a relation
					 * with the destination register on fill.
					 */
					assign_scalar_id_before_mov(env, reg);
				copy_register_state(&state->regs[dst_regno], reg);
				state->regs[dst_regno].subreg_def = subreg_def;

				/* Break the relation on a narrowing fill.
				 * coerce_reg_to_size will adjust the boundaries.
				 */
				if (get_reg_width(reg) > size * BITS_PER_BYTE)
					clear_scalar_id(&state->regs[dst_regno]);
			} else {
				/* Mixed read: classify every byte in range. */
				int spill_cnt = 0, zero_cnt = 0;

				for (i = 0; i < size; i++) {
					type = stype[(slot - i) % BPF_REG_SIZE];
					if (type == STACK_SPILL) {
						spill_cnt++;
						continue;
					}
					if (type == STACK_MISC)
						continue;
					if (type == STACK_ZERO) {
						zero_cnt++;
						continue;
					}
					if (type == STACK_INVALID && env->allow_uninit_stack)
						continue;
					if (type == STACK_POISON) {
						verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
							off, i, size);
					} else {
						verbose(env, "invalid read from stack off %d+%d size %d\n",
							off, i, size);
					}
					return -EACCES;
				}

				if (spill_cnt == size &&
				    tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
					__mark_reg_const_zero(env, &state->regs[dst_regno]);
					/* this IS register fill, so keep insn_flags */
				} else if (zero_cnt == size) {
					/* similarly to mark_reg_stack_read(), preserve zeroes */
					__mark_reg_const_zero(env, &state->regs[dst_regno]);
					insn_flags = 0; /* not restoring original register state */
				} else {
					mark_reg_unknown(env, state->regs, dst_regno);
					insn_flags = 0; /* not restoring original register state */
				}
			}
		} else if (dst_regno >= 0) {
			/* restore register state from stack */
			if (env->bpf_capable)
				/* Ensure stack slot has an ID to build a relation
				 * with the destination register on fill.
				 */
				assign_scalar_id_before_mov(env, reg);
			copy_register_state(&state->regs[dst_regno], reg);
			/* mark reg as written since spilled pointer state likely
			 * has its liveness marks cleared by is_state_visited()
			 * which resets stack/reg liveness for state transitions
			 */
		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
			/* If dst_regno==-1, the caller is asking us whether
			 * it is acceptable to use this value as a SCALAR_VALUE
			 * (e.g. for XADD).
			 * We must not allow unprivileged callers to do that
			 * with spilled pointers.
			 */
			verbose(env, "leaking pointer from stack off %d\n",
				off);
			return -EACCES;
		}
	} else {
		/* No spill in this slot: validate every byte being read. */
		for (i = 0; i < size; i++) {
			type = stype[(slot - i) % BPF_REG_SIZE];
			if (type == STACK_MISC)
				continue;
			if (type == STACK_ZERO)
				continue;
			if (type == STACK_INVALID && env->allow_uninit_stack)
				continue;
			if (type == STACK_POISON) {
				verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
					off, i, size);
			} else {
				verbose(env, "invalid read from stack off %d+%d size %d\n",
					off, i, size);
			}
			return -EACCES;
		}
		if (dst_regno >= 0)
			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
		insn_flags = 0; /* we are not restoring spilled register */
	}
	if (insn_flags)
		return bpf_push_jmp_history(env, env->cur_state, insn_flags, 0);
	return 0;
}
4247 
/* Who is performing a stack/memory access — used to decide which rules apply. */
enum bpf_access_src {
	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
	ACCESS_HELPER = 2,  /* the access is performed by a helper */
};

/* Forward declaration: defined later in this file. */
static int check_stack_range_initialized(struct bpf_verifier_env *env,
					 int regno, int off, int access_size,
					 bool zero_size_allowed,
					 enum bpf_access_type type,
					 struct bpf_call_arg_meta *meta);
4258 
4259 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
4260 {
4261 	return cur_regs(env) + regno;
4262 }
4263 
/* Read the stack at 'ptr_regno + off' and put the result into the register
 * 'dst_regno'.
 * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
 * but not its variable offset.
 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
 *
 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
 * filling registers (i.e. reads of spilled register cannot be detected when
 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
 * offset; for a fixed offset check_stack_read_fixed_off should be used
 * instead.
 */
static int check_stack_read_var_off(struct bpf_verifier_env *env,
				    int ptr_regno, int off, int size, int dst_regno)
{
	/* The state of the source register. */
	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
	struct bpf_func_state *ptr_state = bpf_func(env, reg);
	int err;
	int min_off, max_off;

	/* Note that we pass a NULL meta, so raw access will not be permitted.
	 */
	err = check_stack_range_initialized(env, ptr_regno, off, size,
					    false, BPF_READ, NULL);
	if (err)
		return err;

	/* Conservative range of offsets the read could start from. */
	min_off = reg->smin_value + off;
	max_off = reg->smax_value + off;
	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
	check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
	return 0;
}
4299 
/* check_stack_read dispatches to check_stack_read_fixed_off or
 * check_stack_read_var_off.
 *
 * The caller must ensure that the offset falls within the allocated stack
 * bounds.
 *
 * 'dst_regno' is a register which will receive the value from the stack. It
 * can be -1, meaning that the read value is not going to a register.
 */
static int check_stack_read(struct bpf_verifier_env *env,
			    int ptr_regno, int off, int size,
			    int dst_regno)
{
	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
	struct bpf_func_state *state = bpf_func(env, reg);
	int err;
	/* Some accesses are only permitted with a static offset. */
	bool var_off = !tnum_is_const(reg->var_off);

	/* The offset is required to be static when reads don't go to a
	 * register, in order to not leak pointers (see
	 * check_stack_read_fixed_off).
	 */
	if (dst_regno < 0 && var_off) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
			tn_buf, off, size);
		return -EACCES;
	}
	/* Variable offset is prohibited for unprivileged mode for simplicity
	 * since it requires corresponding support in Spectre masking for stack
	 * ALU. See also retrieve_ptr_limit(). The check in
	 * check_stack_access_for_ptr_arithmetic() called by
	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
	 * with variable offsets, therefore no check is required here. Further,
	 * just checking it here would be insufficient as speculative stack
	 * writes could still lead to unsafe speculative behaviour.
	 */
	if (!var_off) {
		/* Fold the known-constant part of var_off into the offset. */
		off += reg->var_off.value;
		err = check_stack_read_fixed_off(env, state, off, size,
						 dst_regno);
	} else {
		/* Variable offset stack reads need more conservative handling
		 * than fixed offset ones. Note that dst_regno >= 0 on this
		 * branch.
		 */
		err = check_stack_read_var_off(env, ptr_regno, off, size,
					       dst_regno);
	}
	return err;
}
4354 
4355 
4356 /* check_stack_write dispatches to check_stack_write_fixed_off or
4357  * check_stack_write_var_off.
4358  *
4359  * 'ptr_regno' is the register used as a pointer into the stack.
4360  * 'value_regno' is the register whose value we're writing to the stack. It can
4361  * be -1, meaning that we're not writing from a register.
4362  *
4363  * The caller must ensure that the offset falls within the maximum stack size.
4364  */
4365 static int check_stack_write(struct bpf_verifier_env *env,
4366 			     int ptr_regno, int off, int size,
4367 			     int value_regno, int insn_idx)
4368 {
4369 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4370 	struct bpf_func_state *state = bpf_func(env, reg);
4371 	int err;
4372 
4373 	if (tnum_is_const(reg->var_off)) {
4374 		off += reg->var_off.value;
4375 		err = check_stack_write_fixed_off(env, state, off, size,
4376 						  value_regno, insn_idx);
4377 	} else {
4378 		/* Variable offset stack reads need more conservative handling
4379 		 * than fixed offset ones.
4380 		 */
4381 		err = check_stack_write_var_off(env, state,
4382 						ptr_regno, off, size,
4383 						value_regno, insn_idx);
4384 	}
4385 	return err;
4386 }
4387 
4388 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
4389 				 int off, int size, enum bpf_access_type type)
4390 {
4391 	struct bpf_reg_state *reg = reg_state(env, regno);
4392 	struct bpf_map *map = reg->map_ptr;
4393 	u32 cap = bpf_map_flags_to_cap(map);
4394 
4395 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4396 		verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n",
4397 			map->value_size, reg->smin_value + off, size);
4398 		return -EACCES;
4399 	}
4400 
4401 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4402 		verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n",
4403 			map->value_size, reg->smin_value + off, size);
4404 		return -EACCES;
4405 	}
4406 
4407 	return 0;
4408 }
4409 
/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
static int __check_mem_access(struct bpf_verifier_env *env, int regno,
			      int off, int size, u32 mem_size,
			      bool zero_size_allowed)
{
	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
	struct bpf_reg_state *reg;

	/* Access is fine when the offset is non-negative, the size is
	 * acceptable, and the end of the access (computed in u64 to avoid
	 * overflow) stays within the region.
	 */
	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
		return 0;

	/* Out of bounds: pick an error message matching the pointer type. */
	reg = &cur_regs(env)[regno];
	switch (reg->type) {
	case PTR_TO_MAP_KEY:
		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_MAP_VALUE:
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
			off, size, regno, reg->id, off, mem_size);
		break;
	case PTR_TO_CTX:
		verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_MEM:
	default:
		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
			mem_size, off, size);
	}

	return -EACCES;
}
4449 
/* check read/write into a memory region with possible variable offset */
static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
				   int off, int size, u32 mem_size,
				   bool zero_size_allowed)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	int err;

	/* We may have adjusted the register pointing to memory region, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 *
	 * The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * index'es we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	if (reg->smin_value < 0 &&
	    (reg->smin_value == S64_MIN ||
	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
	      reg->smin_value + off < 0)) {
		/* Negative (or s32-overflowing) minimum offset: reject. */
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	/* Check the lowest offset the access could start from. */
	err = __check_mem_access(env, regno, reg->smin_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
			regno);
		return -EACCES;
	}
	/* Check the highest offset the access could start from. */
	err = __check_mem_access(env, regno, reg->umax_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d max value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	return 0;
}
4505 
/* Validate the offset state of pointer register @regno: the variable offset
 * must be a known constant, the signed minimum must be non-negative, and,
 * unless @fixed_off_ok, that constant must be zero.
 */
static int __check_ptr_off_reg(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int regno,
			       bool fixed_off_ok)
{
	/* Access to this pointer-typed register or passing it to a helper
	 * is only allowed in its original, unmodified form.
	 */

	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable %s access var_off=%s disallowed\n",
			reg_type_str(env, reg->type), tn_buf);
		return -EACCES;
	}

	if (reg->smin_value < 0) {
		verbose(env, "negative offset %s ptr R%d off=%lld disallowed\n",
			reg_type_str(env, reg->type), regno, reg->var_off.value);
		return -EACCES;
	}

	if (!fixed_off_ok && reg->var_off.value != 0) {
		verbose(env, "dereference of modified %s ptr R%d off=%lld disallowed\n",
			reg_type_str(env, reg->type), regno, reg->var_off.value);
		return -EACCES;
	}

	return 0;
}
4537 
/* Strict variant of __check_ptr_off_reg(): modified (non-zero fixed offset)
 * pointers are not allowed.
 */
static int check_ptr_off_reg(struct bpf_verifier_env *env,
		             const struct bpf_reg_state *reg, int regno)
{
	return __check_ptr_off_reg(env, reg, regno, false);
}
4543 
/* Check that register @reg (value being stored into a map's kptr field) is
 * type-compatible with @kptr_field. Verifies the base type is PTR_TO_BTF_ID,
 * that the type flags are within the permitted set for this kptr flavor,
 * that the register offset is well-formed, and that the BTF types match.
 *
 * Returns 0 on match, -EACCES for a bad offset, -EINVAL for a type mismatch.
 */
static int map_kptr_match_type(struct bpf_verifier_env *env,
			       struct btf_field *kptr_field,
			       struct bpf_reg_state *reg, u32 regno)
{
	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
	int perm_flags;
	const char *reg_name = "";

	if (base_type(reg->type) != PTR_TO_BTF_ID)
		goto bad_type;

	if (btf_is_kernel(reg->btf)) {
		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;

		/* Only unreferenced case accepts untrusted pointers */
		if (kptr_field->type == BPF_KPTR_UNREF)
			perm_flags |= PTR_UNTRUSTED;
	} else {
		/* Local (prog BTF) objects come from bpf_obj_new style
		 * allocation, hence MEM_ALLOC.
		 */
		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
		if (kptr_field->type == BPF_KPTR_PERCPU)
			perm_flags |= MEM_PERCPU;
	}

	/* Reject if the register carries any modifier outside the allowed set */
	if (type_flag(reg->type) & ~perm_flags)
		goto bad_type;

	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
	reg_name = btf_type_name(reg->btf, reg->btf_id);

	/* For ref_ptr case, release function check should ensure we get one
	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
	 * normal store of unreferenced kptr, we must ensure var_off is zero.
	 * Since ref_ptr cannot be accessed directly by BPF insns, check for
	 * reg->ref_obj_id is not needed here.
	 */
	if (__check_ptr_off_reg(env, reg, regno, true))
		return -EACCES;

	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
	 * we also need to take into account the reg->var_off.
	 *
	 * We want to support cases like:
	 *
	 * struct foo {
	 *         struct bar br;
	 *         struct baz bz;
	 * };
	 *
	 * struct foo *v;
	 * v = func();	      // PTR_TO_BTF_ID
	 * val->foo = v;      // reg->var_off is zero, btf and btf_id match type
	 * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with
	 *                    // first member type of struct after comparison fails
	 * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked
	 *                    // to match type
	 *
	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off
	 * is zero. We must also ensure that btf_struct_ids_match does not walk
	 * the struct to match type against first member of struct, i.e. reject
	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
	 * strict mode to true for type match.
	 */
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value,
				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				  kptr_field->type != BPF_KPTR_UNREF))
		goto bad_type;
	return 0;
bad_type:
	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
		reg_type_str(env, reg->type), reg_name);
	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
	if (kptr_field->type == BPF_KPTR_UNREF)
		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
			targ_name);
	else
		verbose(env, "\n");
	return -EINVAL;
}
4622 
4623 static bool in_sleepable(struct bpf_verifier_env *env)
4624 {
4625 	return env->cur_state->in_sleepable;
4626 }
4627 
4628 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
4629  * can dereference RCU protected pointers and result is PTR_TRUSTED.
4630  */
4631 static bool in_rcu_cs(struct bpf_verifier_env *env)
4632 {
4633 	return env->cur_state->active_rcu_locks ||
4634 	       env->cur_state->active_locks ||
4635 	       !in_sleepable(env);
4636 }
4637 
/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
/* Allowlist of kernel struct types treated as RCU protected by
 * rcu_protected_object() below.
 */
BTF_SET_START(rcu_protected_types)
#ifdef CONFIG_NET
BTF_ID(struct, prog_test_ref_kfunc)
#endif
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
#endif
BTF_ID(struct, task_struct)
#ifdef CONFIG_CRYPTO
BTF_ID(struct, bpf_crypto_ctx)
#endif
BTF_SET_END(rcu_protected_types)
4654 
4655 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4656 {
4657 	if (!btf_is_kernel(btf))
4658 		return true;
4659 	return btf_id_set_contains(&rcu_protected_types, btf_id);
4660 }
4661 
4662 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
4663 {
4664 	struct btf_struct_meta *meta;
4665 
4666 	if (btf_is_kernel(kptr_field->kptr.btf))
4667 		return NULL;
4668 
4669 	meta = btf_find_struct_meta(kptr_field->kptr.btf,
4670 				    kptr_field->kptr.btf_id);
4671 
4672 	return meta ? meta->record : NULL;
4673 }
4674 
4675 static bool rcu_safe_kptr(const struct btf_field *field)
4676 {
4677 	const struct btf_field_kptr *kptr = &field->kptr;
4678 
4679 	return field->type == BPF_KPTR_PERCPU ||
4680 	       (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
4681 }
4682 
4683 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
4684 {
4685 	struct btf_record *rec;
4686 	u32 ret;
4687 
4688 	ret = PTR_MAYBE_NULL;
4689 	if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
4690 		ret |= MEM_RCU;
4691 		if (kptr_field->type == BPF_KPTR_PERCPU)
4692 			ret |= MEM_PERCPU;
4693 		else if (!btf_is_kernel(kptr_field->kptr.btf))
4694 			ret |= MEM_ALLOC;
4695 
4696 		rec = kptr_pointee_btf_record(kptr_field);
4697 		if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
4698 			ret |= NON_OWN_REF;
4699 	} else {
4700 		ret |= PTR_UNTRUSTED;
4701 	}
4702 
4703 	return ret;
4704 }
4705 
/* Mark the destination register of a BPF_LDX from a uptr field as
 * PTR_TO_MEM | PTR_MAYBE_NULL sized by the pointee BTF type.
 */
static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
			    struct btf_field *field)
{
	struct bpf_reg_state *reg;
	const struct btf_type *t;

	/* The pointee type's size bounds the accessible memory region. */
	t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
	mark_reg_known_zero(env, cur_regs(env), regno);
	reg = reg_state(env, regno);
	reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
	reg->mem_size = t->size;
	/* fresh id so the NULL check on this load can be tracked */
	reg->id = ++env->id_gen;

	return 0;
}
4721 
/* Verify a direct load/store at insn @insn_idx touching the kptr field
 * @kptr_field of the map value pointed to by R@regno. Only BPF_LDX/BPF_STX/
 * BPF_ST in BPF_MEM mode are allowed; stores are restricted further
 * depending on the kptr flavor. On a load, marks @value_regno accordingly.
 */
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
				 int value_regno, int insn_idx,
				 struct btf_field *kptr_field)
{
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	int class = BPF_CLASS(insn->code);
	struct bpf_reg_state *val_reg;
	int ret;

	/* Things we already checked for in check_map_access and caller:
	 *  - Reject cases where variable offset may touch kptr
	 *  - size of access (must be BPF_DW)
	 *  - tnum_is_const(reg->var_off)
	 *  - kptr_field->offset == off + reg->var_off.value
	 */
	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
	if (BPF_MODE(insn->code) != BPF_MEM) {
		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
		return -EACCES;
	}

	/* We only allow loading referenced kptr, since it will be marked as
	 * untrusted, similar to unreferenced kptr.
	 */
	if (class != BPF_LDX &&
	    (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
		verbose(env, "store to referenced kptr disallowed\n");
		return -EACCES;
	}
	if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
		verbose(env, "store to uptr disallowed\n");
		return -EACCES;
	}

	if (class == BPF_LDX) {
		/* uptr loads produce PTR_TO_MEM rather than PTR_TO_BTF_ID */
		if (kptr_field->type == BPF_UPTR)
			return mark_uptr_ld_reg(env, value_regno, kptr_field);

		/* We can simply mark the value_regno receiving the pointer
		 * value from map as PTR_TO_BTF_ID, with the correct type.
		 */
		ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID,
				      kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				      btf_ld_kptr_type(env, kptr_field));
		if (ret < 0)
			return ret;
	} else if (class == BPF_STX) {
		/* Storing NULL is always fine; otherwise the stored pointer's
		 * type must match the kptr field's type.
		 */
		val_reg = reg_state(env, value_regno);
		if (!bpf_register_is_null(val_reg) &&
		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
			return -EACCES;
	} else if (class == BPF_ST) {
		/* BPF_ST can only write NULL (imm 0) into a kptr slot */
		if (insn->imm) {
			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
				kptr_field->offset);
			return -EACCES;
		}
	} else {
		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
		return -EACCES;
	}
	return 0;
}
4785 
4786 /*
4787  * Return the size of the memory region accessible from a pointer to map value.
4788  * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible.
4789  */
4790 static u32 map_mem_size(const struct bpf_map *map)
4791 {
4792 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4793 		return map->max_entries * sizeof(long);
4794 
4795 	return map->value_size;
4796 }
4797 
/* check read/write into a map element with possible variable offset.
 *
 * After validating the raw memory range, walks the map's btf_record to make
 * sure no special field (kptr, uptr, spin_lock, ...) can be touched by the
 * access, except for the direct, fixed-offset, BPF_DW-sized kptr/uptr access
 * that check_map_kptr_access() handles later.
 */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
			    int off, int size, bool zero_size_allowed,
			    enum bpf_access_src src)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	struct bpf_map *map = reg->map_ptr;
	u32 mem_size = map_mem_size(map);
	struct btf_record *rec;
	int err, i;

	err = check_mem_region_access(env, regno, off, size, mem_size, zero_size_allowed);
	if (err)
		return err;

	/* No special fields in this map's values: nothing more to check */
	if (IS_ERR_OR_NULL(map->record))
		return 0;
	rec = map->record;
	for (i = 0; i < rec->cnt; i++) {
		struct btf_field *field = &rec->fields[i];
		u32 p = field->offset;

		/* If any part of a field can be touched by load/store, reject
		 * this program. To check that [x1, x2) overlaps with [y1, y2),
		 * it is sufficient to check x1 < y2 && y1 < x2.
		 */
		if (reg->smin_value + off < p + field->size &&
		    p < reg->umax_value + off + size) {
			switch (field->type) {
			case BPF_KPTR_UNREF:
			case BPF_KPTR_REF:
			case BPF_KPTR_PERCPU:
			case BPF_UPTR:
				/* kptr/uptr may only be touched by a direct,
				 * constant-offset, exactly-aligned, 8-byte
				 * access; everything else is rejected here.
				 */
				if (src != ACCESS_DIRECT) {
					verbose(env, "%s cannot be accessed indirectly by helper\n",
						btf_field_type_name(field->type));
					return -EACCES;
				}
				if (!tnum_is_const(reg->var_off)) {
					verbose(env, "%s access cannot have variable offset\n",
						btf_field_type_name(field->type));
					return -EACCES;
				}
				if (p != off + reg->var_off.value) {
					verbose(env, "%s access misaligned expected=%u off=%llu\n",
						btf_field_type_name(field->type),
						p, off + reg->var_off.value);
					return -EACCES;
				}
				if (size != bpf_size_to_bytes(BPF_DW)) {
					verbose(env, "%s access size must be BPF_DW\n",
						btf_field_type_name(field->type));
					return -EACCES;
				}
				break;
			default:
				verbose(env, "%s cannot be accessed directly by load/store\n",
					btf_field_type_name(field->type));
				return -EACCES;
			}
		}
	}
	return 0;
}
4864 
/* Decide whether the current program type may access packet data directly
 * with access type @t. When called outside helper-argument checking
 * (@meta == NULL) a permitted access also records env->seen_direct_write.
 */
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
			       const struct bpf_call_arg_meta *meta,
			       enum bpf_access_type t)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);

	switch (prog_type) {
	/* Program types only with direct read access go here! */
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (t == BPF_WRITE)
			return false;
		fallthrough;

	/* Program types with direct read + write access go here! */
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
		/* helper-argument check: defer to the helper's pkt_access flag */
		if (meta)
			return meta->pkt_access;

		env->seen_direct_write = true;
		return true;

	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		if (t == BPF_WRITE)
			env->seen_direct_write = true;

		return true;

	default:
		return false;
	}
}
4906 
/* Validate a packet-data access of @size bytes at fixed offset @off from
 * R@regno, against the range established by find_good_pkt_pointers().
 * Tracks the largest packet offset touched in prog->aux->max_pkt_offset.
 */
static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
			       int size, bool zero_size_allowed)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	int err;

	/* range < 0 means no usable bounds were established for this pkt ptr */
	if (reg->range < 0) {
		verbose(env, "R%d offset is outside of the packet\n", regno);
		return -EINVAL;
	}

	err = check_mem_region_access(env, regno, off, size, reg->range, zero_size_allowed);
	if (err)
		return err;

	/* __check_mem_access has made sure "off + size - 1" is within u16.
	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
	 * otherwise find_good_pkt_pointers would have refused to set range info
	 * that __check_mem_access would have rejected this pkt access.
	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
	 */
	env->prog->aux->max_pkt_offset =
		max_t(u32, env->prog->aux->max_pkt_offset,
		      off + reg->umax_value + size - 1);

	return 0;
}
4934 
4935 static bool is_var_ctx_off_allowed(struct bpf_prog *prog)
4936 {
4937 	return resolve_prog_type(prog) == BPF_PROG_TYPE_SYSCALL;
4938 }
4939 
/* check access to 'struct bpf_context' fields.  Supports fixed offsets only.
 *
 * Delegates validity to the prog-type's is_valid_access() callback, then
 * records ctx_field_size (for later narrow-access rewriting) and the max ctx
 * offset reached. Returns 0 if the access is valid, -EACCES otherwise.
 */
static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
			      enum bpf_access_type t, struct bpf_insn_access_aux *info)
{
	if (env->ops->is_valid_access &&
	    env->ops->is_valid_access(off, size, t, env->prog, info)) {
		/* A non zero info.ctx_field_size indicates that this field is a
		 * candidate for later verifier transformation to load the whole
		 * field and then apply a mask when accessed with a narrower
		 * access than actual ctx access size. A zero info.ctx_field_size
		 * will only allow for whole field access and rejects any other
		 * type of narrower access.
		 */
		if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
			/* a referenced ctx field must still be live */
			if (info->ref_obj_id &&
			    !find_reference_state(env->cur_state, info->ref_obj_id)) {
				verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
					off);
				return -EACCES;
			}
		} else {
			env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
		}
		/* remember the offset of last byte accessed in ctx */
		if (env->prog->aux->max_ctx_offset < off + size)
			env->prog->aux->max_ctx_offset = off + size;
		return 0;
	}

	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
	return -EACCES;
}
4972 
/* Validate a ctx access through R@regno: first check the register's offset
 * (variable offsets only for prog types that allow them), then check the
 * resulting worst-case fixed offset against the prog-type's ctx layout.
 */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
			    int off, int access_size, enum bpf_access_type t,
			    struct bpf_insn_access_aux *info)
{
	/*
	 * Program types that don't rewrite ctx accesses can safely
	 * dereference ctx pointers with fixed offsets.
	 */
	bool var_off_ok = is_var_ctx_off_allowed(env->prog);
	bool fixed_off_ok = !env->ops->convert_ctx_access;
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = regs + regno;
	int err;

	if (var_off_ok)
		err = check_mem_region_access(env, regno, off, access_size, U16_MAX, false);
	else
		err = __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
	if (err)
		return err;
	/* fold the register's (max) offset in, so __check_ctx_access sees the
	 * worst-case effective offset
	 */
	off += reg->umax_value;

	err = __check_ctx_access(env, insn_idx, off, access_size, t, info);
	if (err)
		verbose_linfo(env, insn_idx, "; ");
	return err;
}
5000 
5001 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
5002 				  int size)
5003 {
5004 	if (size < 0 || off < 0 ||
5005 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
5006 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
5007 			off, size);
5008 		return -EACCES;
5009 	}
5010 	return 0;
5011 }
5012 
/* Validate an access into one of the socket pointer types, dispatching to
 * the per-type is_valid_access helper, and record ctx_field_size for
 * narrow-load rewriting.
 */
static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
			     u32 regno, int off, int size,
			     enum bpf_access_type t)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_insn_access_aux info = {};
	bool valid;

	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}

	switch (reg->type) {
	case PTR_TO_SOCK_COMMON:
		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_SOCKET:
		valid = bpf_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_TCP_SOCK:
		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_XDP_SOCK:
		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
		break;
	default:
		valid = false;
	}


	if (valid) {
		env->insn_aux_data[insn_idx].ctx_field_size =
			info.ctx_field_size;
		return 0;
	}

	verbose(env, "R%d invalid %s access off=%d size=%d\n",
		regno, reg_type_str(env, reg->type), off, size);

	return -EACCES;
}
5056 
5057 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
5058 {
5059 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
5060 }
5061 
5062 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
5063 {
5064 	const struct bpf_reg_state *reg = reg_state(env, regno);
5065 
5066 	return reg->type == PTR_TO_CTX;
5067 }
5068 
5069 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
5070 {
5071 	const struct bpf_reg_state *reg = reg_state(env, regno);
5072 
5073 	return type_is_sk_pointer(reg->type);
5074 }
5075 
5076 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
5077 {
5078 	const struct bpf_reg_state *reg = reg_state(env, regno);
5079 
5080 	return type_is_pkt_pointer(reg->type);
5081 }
5082 
5083 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
5084 {
5085 	const struct bpf_reg_state *reg = reg_state(env, regno);
5086 
5087 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
5088 	return reg->type == PTR_TO_FLOW_KEYS;
5089 }
5090 
5091 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
5092 {
5093 	const struct bpf_reg_state *reg = reg_state(env, regno);
5094 
5095 	return reg->type == PTR_TO_ARENA;
5096 }
5097 
5098 /* Return false if @regno contains a pointer whose type isn't supported for
5099  * atomic instruction @insn.
5100  */
5101 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
5102 			       struct bpf_insn *insn)
5103 {
5104 	if (is_ctx_reg(env, regno))
5105 		return false;
5106 	if (is_pkt_reg(env, regno))
5107 		return false;
5108 	if (is_flow_key_reg(env, regno))
5109 		return false;
5110 	if (is_sk_reg(env, regno))
5111 		return false;
5112 	if (is_arena_reg(env, regno))
5113 		return bpf_jit_supports_insn(insn, true);
5114 
5115 	return true;
5116 }
5117 
/* Maps certain concrete pointer reg types to a kernel BTF type id; types
 * listed here are treated as trusted (see is_trusted_reg() below).
 */
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
#endif
	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
};
5126 
/* Return true if @reg is trusted: referenced, of a reg2btf_ids type, or
 * carrying only trust-conveying type modifiers.
 */
static bool is_trusted_reg(const struct bpf_reg_state *reg)
{
	/* A referenced register is always trusted. */
	if (reg->ref_obj_id)
		return true;

	/* Types listed in the reg2btf_ids are always trusted */
	if (reg2btf_ids[base_type(reg->type)] &&
	    !bpf_type_has_unsafe_modifiers(reg->type))
		return true;

	/* If a register is not referenced, it is trusted if it has the
	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
	 * other type modifiers may be safe, but we elect to take an opt-in
	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
	 * not.
	 *
	 * Eventually, we should make PTR_TRUSTED the single source of truth
	 * for whether a register is trusted.
	 */
	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
	       !bpf_type_has_unsafe_modifiers(reg->type);
}
5150 
5151 static bool is_rcu_reg(const struct bpf_reg_state *reg)
5152 {
5153 	return reg->type & MEM_RCU;
5154 }
5155 
/* Strip all trust-conveying modifiers (including MEM_RCU) from @flag. */
static void clear_trusted_flags(enum bpf_type_flag *flag)
{
	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
}
5160 
/* In strict mode, verify a packet-pointer access of @size bytes at @off is
 * naturally aligned, emulating NET_IP_ALIGN == 2. Byte accesses always pass.
 */
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg,
				   int off, int size, bool strict)
{
	struct tnum reg_off;
	int ip_align;

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	/* For platforms that do not have a Kconfig enabling
	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
	 * to this code only in strict mode where we want to emulate
	 * the NET_IP_ALIGN==2 checking.  Therefore use an
	 * unconditional IP align value of '2'.
	 */
	ip_align = 2;

	/* alignment is checked on the tnum, so unknown var_off bits count */
	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off));
	if (!tnum_is_aligned(reg_off, size)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"misaligned packet access off %d+%s+%d size %d\n",
			ip_align, tn_buf, off, size);
		return -EACCES;
	}

	return 0;
}
5195 
/* In strict mode, verify an access of @size bytes at @off from @reg is
 * naturally aligned; @pointer_desc labels the pointer kind in the error
 * message. Byte accesses always pass.
 */
static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
				       const struct bpf_reg_state *reg,
				       const char *pointer_desc,
				       int off, int size, bool strict)
{
	struct tnum reg_off;

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	/* check alignment of the full (variable + fixed) offset */
	reg_off = tnum_add(reg->var_off, tnum_const(off));
	if (!tnum_is_aligned(reg_off, size)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "misaligned %saccess off %s+%d size %d\n",
			pointer_desc, tn_buf, off, size);
		return -EACCES;
	}

	return 0;
}
5219 
/* Dispatch alignment checking by pointer type. Packet pointers get the
 * NET_IP_ALIGN-aware check; stack and insn-array map values are always
 * strict; arena pointers are never alignment checked here.
 */
static int check_ptr_alignment(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int off,
			       int size, bool strict_alignment_once)
{
	bool strict = env->strict_alignment || strict_alignment_once;
	const char *pointer_desc = "";

	switch (reg->type) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		/* Special case, because of NET_IP_ALIGN. Given metadata sits
		 * right in front, treat it the very same way.
		 */
		return check_pkt_ptr_alignment(env, reg, off, size, strict);
	case PTR_TO_FLOW_KEYS:
		pointer_desc = "flow keys ";
		break;
	case PTR_TO_MAP_KEY:
		pointer_desc = "key ";
		break;
	case PTR_TO_MAP_VALUE:
		pointer_desc = "value ";
		/* insn-array values are pointer-sized slots; force strict */
		if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY)
			strict = true;
		break;
	case PTR_TO_CTX:
		pointer_desc = "context ";
		break;
	case PTR_TO_STACK:
		pointer_desc = "stack ";
		/* The stack spill tracking logic in check_stack_write_fixed_off()
		 * and check_stack_read_fixed_off() relies on stack accesses being
		 * aligned.
		 */
		strict = true;
		break;
	case PTR_TO_SOCKET:
		pointer_desc = "sock ";
		break;
	case PTR_TO_SOCK_COMMON:
		pointer_desc = "sock_common ";
		break;
	case PTR_TO_TCP_SOCK:
		pointer_desc = "tcp_sock ";
		break;
	case PTR_TO_XDP_SOCK:
		pointer_desc = "xdp_sock ";
		break;
	case PTR_TO_ARENA:
		return 0;
	default:
		break;
	}
	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
					   strict);
}
5276 
/* Decide whether @prog may use a private (per-prog) stack. Requires JIT
 * support; enabled adaptively for tracing-style prog types and, for
 * trampoline-based types, when requested or when recursion is possible.
 */
static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
{
	if (!bpf_jit_supports_private_stack())
		return NO_PRIV_STACK;

	/* bpf_prog_check_recur() checks all prog types that use bpf trampoline
	 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
	 * explicitly.
	 */
	switch (prog->type) {
	case BPF_PROG_TYPE_KPROBE:
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
		return PRIV_STACK_ADAPTIVE;
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_LSM:
	case BPF_PROG_TYPE_STRUCT_OPS:
		if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
			return PRIV_STACK_ADAPTIVE;
		fallthrough;
	default:
		break;
	}

	return NO_PRIV_STACK;
}
5304 
5305 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
5306 {
5307 	if (env->prog->jit_requested)
5308 		return round_up(stack_depth, 16);
5309 
5310 	/* round up to 32-bytes, since this is granularity
5311 	 * of interpreter stack size
5312 	 */
5313 	return round_up(max_t(u32, stack_depth, 1), 32);
5314 }
5315 
/* temporary state used for call frame depth calculation; one entry per
 * subprog, indexed by subprog idx
 */
struct bpf_subprog_call_depth_info {
	int ret_insn; /* caller instruction where we return to. */
	int caller; /* caller subprogram idx */
	int frame; /* # of consecutive static call stack frames on top of stack */
};
5322 
/* starting from main bpf function walk all instructions of the function
 * and recursively walk all callees that given function can call.
 * Ignore jump and exit insns.
 * Done iteratively with explicit goto-based recursion: @dinfo records, per
 * subprog, the caller idx, return insn and frame count so the walk can
 * unwind. Also decides per-subprog private-stack mode and marks
 * tail_call_reachable subprogs for the JIT.
 */
static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
					 struct bpf_subprog_call_depth_info *dinfo,
					 bool priv_stack_supported)
{
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
	bool tail_call_reachable = false;
	int total;
	int tmp;

	/* no caller idx */
	dinfo[idx].caller = -1;

	i = subprog[idx].start;
	if (!priv_stack_supported)
		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
process_func:
	/* protect against potential stack overflow that might happen when
	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
	 * depth for such case down to 256 so that the worst case scenario
	 * would result in 8k stack size (32 which is tailcall limit * 256 =
	 * 8k).
	 *
	 * To get the idea what might happen, see an example:
	 * func1 -> sub rsp, 128
	 *  subfunc1 -> sub rsp, 256
	 *  tailcall1 -> add rsp, 256
	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
	 *   subfunc2 -> sub rsp, 64
	 *   subfunc22 -> sub rsp, 128
	 *   tailcall2 -> add rsp, 128
	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
	 *
	 * tailcall will unwind the current stack frame but it will not get rid
	 * of caller's stack as shown on the example above.
	 */
	if (idx && subprog[idx].has_tail_call && depth >= 256) {
		verbose(env,
			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
			depth);
		return -EACCES;
	}

	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
	if (priv_stack_supported) {
		/* Request private stack support only if the subprog stack
		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
		 * avoid jit penalty if the stack usage is small.
		 */
		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
	}

	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
		/* private-stack subprogs are checked individually, not
		 * accumulated into the combined depth
		 */
		if (subprog_depth > MAX_BPF_STACK) {
			verbose(env, "stack size of subprog %d is %d. Too large\n",
				idx, subprog_depth);
			return -EACCES;
		}
	} else {
		depth += subprog_depth;
		if (depth > MAX_BPF_STACK) {
			/* count frames on the current call chain for the message */
			total = 0;
			for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller)
				total++;

			verbose(env, "combined stack size of %d calls is %d. Too large\n",
				total, depth);
			return -EACCES;
		}
	}
continue_func:
	subprog_end = subprog[idx + 1].start;
	for (; i < subprog_end; i++) {
		int next_insn, sidx;

		if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
			bool err = false;

			if (!is_bpf_throw_kfunc(insn + i))
				continue;
			/* bpf_throw is invalid anywhere under a callback frame */
			for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) {
				if (subprog[tmp].is_cb) {
					err = true;
					break;
				}
			}
			if (!err)
				continue;
			verbose(env,
				"bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
				i, idx);
			return -EINVAL;
		}

		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
			continue;
		/* remember insn and function to return to */

		/* find the callee */
		next_insn = i + insn[i].imm + 1;
		sidx = bpf_find_subprog(env, next_insn);
		if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
			return -EFAULT;
		if (subprog[sidx].is_async_cb) {
			if (subprog[sidx].has_tail_call) {
				verifier_bug(env, "subprog has tail_call and async cb");
				return -EFAULT;
			}
			/* async callbacks don't increase bpf prog stack size unless called directly */
			if (!bpf_pseudo_call(insn + i))
				continue;
			if (subprog[sidx].is_exception_cb) {
				verbose(env, "insn %d cannot call exception cb directly", i);
				return -EINVAL;
			}
		}

		/* store caller info for after we return from callee */
		dinfo[idx].frame = frame;
		dinfo[idx].ret_insn = i + 1;

		/* push caller idx into callee's dinfo */
		dinfo[sidx].caller = idx;

		i = next_insn;

		idx = sidx;
		if (!priv_stack_supported)
			subprog[idx].priv_stack_mode = NO_PRIV_STACK;

		if (subprog[idx].has_tail_call)
			tail_call_reachable = true;

		/* global subprogs reset the consecutive static-frame count */
		frame = bpf_subprog_is_global(env, idx) ? 0 : frame + 1;
		if (frame >= MAX_CALL_FRAMES) {
			verbose(env, "the call stack of %d frames is too deep !\n",
				frame);
			return -E2BIG;
		}
		goto process_func;
	}
	/* if tail call got detected across bpf2bpf calls then mark each of the
	 * currently present subprog frames as tail call reachable subprogs;
	 * this info will be utilized by JIT so that we will be preserving the
	 * tail call counter throughout bpf2bpf calls combined with tailcalls
	 */
	if (tail_call_reachable)
		for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) {
			if (subprog[tmp].is_exception_cb) {
				verbose(env, "cannot tail call within exception cb\n");
				return -EINVAL;
			}
			subprog[tmp].tail_call_reachable = true;
		}
	if (subprog[0].tail_call_reachable)
		env->prog->aux->tail_call_reachable = true;

	/* end of for() loop means the last insn of the 'subprog'
	 * was reached. Doesn't matter whether it was JA or EXIT
	 */
	if (frame == 0 && dinfo[idx].caller < 0)
		return 0;
	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);

	/* pop caller idx from callee */
	idx = dinfo[idx].caller;

	/* retrieve caller state from its frame */
	frame = dinfo[idx].frame;
	i = dinfo[idx].ret_insn;

	goto continue_func;
}
5504 
/* Entry point of the stack-depth check. Decides the private-stack mode for
 * the program, then runs check_max_stack_depth_subprog() once for the main
 * prog and once for every async-callback subprog, and finally records in
 * prog->aux whether any subprog ended up with an adaptive private stack.
 */
static int check_max_stack_depth(struct bpf_verifier_env *env)
{
	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
	struct bpf_subprog_call_depth_info *dinfo;
	struct bpf_subprog_info *si = env->subprog_info;
	bool priv_stack_supported;
	int ret;

	/* per-subprog scratch space (caller/frame/ret_insn) shared by all
	 * check_max_stack_depth_subprog() walks below
	 */
	dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT);
	if (!dinfo)
		return -ENOMEM;

	/* a tail call anywhere in the program disables private stacks */
	for (int i = 0; i < env->subprog_cnt; i++) {
		if (si[i].has_tail_call) {
			priv_stack_mode = NO_PRIV_STACK;
			break;
		}
	}

	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
		priv_stack_mode = bpf_enable_priv_stack(env->prog);

	/* All async_cb subprogs use normal kernel stack. If a particular
	 * subprog appears in both main prog and async_cb subtree, that
	 * subprog will use normal kernel stack to avoid potential nesting.
	 * The reverse subprog traversal ensures when main prog subtree is
	 * checked, the subprogs appearing in async_cb subtrees are already
	 * marked as using normal kernel stack, so stack size checking can
	 * be done properly.
	 */
	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
		if (!i || si[i].is_async_cb) {
			/* only the main prog (i == 0) subtree may use
			 * private stacks, and only in adaptive mode
			 */
			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
			ret = check_max_stack_depth_subprog(env, i, dinfo,
					priv_stack_supported);
			if (ret < 0) {
				kvfree(dinfo);
				return ret;
			}
		}
	}

	for (int i = 0; i < env->subprog_cnt; i++) {
		if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
			env->prog->aux->jits_use_priv_stack = true;
			break;
		}
	}

	kvfree(dinfo);

	return 0;
}
5558 
5559 static int __check_buffer_access(struct bpf_verifier_env *env,
5560 				 const char *buf_info,
5561 				 const struct bpf_reg_state *reg,
5562 				 int regno, int off, int size)
5563 {
5564 	if (off < 0) {
5565 		verbose(env,
5566 			"R%d invalid %s buffer access: off=%d, size=%d\n",
5567 			regno, buf_info, off, size);
5568 		return -EACCES;
5569 	}
5570 	if (!tnum_is_const(reg->var_off)) {
5571 		char tn_buf[48];
5572 
5573 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5574 		verbose(env,
5575 			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
5576 			regno, off, tn_buf);
5577 		return -EACCES;
5578 	}
5579 
5580 	return 0;
5581 }
5582 
5583 static int check_tp_buffer_access(struct bpf_verifier_env *env,
5584 				  const struct bpf_reg_state *reg,
5585 				  int regno, int off, int size)
5586 {
5587 	int err;
5588 
5589 	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
5590 	if (err)
5591 		return err;
5592 
5593 	env->prog->aux->max_tp_access = max(reg->var_off.value + off + size,
5594 					    env->prog->aux->max_tp_access);
5595 
5596 	return 0;
5597 }
5598 
5599 static int check_buffer_access(struct bpf_verifier_env *env,
5600 			       const struct bpf_reg_state *reg,
5601 			       int regno, int off, int size,
5602 			       bool zero_size_allowed,
5603 			       u32 *max_access)
5604 {
5605 	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
5606 	int err;
5607 
5608 	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
5609 	if (err)
5610 		return err;
5611 
5612 	*max_access = max(reg->var_off.value + off + size, *max_access);
5613 
5614 	return 0;
5615 }
5616 
/* BPF architecture zero extends alu32 ops into 64-bit registers */
static void zext_32_to_64(struct bpf_reg_state *reg)
{
	/* keep only the low 32 bits of the tnum, then propagate the 32-bit
	 * bounds into the 64-bit bounds as a zero extension
	 */
	reg->var_off = tnum_subreg(reg->var_off);
	__reg_assign_32_into_64(reg);
}
5623 
5624 /* truncate register to smaller size (in bytes)
5625  * must be called with size < BPF_REG_SIZE
5626  */
5627 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5628 {
5629 	u64 mask;
5630 
5631 	/* clear high bits in bit representation */
5632 	reg->var_off = tnum_cast(reg->var_off, size);
5633 
5634 	/* fix arithmetic bounds */
5635 	mask = ((u64)1 << (size * 8)) - 1;
5636 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
5637 		reg->umin_value &= mask;
5638 		reg->umax_value &= mask;
5639 	} else {
5640 		reg->umin_value = 0;
5641 		reg->umax_value = mask;
5642 	}
5643 	reg->smin_value = reg->umin_value;
5644 	reg->smax_value = reg->umax_value;
5645 
5646 	/* If size is smaller than 32bit register the 32bit register
5647 	 * values are also truncated so we push 64-bit bounds into
5648 	 * 32-bit bounds. Above were truncated < 32-bits already.
5649 	 */
5650 	if (size < 4)
5651 		__mark_reg32_unbounded(reg);
5652 
5653 	reg_bounds_sync(reg);
5654 }
5655 
5656 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
5657 {
5658 	if (size == 1) {
5659 		reg->smin_value = reg->s32_min_value = S8_MIN;
5660 		reg->smax_value = reg->s32_max_value = S8_MAX;
5661 	} else if (size == 2) {
5662 		reg->smin_value = reg->s32_min_value = S16_MIN;
5663 		reg->smax_value = reg->s32_max_value = S16_MAX;
5664 	} else {
5665 		/* size == 4 */
5666 		reg->smin_value = reg->s32_min_value = S32_MIN;
5667 		reg->smax_value = reg->s32_max_value = S32_MAX;
5668 	}
5669 	reg->umin_value = reg->u32_min_value = 0;
5670 	reg->umax_value = U64_MAX;
5671 	reg->u32_max_value = U32_MAX;
5672 	reg->var_off = tnum_unknown;
5673 }
5674 
/* Sign-extend 'reg' from 'size' bytes (1, 2 or 4) into 64 bits, refining
 * its bounds and tnum where possible and falling back to the widest
 * sign-extended range (set_sext64_default_val) otherwise.
 */
static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
{
	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
	u64 top_smax_value, top_smin_value;
	u64 num_bits = size * 8;

	/* a known constant stays a known constant: just sign-extend it */
	if (tnum_is_const(reg->var_off)) {
		u64_cval = reg->var_off.value;
		if (size == 1)
			reg->var_off = tnum_const((s8)u64_cval);
		else if (size == 2)
			reg->var_off = tnum_const((s16)u64_cval);
		else
			/* size == 4 */
			reg->var_off = tnum_const((s32)u64_cval);

		u64_cval = reg->var_off.value;
		reg->smax_value = reg->smin_value = u64_cval;
		reg->umax_value = reg->umin_value = u64_cval;
		reg->s32_max_value = reg->s32_min_value = u64_cval;
		reg->u32_max_value = reg->u32_min_value = u64_cval;
		return;
	}

	/* bits above the truncation point must be identical for smin and
	 * smax, otherwise the sign-extended range is not contiguous
	 */
	top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
	top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;

	if (top_smax_value != top_smin_value)
		goto out;

	/* find the s64_max and s64_min after sign extension */
	if (size == 1) {
		init_s64_max = (s8)reg->smax_value;
		init_s64_min = (s8)reg->smin_value;
	} else if (size == 2) {
		init_s64_max = (s16)reg->smax_value;
		init_s64_min = (s16)reg->smin_value;
	} else {
		init_s64_max = (s32)reg->smax_value;
		init_s64_min = (s32)reg->smin_value;
	}

	s64_max = max(init_s64_max, init_s64_min);
	s64_min = min(init_s64_max, init_s64_min);

	/* both of s64_max/s64_min positive or negative */
	if ((s64_max >= 0) == (s64_min >= 0)) {
		reg->s32_min_value = reg->smin_value = s64_min;
		reg->s32_max_value = reg->smax_value = s64_max;
		reg->u32_min_value = reg->umin_value = s64_min;
		reg->u32_max_value = reg->umax_value = s64_max;
		reg->var_off = tnum_range(s64_min, s64_max);
		return;
	}

out:
	set_sext64_default_val(reg, size);
}
5733 
5734 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
5735 {
5736 	if (size == 1) {
5737 		reg->s32_min_value = S8_MIN;
5738 		reg->s32_max_value = S8_MAX;
5739 	} else {
5740 		/* size == 2 */
5741 		reg->s32_min_value = S16_MIN;
5742 		reg->s32_max_value = S16_MAX;
5743 	}
5744 	reg->u32_min_value = 0;
5745 	reg->u32_max_value = U32_MAX;
5746 	reg->var_off = tnum_subreg(tnum_unknown);
5747 }
5748 
/* Sign-extend the 32-bit subregister of 'reg' from 'size' bytes (1 or 2),
 * refining the 32-bit bounds and subreg tnum where possible and falling
 * back to the widest sign-extended range (set_sext32_default_val) otherwise.
 */
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
{
	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
	u32 top_smax_value, top_smin_value;
	u32 num_bits = size * 8;

	/* a known constant stays a known constant: just sign-extend it */
	if (tnum_is_const(reg->var_off)) {
		u32_val = reg->var_off.value;
		if (size == 1)
			reg->var_off = tnum_const((s8)u32_val);
		else
			reg->var_off = tnum_const((s16)u32_val);

		u32_val = reg->var_off.value;
		reg->s32_min_value = reg->s32_max_value = u32_val;
		reg->u32_min_value = reg->u32_max_value = u32_val;
		return;
	}

	/* bits above the truncation point must be identical for s32_min and
	 * s32_max, otherwise the sign-extended range is not contiguous
	 */
	top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
	top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;

	if (top_smax_value != top_smin_value)
		goto out;

	/* find the s32_max and s32_min after sign extension */
	if (size == 1) {
		init_s32_max = (s8)reg->s32_max_value;
		init_s32_min = (s8)reg->s32_min_value;
	} else {
		/* size == 2 */
		init_s32_max = (s16)reg->s32_max_value;
		init_s32_min = (s16)reg->s32_min_value;
	}
	s32_max = max(init_s32_max, init_s32_min);
	s32_min = min(init_s32_max, init_s32_min);

	/* both of s32_max/s32_min positive or negative */
	if ((s32_min >= 0) == (s32_max >= 0)) {
		reg->s32_min_value = s32_min;
		reg->s32_max_value = s32_max;
		reg->u32_min_value = (u32)s32_min;
		reg->u32_max_value = (u32)s32_max;
		reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
		return;
	}

out:
	set_sext32_default_val(reg, size);
}
5798 
bool bpf_map_is_rdonly(const struct bpf_map *map)
{
	/* A map is considered read-only if the following conditions are true:
	 *
	 * 1) BPF program side cannot change any of the map content. The
	 *    BPF_F_RDONLY_PROG flag was set at map creation time and holds
	 *    throughout the lifetime of the map.
	 * 2) The map value(s) have been initialized from user space by a
	 *    loader and then "frozen", such that no new map update/delete
	 *    operations from syscall side are possible for the rest of
	 *    the map's lifetime from that point onwards.
	 * 3) Any parallel/pending map update/delete operations from syscall
	 *    side have been completed. Only after that point, it's safe to
	 *    assume that map value(s) are immutable.
	 */
	return (map->map_flags & BPF_F_RDONLY_PROG) &&
	       READ_ONCE(map->frozen) &&
	       !bpf_map_write_active(map);
}
5818 
5819 int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
5820 			bool is_ldsx)
5821 {
5822 	void *ptr;
5823 	u64 addr;
5824 	int err;
5825 
5826 	err = map->ops->map_direct_value_addr(map, &addr, off);
5827 	if (err)
5828 		return err;
5829 	ptr = (void *)(long)addr + off;
5830 
5831 	switch (size) {
5832 	case sizeof(u8):
5833 		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
5834 		break;
5835 	case sizeof(u16):
5836 		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
5837 		break;
5838 	case sizeof(u32):
5839 		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
5840 		break;
5841 	case sizeof(u64):
5842 		*val = *(u64 *)ptr;
5843 		break;
5844 	default:
5845 		return -EINVAL;
5846 	}
5847 	return 0;
5848 }
5849 
/* These macros mint uniquely-named struct tags (via __PASTE) whose members
 * form the allow-lists consumed by btf_nested_type_is_trusted() below.
 */
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type)  __PASTE(__type, __safe_trusted_or_null)

/*
 * Allow list few fields as RCU trusted or full trusted.
 * This logic doesn't allow mix tagging and will be removed once GCC supports
 * btf_type_tag.
 */

/* RCU trusted: these fields are trusted in RCU CS and never NULL */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
	struct cgroup *cgroup;
};

/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
#ifdef CONFIG_MEMCG
	struct task_struct __rcu *owner;
#endif
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};

/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};

/* full trusted: these fields are trusted even outside of RCU CS but can be NULL */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
	struct inode *d_inode;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
	struct sock *sk;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) {
	struct mm_struct *vm_mm;
	struct file *vm_file;
};
5931 
/* Return true if field 'field_name'/'btf_id' walked from 'reg' is in the
 * __safe_rcu allow-list above (RCU-trusted and never NULL in an RCU CS).
 */
static bool type_is_rcu(struct bpf_verifier_env *env,
			struct bpf_reg_state *reg,
			const char *field_name, u32 btf_id)
{
	/* force the allow-list struct types above into BTF */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
}
5943 
/* Return true if field 'field_name'/'btf_id' walked from 'reg' is in the
 * __safe_rcu_or_null allow-list above (RCU-trusted but possibly NULL).
 */
static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
				struct bpf_reg_state *reg,
				const char *field_name, u32 btf_id)
{
	/* force the allow-list struct types above into BTF */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
}
5954 
/* Return true if field 'field_name'/'btf_id' walked from 'reg' is in the
 * __safe_trusted allow-list above (trusted outside RCU CS, never NULL).
 */
static bool type_is_trusted(struct bpf_verifier_env *env,
			    struct bpf_reg_state *reg,
			    const char *field_name, u32 btf_id)
{
	/* force the allow-list struct types above into BTF */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
}
5966 
/* Return true if field 'field_name'/'btf_id' walked from 'reg' is in the
 * __safe_trusted_or_null allow-list above (trusted but possibly NULL).
 */
static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
				    struct bpf_reg_state *reg,
				    const char *field_name, u32 btf_id)
{
	/* force the allow-list struct types above into BTF */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
					  "__safe_trusted_or_null");
}
5978 
/* Validate a load/store through a PTR_TO_BTF_ID register at 'regno + off'.
 * Performs capability/GPL checks, validates the offset against the BTF
 * type layout, and for reads computes the trust flags (PTR_TRUSTED,
 * MEM_RCU, PTR_UNTRUSTED, ...) of the resulting pointer, which is then
 * stored into 'value_regno' (if >= 0). Returns 0 or a negative errno.
 */
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs,
				   int regno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_reg_state *reg = regs + regno;
	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
	const char *field_name = NULL;
	enum bpf_type_flag flag = 0;
	u32 btf_id = 0;
	int ret;

	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}
	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
		verbose(env,
			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
			tname);
		return -EINVAL;
	}

	/* only constant offsets can be mapped to a BTF field */
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
			regno, tname, off, tn_buf);
		return -EACCES;
	}

	/* fold the constant register offset into the access offset */
	off += reg->var_off.value;

	if (off < 0) {
		verbose(env,
			"R%d is ptr_%s invalid negative access: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_USER) {
		verbose(env,
			"R%d is ptr_%s access user memory: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_PERCPU) {
		verbose(env,
			"R%d is ptr_%s access percpu memory: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	/* prog-type specific write checking takes precedence over the
	 * generic btf_struct_access() for non-allocated objects
	 */
	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
		if (!btf_is_kernel(reg->btf)) {
			verifier_bug(env, "reg->btf must be kernel btf");
			return -EFAULT;
		}
		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
	} else {
		/* Writes are permitted with default btf_struct_access for
		 * program allocated objects (which always have ref_obj_id > 0),
		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
		 */
		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
			verbose(env, "only read is supported\n");
			return -EACCES;
		}

		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
		    !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
			verifier_bug(env, "ref_obj_id for allocated object must be non-zero");
			return -EFAULT;
		}

		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
	}

	if (ret < 0)
		return ret;

	/* decide the trust flags of the pointer produced by the walk */
	if (ret != PTR_TO_BTF_ID) {
		/* just mark; */

	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
		/* If this is an untrusted pointer, all pointers formed by walking it
		 * also inherit the untrusted flag.
		 */
		flag = PTR_UNTRUSTED;

	} else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
		/* By default any pointer obtained from walking a trusted pointer is no
		 * longer trusted, unless the field being accessed has explicitly been
		 * marked as inheriting its parent's state of trust (either full or RCU).
		 * For example:
		 * 'cgroups' pointer is untrusted if task->cgroups dereference
		 * happened in a sleepable program outside of bpf_rcu_read_lock()
		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
		 *
		 * A regular RCU-protected pointer with __rcu tag can also be deemed
		 * trusted if we are in an RCU CS. Such pointer can be NULL.
		 */
		if (type_is_trusted(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED;
		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
			if (type_is_rcu(env, reg, field_name, btf_id)) {
				/* ignore __rcu tag and mark it MEM_RCU */
				flag |= MEM_RCU;
			} else if (flag & MEM_RCU ||
				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
				/* __rcu tagged pointers can be NULL */
				flag |= MEM_RCU | PTR_MAYBE_NULL;

				/* We always trust them */
				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
				    flag & PTR_UNTRUSTED)
					flag &= ~PTR_UNTRUSTED;
			} else if (flag & (MEM_PERCPU | MEM_USER)) {
				/* keep as-is */
			} else {
				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
				clear_trusted_flags(&flag);
			}
		} else {
			/*
			 * If not in RCU CS or MEM_RCU pointer can be NULL then
			 * aggressively mark as untrusted otherwise such
			 * pointers will be plain PTR_TO_BTF_ID without flags
			 * and will be allowed to be passed into helpers for
			 * compat reasons.
			 */
			flag = PTR_UNTRUSTED;
		}
	} else {
		/* Old compat. Deprecated */
		clear_trusted_flags(&flag);
	}

	/* a read materializes the walked-to pointer in the destination reg */
	if (atype == BPF_READ && value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}
6135 
/* Validate a read through a CONST_PTR_TO_MAP register at 'regno + off' by
 * simulating a PTR_TO_BTF_ID access against the map's kernel struct type
 * from vmlinux BTF. Only reads are allowed; on success the loaded pointer
 * (if any) is stored into 'value_regno'. Returns 0 or a negative errno.
 */
static int check_ptr_to_map_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs,
				   int regno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_reg_state *reg = regs + regno;
	struct bpf_map *map = reg->map_ptr;
	struct bpf_reg_state map_reg;
	enum bpf_type_flag flag = 0;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;
	int ret;

	if (!btf_vmlinux) {
		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
		return -ENOTSUPP;
	}

	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
		verbose(env, "map_ptr access not supported for map type %d\n",
			map->map_type);
		return -ENOTSUPP;
	}

	/* resolve the map's kernel struct type name for diagnostics */
	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
	tname = btf_name_by_offset(btf_vmlinux, t->name_off);

	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}

	if (off < 0) {
		verbose(env, "R%d is %s invalid negative access: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (atype != BPF_READ) {
		verbose(env, "only read from %s is supported\n", tname);
		return -EACCES;
	}

	/* Simulate access to a PTR_TO_BTF_ID */
	memset(&map_reg, 0, sizeof(map_reg));
	ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
			      btf_vmlinux, *map->ops->map_btf_id, 0);
	if (ret < 0)
		return ret;
	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
	if (ret < 0)
		return ret;

	if (value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}
6201 
6202 /* Check that the stack access at the given offset is within bounds. The
6203  * maximum valid offset is -1.
6204  *
6205  * The minimum valid offset is -MAX_BPF_STACK for writes, and
6206  * -state->allocated_stack for reads.
6207  */
6208 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
6209                                           s64 off,
6210                                           struct bpf_func_state *state,
6211                                           enum bpf_access_type t)
6212 {
6213 	int min_valid_off;
6214 
6215 	if (t == BPF_WRITE || env->allow_uninit_stack)
6216 		min_valid_off = -MAX_BPF_STACK;
6217 	else
6218 		min_valid_off = -state->allocated_stack;
6219 
6220 	if (off < min_valid_off || off > -1)
6221 		return -EACCES;
6222 	return 0;
6223 }
6224 
/* Check that the stack access at 'regno + off' falls within the maximum stack
 * bounds.
 *
 * 'off' includes `regno->offset`, but not its dynamic part (if any).
 *
 * On success, also grows the tracked stack allocation to cover the access.
 */
static int check_stack_access_within_bounds(
		struct bpf_verifier_env *env,
		int regno, int off, int access_size,
		enum bpf_access_type type)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_func_state *state = bpf_func(env, reg);
	s64 min_off, max_off;
	int err;
	char *err_extra;

	/* note: the leading space is intentional, these strings are spliced
	 * directly into the "invalid%s stack ..." messages below
	 */
	if (type == BPF_READ)
		err_extra = " read from";
	else
		err_extra = " write to";

	/* compute the [min_off, max_off) window the access may touch */
	if (tnum_is_const(reg->var_off)) {
		min_off = (s64)reg->var_off.value + off;
		max_off = min_off + access_size;
	} else {
		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
				err_extra, regno);
			return -EACCES;
		}
		min_off = reg->smin_value + off;
		max_off = reg->smax_value + off + access_size;
	}

	err = check_stack_slot_within_bounds(env, min_off, state, type);
	if (!err && max_off > 0)
		err = -EINVAL; /* out of stack access into non-negative offsets */
	if (!err && access_size < 0)
		/* access_size should not be negative (or overflow an int); others checks
		 * along the way should have prevented such an access.
		 */
		err = -EFAULT; /* invalid negative access size; integer overflow? */

	if (err) {
		if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid%s stack R%d off=%lld size=%d\n",
				err_extra, regno, min_off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
				err_extra, regno, tn_buf, off, access_size);
		}
		return err;
	}

	/* Note that there is no stack access with offset zero, so the needed stack
	 * size is -min_off, not -min_off+1.
	 */
	return grow_stack_state(env, state, -min_off /* size */);
}
6288 
6289 static bool get_func_retval_range(struct bpf_prog *prog,
6290 				  struct bpf_retval_range *range)
6291 {
6292 	if (prog->type == BPF_PROG_TYPE_LSM &&
6293 		prog->expected_attach_type == BPF_LSM_MAC &&
6294 		!bpf_lsm_get_retval_range(prog, range)) {
6295 		return true;
6296 	}
6297 	return false;
6298 }
6299 
6300 static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val)
6301 {
6302 	struct bpf_reg_state fake_reg;
6303 
6304 	if (!val)
6305 		return;
6306 
6307 	fake_reg.type = SCALAR_VALUE;
6308 	__mark_reg_known(&fake_reg, val);
6309 
6310 	scalar32_min_max_add(dst_reg, &fake_reg);
6311 	scalar_min_max_add(dst_reg, &fake_reg);
6312 	dst_reg->var_off = tnum_add(dst_reg->var_off, fake_reg.var_off);
6313 
6314 	reg_bounds_sync(dst_reg);
6315 }
6316 
6317 /* check whether memory at (regno + off) is accessible for t = (read | write)
6318  * if t==write, value_regno is a register which value is stored into memory
6319  * if t==read, value_regno is a register which will receive the value from memory
6320  * if t==write && value_regno==-1, some unknown value is stored into memory
6321  * if t==read && value_regno==-1, don't care what we read from memory
6322  */
6323 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
6324 			    int off, int bpf_size, enum bpf_access_type t,
6325 			    int value_regno, bool strict_alignment_once, bool is_ldsx)
6326 {
6327 	struct bpf_reg_state *regs = cur_regs(env);
6328 	struct bpf_reg_state *reg = regs + regno;
6329 	int size, err = 0;
6330 
6331 	size = bpf_size_to_bytes(bpf_size);
6332 	if (size < 0)
6333 		return size;
6334 
6335 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
6336 	if (err)
6337 		return err;
6338 
6339 	if (reg->type == PTR_TO_MAP_KEY) {
6340 		if (t == BPF_WRITE) {
6341 			verbose(env, "write to change key R%d not allowed\n", regno);
6342 			return -EACCES;
6343 		}
6344 
6345 		err = check_mem_region_access(env, regno, off, size,
6346 					      reg->map_ptr->key_size, false);
6347 		if (err)
6348 			return err;
6349 		if (value_regno >= 0)
6350 			mark_reg_unknown(env, regs, value_regno);
6351 	} else if (reg->type == PTR_TO_MAP_VALUE) {
6352 		struct btf_field *kptr_field = NULL;
6353 
6354 		if (t == BPF_WRITE && value_regno >= 0 &&
6355 		    is_pointer_value(env, value_regno)) {
6356 			verbose(env, "R%d leaks addr into map\n", value_regno);
6357 			return -EACCES;
6358 		}
6359 		err = check_map_access_type(env, regno, off, size, t);
6360 		if (err)
6361 			return err;
6362 		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
6363 		if (err)
6364 			return err;
6365 		if (tnum_is_const(reg->var_off))
6366 			kptr_field = btf_record_find(reg->map_ptr->record,
6367 						     off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
6368 		if (kptr_field) {
6369 			err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
6370 		} else if (t == BPF_READ && value_regno >= 0) {
6371 			struct bpf_map *map = reg->map_ptr;
6372 
6373 			/*
6374 			 * If map is read-only, track its contents as scalars,
6375 			 * unless it is an insn array (see the special case below)
6376 			 */
6377 			if (tnum_is_const(reg->var_off) &&
6378 			    bpf_map_is_rdonly(map) &&
6379 			    map->ops->map_direct_value_addr &&
6380 			    map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
6381 				int map_off = off + reg->var_off.value;
6382 				u64 val = 0;
6383 
6384 				err = bpf_map_direct_read(map, map_off, size,
6385 							  &val, is_ldsx);
6386 				if (err)
6387 					return err;
6388 
6389 				regs[value_regno].type = SCALAR_VALUE;
6390 				__mark_reg_known(&regs[value_regno], val);
6391 			} else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
6392 				if (bpf_size != BPF_DW) {
6393 					verbose(env, "Invalid read of %d bytes from insn_array\n",
6394 						     size);
6395 					return -EACCES;
6396 				}
6397 				copy_register_state(&regs[value_regno], reg);
6398 				add_scalar_to_reg(&regs[value_regno], off);
6399 				regs[value_regno].type = PTR_TO_INSN;
6400 			} else {
6401 				mark_reg_unknown(env, regs, value_regno);
6402 			}
6403 		}
6404 	} else if (base_type(reg->type) == PTR_TO_MEM) {
6405 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
6406 		bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);
6407 
6408 		if (type_may_be_null(reg->type)) {
6409 			verbose(env, "R%d invalid mem access '%s'\n", regno,
6410 				reg_type_str(env, reg->type));
6411 			return -EACCES;
6412 		}
6413 
6414 		if (t == BPF_WRITE && rdonly_mem) {
6415 			verbose(env, "R%d cannot write into %s\n",
6416 				regno, reg_type_str(env, reg->type));
6417 			return -EACCES;
6418 		}
6419 
6420 		if (t == BPF_WRITE && value_regno >= 0 &&
6421 		    is_pointer_value(env, value_regno)) {
6422 			verbose(env, "R%d leaks addr into mem\n", value_regno);
6423 			return -EACCES;
6424 		}
6425 
6426 		/*
6427 		 * Accesses to untrusted PTR_TO_MEM are done through probe
6428 		 * instructions, hence no need to check bounds in that case.
6429 		 */
6430 		if (!rdonly_untrusted)
6431 			err = check_mem_region_access(env, regno, off, size,
6432 						      reg->mem_size, false);
6433 		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
6434 			mark_reg_unknown(env, regs, value_regno);
6435 	} else if (reg->type == PTR_TO_CTX) {
6436 		struct bpf_insn_access_aux info = {
6437 			.reg_type = SCALAR_VALUE,
6438 			.is_ldsx = is_ldsx,
6439 			.log = &env->log,
6440 		};
6441 		struct bpf_retval_range range;
6442 
6443 		if (t == BPF_WRITE && value_regno >= 0 &&
6444 		    is_pointer_value(env, value_regno)) {
6445 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
6446 			return -EACCES;
6447 		}
6448 
6449 		err = check_ctx_access(env, insn_idx, regno, off, size, t, &info);
6450 		if (!err && t == BPF_READ && value_regno >= 0) {
6451 			/* ctx access returns either a scalar, or a
6452 			 * PTR_TO_PACKET[_META,_END]. In the latter
6453 			 * case, we know the offset is zero.
6454 			 */
6455 			if (info.reg_type == SCALAR_VALUE) {
6456 				if (info.is_retval && get_func_retval_range(env->prog, &range)) {
6457 					err = __mark_reg_s32_range(env, regs, value_regno,
6458 								   range.minval, range.maxval);
6459 					if (err)
6460 						return err;
6461 				} else {
6462 					mark_reg_unknown(env, regs, value_regno);
6463 				}
6464 			} else {
6465 				mark_reg_known_zero(env, regs,
6466 						    value_regno);
6467 				if (type_may_be_null(info.reg_type))
6468 					regs[value_regno].id = ++env->id_gen;
6469 				/* A load of ctx field could have different
6470 				 * actual load size with the one encoded in the
6471 				 * insn. When the dst is PTR, it is for sure not
6472 				 * a sub-register.
6473 				 */
6474 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
6475 				if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
6476 					regs[value_regno].btf = info.btf;
6477 					regs[value_regno].btf_id = info.btf_id;
6478 					regs[value_regno].ref_obj_id = info.ref_obj_id;
6479 				}
6480 			}
6481 			regs[value_regno].type = info.reg_type;
6482 		}
6483 
6484 	} else if (reg->type == PTR_TO_STACK) {
6485 		/* Basic bounds checks. */
6486 		err = check_stack_access_within_bounds(env, regno, off, size, t);
6487 		if (err)
6488 			return err;
6489 
6490 		if (t == BPF_READ)
6491 			err = check_stack_read(env, regno, off, size,
6492 					       value_regno);
6493 		else
6494 			err = check_stack_write(env, regno, off, size,
6495 						value_regno, insn_idx);
6496 	} else if (reg_is_pkt_pointer(reg)) {
6497 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
6498 			verbose(env, "cannot write into packet\n");
6499 			return -EACCES;
6500 		}
6501 		if (t == BPF_WRITE && value_regno >= 0 &&
6502 		    is_pointer_value(env, value_regno)) {
6503 			verbose(env, "R%d leaks addr into packet\n",
6504 				value_regno);
6505 			return -EACCES;
6506 		}
6507 		err = check_packet_access(env, regno, off, size, false);
6508 		if (!err && t == BPF_READ && value_regno >= 0)
6509 			mark_reg_unknown(env, regs, value_regno);
6510 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
6511 		if (t == BPF_WRITE && value_regno >= 0 &&
6512 		    is_pointer_value(env, value_regno)) {
6513 			verbose(env, "R%d leaks addr into flow keys\n",
6514 				value_regno);
6515 			return -EACCES;
6516 		}
6517 
6518 		err = check_flow_keys_access(env, off, size);
6519 		if (!err && t == BPF_READ && value_regno >= 0)
6520 			mark_reg_unknown(env, regs, value_regno);
6521 	} else if (type_is_sk_pointer(reg->type)) {
6522 		if (t == BPF_WRITE) {
6523 			verbose(env, "R%d cannot write into %s\n",
6524 				regno, reg_type_str(env, reg->type));
6525 			return -EACCES;
6526 		}
6527 		err = check_sock_access(env, insn_idx, regno, off, size, t);
6528 		if (!err && value_regno >= 0)
6529 			mark_reg_unknown(env, regs, value_regno);
6530 	} else if (reg->type == PTR_TO_TP_BUFFER) {
6531 		err = check_tp_buffer_access(env, reg, regno, off, size);
6532 		if (!err && t == BPF_READ && value_regno >= 0)
6533 			mark_reg_unknown(env, regs, value_regno);
6534 	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
6535 		   !type_may_be_null(reg->type)) {
6536 		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
6537 					      value_regno);
6538 	} else if (reg->type == CONST_PTR_TO_MAP) {
6539 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
6540 					      value_regno);
6541 	} else if (base_type(reg->type) == PTR_TO_BUF &&
6542 		   !type_may_be_null(reg->type)) {
6543 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
6544 		u32 *max_access;
6545 
6546 		if (rdonly_mem) {
6547 			if (t == BPF_WRITE) {
6548 				verbose(env, "R%d cannot write into %s\n",
6549 					regno, reg_type_str(env, reg->type));
6550 				return -EACCES;
6551 			}
6552 			max_access = &env->prog->aux->max_rdonly_access;
6553 		} else {
6554 			max_access = &env->prog->aux->max_rdwr_access;
6555 		}
6556 
6557 		err = check_buffer_access(env, reg, regno, off, size, false,
6558 					  max_access);
6559 
6560 		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
6561 			mark_reg_unknown(env, regs, value_regno);
6562 	} else if (reg->type == PTR_TO_ARENA) {
6563 		if (t == BPF_READ && value_regno >= 0)
6564 			mark_reg_unknown(env, regs, value_regno);
6565 	} else {
6566 		verbose(env, "R%d invalid mem access '%s'\n", regno,
6567 			reg_type_str(env, reg->type));
6568 		return -EACCES;
6569 	}
6570 
6571 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
6572 	    regs[value_regno].type == SCALAR_VALUE) {
6573 		if (!is_ldsx)
6574 			/* b/h/w load zero-extends, mark upper bits as known 0 */
6575 			coerce_reg_to_size(&regs[value_regno], size);
6576 		else
6577 			coerce_reg_to_size_sx(&regs[value_regno], size);
6578 	}
6579 	return err;
6580 }
6581 
6582 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
6583 			     bool allow_trust_mismatch);
6584 
6585 static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
6586 			  bool strict_alignment_once, bool is_ldsx,
6587 			  bool allow_trust_mismatch, const char *ctx)
6588 {
6589 	struct bpf_reg_state *regs = cur_regs(env);
6590 	enum bpf_reg_type src_reg_type;
6591 	int err;
6592 
6593 	/* check src operand */
6594 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6595 	if (err)
6596 		return err;
6597 
6598 	/* check dst operand */
6599 	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6600 	if (err)
6601 		return err;
6602 
6603 	src_reg_type = regs[insn->src_reg].type;
6604 
6605 	/* Check if (src_reg + off) is readable. The state of dst_reg will be
6606 	 * updated by this call.
6607 	 */
6608 	err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off,
6609 			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
6610 			       strict_alignment_once, is_ldsx);
6611 	err = err ?: save_aux_ptr_type(env, src_reg_type,
6612 				       allow_trust_mismatch);
6613 	err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);
6614 
6615 	return err;
6616 }
6617 
6618 static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
6619 			   bool strict_alignment_once)
6620 {
6621 	struct bpf_reg_state *regs = cur_regs(env);
6622 	enum bpf_reg_type dst_reg_type;
6623 	int err;
6624 
6625 	/* check src1 operand */
6626 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6627 	if (err)
6628 		return err;
6629 
6630 	/* check src2 operand */
6631 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6632 	if (err)
6633 		return err;
6634 
6635 	dst_reg_type = regs[insn->dst_reg].type;
6636 
6637 	/* Check if (dst_reg + off) is writeable. */
6638 	err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
6639 			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
6640 			       strict_alignment_once, false);
6641 	err = err ?: save_aux_ptr_type(env, dst_reg_type, false);
6642 
6643 	return err;
6644 }
6645 
/*
 * Verify a BPF_ATOMIC read-modify-write instruction: BPF_ADD/AND/OR/XOR
 * (each optionally combined with BPF_FETCH), BPF_XCHG or BPF_CMPXCHG.
 * The memory operand is (dst_reg + off); the value operand is src_reg.
 * BPF_CMPXCHG additionally compares against and writes the old value to R0.
 *
 * Returns 0 on success, a negative error code on rejection.
 */
static int check_atomic_rmw(struct bpf_verifier_env *env,
			    struct bpf_insn *insn)
{
	int load_reg;
	int err;

	/* Atomic RMW ops only exist in 32-bit (BPF_W) and 64-bit (BPF_DW) forms */
	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
		verbose(env, "invalid atomic operand size\n");
		return -EINVAL;
	}

	/* check src1 operand */
	err = check_reg_arg(env, insn->src_reg, SRC_OP);
	if (err)
		return err;

	/* check src2 operand */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	if (insn->imm == BPF_CMPXCHG) {
		/* Check comparison of R0 with memory location */
		const u32 aux_reg = BPF_REG_0;

		err = check_reg_arg(env, aux_reg, SRC_OP);
		if (err)
			return err;

		/* Writing a pointer value to memory would leak its address */
		if (is_pointer_value(env, aux_reg)) {
			verbose(env, "R%d leaks addr into mem\n", aux_reg);
			return -EACCES;
		}
	}

	/* Likewise for the value operand */
	if (is_pointer_value(env, insn->src_reg)) {
		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
		return -EACCES;
	}

	/* The memory operand must be of a pointer type valid for atomics */
	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
			insn->dst_reg,
			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
		return -EACCES;
	}

	if (insn->imm & BPF_FETCH) {
		/* CMPXCHG returns the old value in R0, other fetches in src_reg */
		if (insn->imm == BPF_CMPXCHG)
			load_reg = BPF_REG_0;
		else
			load_reg = insn->src_reg;

		/* check and record load of old value */
		err = check_reg_arg(env, load_reg, DST_OP);
		if (err)
			return err;
	} else {
		/* This instruction accesses a memory location but doesn't
		 * actually load it into a register.
		 */
		load_reg = -1;
	}

	/* Check whether we can read the memory, with second call for fetch
	 * case to simulate the register fill.
	 */
	err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_READ, -1, true, false);
	if (!err && load_reg >= 0)
		err = check_mem_access(env, env->insn_idx, insn->dst_reg,
				       insn->off, BPF_SIZE(insn->code),
				       BPF_READ, load_reg, true, false);
	if (err)
		return err;

	/* Record that this insn accessed arena memory (see save_aux_ptr_type()) */
	if (is_arena_reg(env, insn->dst_reg)) {
		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
		if (err)
			return err;
	}
	/* Check whether we can write into the same memory. */
	err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
	if (err)
		return err;
	return 0;
}
6734 
6735 static int check_atomic_load(struct bpf_verifier_env *env,
6736 			     struct bpf_insn *insn)
6737 {
6738 	int err;
6739 
6740 	err = check_load_mem(env, insn, true, false, false, "atomic_load");
6741 	if (err)
6742 		return err;
6743 
6744 	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
6745 		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
6746 			insn->src_reg,
6747 			reg_type_str(env, reg_state(env, insn->src_reg)->type));
6748 		return -EACCES;
6749 	}
6750 
6751 	return 0;
6752 }
6753 
6754 static int check_atomic_store(struct bpf_verifier_env *env,
6755 			      struct bpf_insn *insn)
6756 {
6757 	int err;
6758 
6759 	err = check_store_reg(env, insn, true);
6760 	if (err)
6761 		return err;
6762 
6763 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6764 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6765 			insn->dst_reg,
6766 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6767 		return -EACCES;
6768 	}
6769 
6770 	return 0;
6771 }
6772 
6773 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
6774 {
6775 	switch (insn->imm) {
6776 	case BPF_ADD:
6777 	case BPF_ADD | BPF_FETCH:
6778 	case BPF_AND:
6779 	case BPF_AND | BPF_FETCH:
6780 	case BPF_OR:
6781 	case BPF_OR | BPF_FETCH:
6782 	case BPF_XOR:
6783 	case BPF_XOR | BPF_FETCH:
6784 	case BPF_XCHG:
6785 	case BPF_CMPXCHG:
6786 		return check_atomic_rmw(env, insn);
6787 	case BPF_LOAD_ACQ:
6788 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6789 			verbose(env,
6790 				"64-bit load-acquires are only supported on 64-bit arches\n");
6791 			return -EOPNOTSUPP;
6792 		}
6793 		return check_atomic_load(env, insn);
6794 	case BPF_STORE_REL:
6795 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6796 			verbose(env,
6797 				"64-bit store-releases are only supported on 64-bit arches\n");
6798 			return -EOPNOTSUPP;
6799 		}
6800 		return check_atomic_store(env, insn);
6801 	default:
6802 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
6803 			insn->imm);
6804 		return -EINVAL;
6805 	}
6806 }
6807 
/* When register 'regno' is used to read the stack (either directly or through
 * a helper function) make sure that it's within stack boundary and, depending
 * on the access type and privileges, that all elements of the stack are
 * initialized.
 *
 * All registers that have been spilled on the stack in the slots within the
 * read offsets are marked as read.
 *
 * Returns 0 if the whole access window is acceptable, a negative error
 * code otherwise.
 */
static int check_stack_range_initialized(
		struct bpf_verifier_env *env, int regno, int off,
		int access_size, bool zero_size_allowed,
		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_func_state *state = bpf_func(env, reg);
	int err, min_off, max_off, i, j, slot, spi;
	/* Some accesses can write anything into the stack, others are
	 * read-only.
	 */
	bool clobber = type == BPF_WRITE;
	/*
	 * Negative access_size signals global subprog/kfunc arg check where
	 * STACK_POISON slots are acceptable. static stack liveness
	 * might have determined that subprog doesn't read them,
	 * but BTF based global subprog validation isn't accurate enough.
	 */
	bool allow_poison = access_size < 0 || clobber;

	access_size = abs(access_size);

	if (access_size == 0 && !zero_size_allowed) {
		verbose(env, "invalid zero-sized read\n");
		return -EACCES;
	}

	err = check_stack_access_within_bounds(env, regno, off, access_size, type);
	if (err)
		return err;


	/* Resolve the range of start offsets [min_off, max_off] the access
	 * may begin at; for a constant pointer the window is exact.
	 */
	if (tnum_is_const(reg->var_off)) {
		min_off = max_off = reg->var_off.value + off;
	} else {
		/* Variable offset is prohibited for unprivileged mode for
		 * simplicity since it requires corresponding support in
		 * Spectre masking for stack ALU.
		 * See also retrieve_ptr_limit().
		 */
		if (!env->bypass_spec_v1) {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
				regno, tn_buf);
			return -EACCES;
		}
		/* Only initialized buffer on stack is allowed to be accessed
		 * with variable offset. With uninitialized buffer it's hard to
		 * guarantee that whole memory is marked as initialized on
		 * helper return since specific bounds are unknown what may
		 * cause uninitialized stack leaking.
		 */
		if (meta && meta->raw_mode)
			meta = NULL;

		min_off = reg->smin_value + off;
		max_off = reg->smax_value + off;
	}

	if (meta && meta->raw_mode) {
		/* Ensure we won't be overwriting dynptrs when simulating byte
		 * by byte access in check_helper_call using meta.access_size.
		 * This would be a problem if we have a helper in the future
		 * which takes:
		 *
		 *	helper(uninit_mem, len, dynptr)
		 *
		 * Now, uninint_mem may overlap with dynptr pointer. Hence, it
		 * may end up writing to dynptr itself when touching memory from
		 * arg 1. This can be relaxed on a case by case basis for known
		 * safe cases, but reject due to the possibilitiy of aliasing by
		 * default.
		 */
		for (i = min_off; i < max_off + access_size; i++) {
			int stack_off = -i - 1;

			spi = bpf_get_spi(i);
			/* raw_mode may write past allocated_stack */
			if (state->allocated_stack <= stack_off)
				continue;
			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
				return -EACCES;
			}
		}
		/* Defer the initialization check; record the region so that
		 * check_helper_call() can handle it via meta.
		 */
		meta->access_size = access_size;
		meta->regno = regno;
		return 0;
	}

	/* Walk every byte the access may touch and verify its slot type is
	 * acceptable for this kind of access.
	 */
	for (i = min_off; i < max_off + access_size; i++) {
		u8 *stype;

		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		if (state->allocated_stack <= slot) {
			verbose(env, "allocated_stack too small\n");
			return -EFAULT;
		}

		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		if (*stype == STACK_MISC)
			goto mark;
		if ((*stype == STACK_ZERO) ||
		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
			if (clobber) {
				/* helper can write anything into the stack */
				*stype = STACK_MISC;
			}
			goto mark;
		}

		/* Spilled scalars are always readable; spilled pointers only
		 * when pointer leaks are allowed. A clobbering access scrubs
		 * the spill since the helper may overwrite it.
		 */
		if (bpf_is_spilled_reg(&state->stack[spi]) &&
		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
		     env->allow_ptr_leaks)) {
			if (clobber) {
				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
				for (j = 0; j < BPF_REG_SIZE; j++)
					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
			}
			goto mark;
		}

		if (*stype == STACK_POISON) {
			if (allow_poison)
				goto mark;
			verbose(env, "reading from stack R%d off %d+%d size %d, slot poisoned by dead code elimination\n",
				regno, min_off, i - min_off, access_size);
		} else if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid read from stack R%d off %d+%d size %d\n",
				regno, min_off, i - min_off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid read from stack R%d var_off %s+%d size %d\n",
				regno, tn_buf, i - min_off, access_size);
		}
		return -EACCES;
mark:
		/* this byte is acceptable, continue with the next one */
		;
	}
	return 0;
}
6962 
/*
 * Verify a helper-call memory argument: register @regno holds a pointer
 * through which the helper will access @access_size bytes. Dispatches on
 * the pointer's base type to the matching bounds/initialization check.
 *
 * Returns 0 when the access is permitted, a negative error code otherwise.
 */
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
				   int access_size, enum bpf_access_type access_type,
				   bool zero_size_allowed,
				   struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	u32 *max_access;

	switch (base_type(reg->type)) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		return check_packet_access(env, regno, 0, access_size,
					   zero_size_allowed);
	case PTR_TO_MAP_KEY:
		/* map keys are read-only for helpers */
		if (access_type == BPF_WRITE) {
			verbose(env, "R%d cannot write into %s\n", regno,
				reg_type_str(env, reg->type));
			return -EACCES;
		}
		return check_mem_region_access(env, regno, 0, access_size,
					       reg->map_ptr->key_size, false);
	case PTR_TO_MAP_VALUE:
		if (check_map_access_type(env, regno, 0, access_size, access_type))
			return -EACCES;
		return check_map_access(env, regno, 0, access_size,
					zero_size_allowed, ACCESS_HELPER);
	case PTR_TO_MEM:
		/* writes are only rejected for the read-only flavor */
		if (type_is_rdonly_mem(reg->type)) {
			if (access_type == BPF_WRITE) {
				verbose(env, "R%d cannot write into %s\n", regno,
					reg_type_str(env, reg->type));
				return -EACCES;
			}
		}
		return check_mem_region_access(env, regno, 0,
					       access_size, reg->mem_size,
					       zero_size_allowed);
	case PTR_TO_BUF:
		/* track the largest observed access separately for the
		 * read-only and read-write buffers
		 */
		if (type_is_rdonly_mem(reg->type)) {
			if (access_type == BPF_WRITE) {
				verbose(env, "R%d cannot write into %s\n", regno,
					reg_type_str(env, reg->type));
				return -EACCES;
			}

			max_access = &env->prog->aux->max_rdonly_access;
		} else {
			max_access = &env->prog->aux->max_rdwr_access;
		}
		return check_buffer_access(env, reg, regno, 0,
					   access_size, zero_size_allowed,
					   max_access);
	case PTR_TO_STACK:
		return check_stack_range_initialized(
				env,
				regno, 0, access_size,
				zero_size_allowed, access_type, meta);
	case PTR_TO_BTF_ID:
		return check_ptr_to_btf_access(env, regs, regno, 0,
					       access_size, BPF_READ, -1);
	case PTR_TO_CTX:
		/* Only permit reading or writing syscall context using helper calls. */
		if (is_var_ctx_off_allowed(env->prog)) {
			int err = check_mem_region_access(env, regno, 0, access_size, U16_MAX,
							  zero_size_allowed);
			if (err)
				return err;
			/* grow the recorded max ctx offset to cover this access */
			if (env->prog->aux->max_ctx_offset < reg->umax_value + access_size)
				env->prog->aux->max_ctx_offset = reg->umax_value + access_size;
			return 0;
		}
		fallthrough;
	default: /* scalar_value or invalid ptr */
		/* Allow zero-byte read from NULL, regardless of pointer type */
		if (zero_size_allowed && access_size == 0 &&
		    bpf_register_is_null(reg))
			return 0;

		verbose(env, "R%d type=%s ", regno,
			reg_type_str(env, reg->type));
		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
		return -EACCES;
	}
}
7047 
/* verify arguments to helpers or kfuncs consisting of a pointer and an access
 * size.
 *
 * @regno is the register containing the access size. regno-1 is the register
 * containing the pointer.
 *
 * Returns 0 if the (pointer, size) pair is valid for @access_type, a
 * negative error code otherwise.
 */
static int check_mem_size_reg(struct bpf_verifier_env *env,
			      struct bpf_reg_state *reg, u32 regno,
			      enum bpf_access_type access_type,
			      bool zero_size_allowed,
			      struct bpf_call_arg_meta *meta)
{
	int err;

	/* This is used to refine r0 return value bounds for helpers
	 * that enforce this value as an upper bound on return values.
	 * See do_refine_retval_range() for helpers that can refine
	 * the return value. C type of helper is u32 so we pull register
	 * bound from umax_value however, if negative verifier errors
	 * out. Only upper bounds can be learned because retval is an
	 * int type and negative retvals are allowed.
	 */
	meta->msize_max_value = reg->umax_value;

	/* The register is SCALAR_VALUE; the access check happens using
	 * its boundaries. For unprivileged variable accesses, disable
	 * raw mode so that the program is required to initialize all
	 * the memory that the helper could just partially fill up.
	 */
	if (!tnum_is_const(reg->var_off))
		meta = NULL;

	/* a size register must be provably non-negative ... */
	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
			regno);
		return -EACCES;
	}

	/* ... non-zero unless the caller allows zero-sized accesses ... */
	if (reg->umin_value == 0 && !zero_size_allowed) {
		verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n",
			regno, reg->umin_value, reg->umax_value);
		return -EACCES;
	}

	/* ... and bounded from above */
	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
			regno);
		return -EACCES;
	}
	/* Check the worst case: the largest size the register may hold */
	err = check_helper_mem_access(env, regno - 1, reg->umax_value,
				      access_type, zero_size_allowed, meta);
	if (!err)
		/* The size bound was relied on for verification; keep it
		 * precise (see mark_chain_precision()).
		 */
		err = mark_chain_precision(env, regno);
	return err;
}
7103 
7104 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7105 			 u32 regno, u32 mem_size)
7106 {
7107 	bool may_be_null = type_may_be_null(reg->type);
7108 	struct bpf_reg_state saved_reg;
7109 	int err;
7110 
7111 	if (bpf_register_is_null(reg))
7112 		return 0;
7113 
7114 	/* Assuming that the register contains a value check if the memory
7115 	 * access is safe. Temporarily save and restore the register's state as
7116 	 * the conversion shouldn't be visible to a caller.
7117 	 */
7118 	if (may_be_null) {
7119 		saved_reg = *reg;
7120 		mark_ptr_not_null_reg(reg);
7121 	}
7122 
7123 	int size = base_type(reg->type) == PTR_TO_STACK ? -(int)mem_size : mem_size;
7124 
7125 	err = check_helper_mem_access(env, regno, size, BPF_READ, true, NULL);
7126 	err = err ?: check_helper_mem_access(env, regno, size, BPF_WRITE, true, NULL);
7127 
7128 	if (may_be_null)
7129 		*reg = saved_reg;
7130 
7131 	return err;
7132 }
7133 
7134 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7135 				    u32 regno)
7136 {
7137 	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
7138 	bool may_be_null = type_may_be_null(mem_reg->type);
7139 	struct bpf_reg_state saved_reg;
7140 	struct bpf_call_arg_meta meta;
7141 	int err;
7142 
7143 	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
7144 
7145 	memset(&meta, 0, sizeof(meta));
7146 
7147 	if (may_be_null) {
7148 		saved_reg = *mem_reg;
7149 		mark_ptr_not_null_reg(mem_reg);
7150 	}
7151 
7152 	err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta);
7153 	err = err ?: check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta);
7154 
7155 	if (may_be_null)
7156 		*mem_reg = saved_reg;
7157 
7158 	return err;
7159 }
7160 
/* Flag bits for process_spin_lock() describing the operation being checked. */
enum {
	PROCESS_SPIN_LOCK = (1 << 0),	/* acquiring the lock (unset: releasing) */
	PROCESS_RES_LOCK  = (1 << 1),	/* resilient bpf_res_spin_lock variant */
	PROCESS_LOCK_IRQ  = (1 << 2),	/* irq-save flavor of the resilient lock */
};
7166 
/* Implementation details:
 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
 * Two bpf_map_lookups (even with the same key) will have different reg->id.
 * Two separate bpf_obj_new will also have different reg->id.
 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
 * clears reg->id after value_or_null->value transition, since the verifier only
 * cares about the range of access to valid map value pointer and doesn't care
 * about actual address of the map element.
 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
 * reg->id > 0 after value_or_null->value transition. By doing so
 * two bpf_map_lookups will be considered two different pointers that
 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
 * returned from bpf_obj_new.
 * The verifier allows taking only one bpf_spin_lock at a time to avoid
 * dead-locks.
 * Since only one bpf_spin_lock is allowed the checks are simpler than
 * reg_is_refcounted() logic. The verifier needs to remember only
 * one spin_lock instead of array of acquired_refs.
 * env->cur_state->active_locks remembers which map value element or allocated
 * object got locked and clears it after bpf_spin_unlock.
 *
 * @regno holds the lock pointer argument; @flags is a combination of the
 * PROCESS_* bits above. Returns 0 on success, negative error otherwise.
 */
static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
{
	bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
	const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_verifier_state *cur = env->cur_state;
	bool is_const = tnum_is_const(reg->var_off);
	bool is_irq = flags & PROCESS_LOCK_IRQ;
	u64 val = reg->var_off.value;
	struct bpf_map *map = NULL;
	struct btf *btf = NULL;
	struct btf_record *rec;
	u32 spin_lock_off;
	int err;

	/* The lock must sit at a statically known offset */
	if (!is_const) {
		verbose(env,
			"R%d doesn't have constant offset. %s_lock has to be at the constant offset\n",
			regno, lock_str);
		return -EINVAL;
	}
	/* The lock lives either in a map value (map) or an allocated object (btf) */
	if (reg->type == PTR_TO_MAP_VALUE) {
		map = reg->map_ptr;
		if (!map->btf) {
			verbose(env,
				"map '%s' has to have BTF in order to use %s_lock\n",
				map->name, lock_str);
			return -EINVAL;
		}
	} else {
		btf = reg->btf;
	}

	rec = reg_btf_record(reg);
	if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) {
		verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local",
			map ? map->name : "kptr", lock_str);
		return -EINVAL;
	}
	/* The constant offset must point exactly at the recorded lock field */
	spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
	if (spin_lock_off != val) {
		verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
			val, lock_str, spin_lock_off);
		return -EINVAL;
	}
	if (is_lock) {
		/* Acquire path */
		void *ptr;
		int type;

		if (map)
			ptr = map;
		else
			ptr = btf;

		/* Classic spin locks don't nest at all; resilient locks only
		 * reject re-acquiring the very same lock (AA deadlock).
		 */
		if (!is_res_lock && cur->active_locks) {
			if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) {
				verbose(env,
					"Locking two bpf_spin_locks are not allowed\n");
				return -EINVAL;
			}
		} else if (is_res_lock && cur->active_locks) {
			if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) {
				verbose(env, "Acquiring the same lock again, AA deadlock detected\n");
				return -EINVAL;
			}
		}

		if (is_res_lock && is_irq)
			type = REF_TYPE_RES_LOCK_IRQ;
		else if (is_res_lock)
			type = REF_TYPE_RES_LOCK;
		else
			type = REF_TYPE_LOCK;
		err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
		if (err < 0) {
			verbose(env, "Failed to acquire lock state\n");
			return err;
		}
	} else {
		/* Release path */
		void *ptr;
		int type;

		if (map)
			ptr = map;
		else
			ptr = btf;

		if (!cur->active_locks) {
			verbose(env, "%s_unlock without taking a lock\n", lock_str);
			return -EINVAL;
		}

		if (is_res_lock && is_irq)
			type = REF_TYPE_RES_LOCK_IRQ;
		else if (is_res_lock)
			type = REF_TYPE_RES_LOCK;
		else
			type = REF_TYPE_LOCK;
		/* The unlocked lock must be held ... */
		if (!find_lock_state(cur, type, reg->id, ptr)) {
			verbose(env, "%s_unlock of different lock\n", lock_str);
			return -EINVAL;
		}
		/* ... and must be the most recently acquired one */
		if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) {
			verbose(env, "%s_unlock cannot be out of order\n", lock_str);
			return -EINVAL;
		}
		if (release_lock_state(cur, type, reg->id, ptr)) {
			verbose(env, "%s_unlock of different lock\n", lock_str);
			return -EINVAL;
		}

		invalidate_non_owning_refs(env);
	}
	return 0;
}
7304 
/* Check if @regno is a pointer to a specific field in a map value.
 *
 * The register must be a PTR_TO_MAP_VALUE with a constant offset pointing
 * exactly at the field of @field_type (BPF_TIMER, BPF_TASK_WORK or
 * BPF_WORKQUEUE) inside the map value; the map must have BTF and a record
 * containing such a field. On success the map pointer and its uid are
 * recorded in @map_desc for the caller.
 *
 * Returns 0 on success, -EINVAL on a malformed argument, -EFAULT on an
 * internal verifier bug (two map pointers seen for one helper call).
 */
static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
				   enum btf_field_type field_type,
				   struct bpf_map_desc *map_desc)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	bool is_const = tnum_is_const(reg->var_off);
	struct bpf_map *map = reg->map_ptr;
	u64 val = reg->var_off.value;
	const char *struct_name = btf_field_type_name(field_type);
	int field_off = -1;

	if (!is_const) {
		verbose(env,
			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
			regno, struct_name);
		return -EINVAL;
	}
	if (!map->btf) {
		verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
			struct_name);
		return -EINVAL;
	}
	if (!btf_record_has_field(map->record, field_type)) {
		verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
		return -EINVAL;
	}
	/* Locate where the requested field actually lives in the map value. */
	switch (field_type) {
	case BPF_TIMER:
		field_off = map->record->timer_off;
		break;
	case BPF_TASK_WORK:
		field_off = map->record->task_work_off;
		break;
	case BPF_WORKQUEUE:
		field_off = map->record->wq_off;
		break;
	default:
		verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
		return -EINVAL;
	}
	/* The register must point precisely at the field, not merely near it. */
	if (field_off != val) {
		verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
			val, struct_name, field_off);
		return -EINVAL;
	}
	if (map_desc->ptr) {
		verifier_bug(env, "Two map pointers in a %s helper", struct_name);
		return -EFAULT;
	}
	map_desc->uid = reg->map_uid;
	map_desc->ptr = map;
	return 0;
}
7359 
7360 static int process_timer_func(struct bpf_verifier_env *env, int regno,
7361 			      struct bpf_map_desc *map)
7362 {
7363 	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
7364 		verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
7365 		return -EOPNOTSUPP;
7366 	}
7367 	return check_map_field_pointer(env, regno, BPF_TIMER, map);
7368 }
7369 
7370 static int process_timer_helper(struct bpf_verifier_env *env, int regno,
7371 				struct bpf_call_arg_meta *meta)
7372 {
7373 	return process_timer_func(env, regno, &meta->map);
7374 }
7375 
7376 static int process_timer_kfunc(struct bpf_verifier_env *env, int regno,
7377 			       struct bpf_kfunc_call_arg_meta *meta)
7378 {
7379 	return process_timer_func(env, regno, &meta->map);
7380 }
7381 
/* Validate a kptr argument for bpf_kptr_xchg(): @regno must hold a pointer
 * (into a map value or a local allocated object) at a constant offset that
 * lands exactly on a referenced kptr field (BPF_KPTR_REF or
 * BPF_KPTR_PERCPU). On success the matched field is saved in
 * meta->kptr_field for later type matching.
 */
static int process_kptr_func(struct bpf_verifier_env *env, int regno,
			     struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct btf_field *kptr_field;
	struct bpf_map *map_ptr;
	struct btf_record *rec;
	u32 kptr_off;

	if (type_is_ptr_alloc_obj(reg->type)) {
		/* Local kptr: the BTF record comes from the allocated object. */
		rec = reg_btf_record(reg);
	} else { /* PTR_TO_MAP_VALUE */
		map_ptr = reg->map_ptr;
		if (!map_ptr->btf) {
			verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
				map_ptr->name);
			return -EINVAL;
		}
		rec = map_ptr->record;
		meta->map.ptr = map_ptr;
	}

	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
			regno);
		return -EINVAL;
	}

	if (!btf_record_has_field(rec, BPF_KPTR)) {
		verbose(env, "R%d has no valid kptr\n", regno);
		return -EINVAL;
	}

	kptr_off = reg->var_off.value;
	kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
	if (!kptr_field) {
		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
		return -EACCES;
	}
	/* Only referenced kptr fields are accepted here. */
	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
		return -EACCES;
	}
	meta->kptr_field = kptr_field;
	return 0;
}
7429 
7430 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
7431  * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7432  *
7433  * In both cases we deal with the first 8 bytes, but need to mark the next 8
7434  * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7435  * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7436  *
7437  * Mutability of bpf_dynptr is at two levels, one is at the level of struct
7438  * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
7439  * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
7440  * mutate the view of the dynptr and also possibly destroy it. In the latter
7441  * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
7442  * memory that dynptr points to.
7443  *
7444  * The verifier will keep track both levels of mutation (bpf_dynptr's in
7445  * reg->type and the memory's in reg->dynptr.type), but there is no support for
7446  * readonly dynptr view yet, hence only the first case is tracked and checked.
7447  *
7448  * This is consistent with how C applies the const modifier to a struct object,
7449  * where the pointer itself inside bpf_dynptr becomes const but not what it
7450  * points to.
7451  *
7452  * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
7453  * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
7454  */
/* Validate a dynptr argument at register @regno. With MEM_UNINIT in
 * @arg_type, the PTR_TO_STACK slots are written and marked as a fresh
 * dynptr (optionally cloning the reference of @clone_ref_obj_id);
 * otherwise an initialized dynptr of the expected type is required and a
 * read mark is recorded. See the comment above for the dynptr mutability
 * model.
 */
static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
			       enum bpf_arg_type arg_type, int clone_ref_obj_id)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	int err;

	if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
		verbose(env,
			"arg#%d expected pointer to stack or const struct bpf_dynptr\n",
			regno - 1);
		return -EINVAL;
	}

	/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
	 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
	 */
	if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
		verifier_bug(env, "misconfigured dynptr helper type flags");
		return -EFAULT;
	}

	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
	 *		 constructing a mutable bpf_dynptr object.
	 *
	 *		 Currently, this is only possible with PTR_TO_STACK
	 *		 pointing to a region of at least 16 bytes which doesn't
	 *		 contain an existing bpf_dynptr.
	 *
	 *  MEM_RDONLY - Points to a initialized bpf_dynptr that will not be
	 *		 mutated or destroyed. However, the memory it points to
	 *		 may be mutated.
	 *
	 *  None       - Points to a initialized dynptr that can be mutated and
	 *		 destroyed, including mutation of the memory it points
	 *		 to.
	 */
	if (arg_type & MEM_UNINIT) {
		int i;

		if (!is_dynptr_reg_valid_uninit(env, reg)) {
			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
			return -EINVAL;
		}

		/* we write BPF_DW bits (8 bytes) at a time */
		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
			err = check_mem_access(env, insn_idx, regno,
					       i, BPF_DW, BPF_WRITE, -1, false, false);
			if (err)
				return err;
		}

		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
	} else /* MEM_RDONLY and None case from above */ {
		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
		if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
			verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
			return -EINVAL;
		}

		if (!is_dynptr_reg_valid_init(env, reg)) {
			verbose(env,
				"Expected an initialized dynptr as arg #%d\n",
				regno - 1);
			return -EINVAL;
		}

		/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
		if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
			verbose(env,
				"Expected a dynptr of type %s as arg #%d\n",
				dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1);
			return -EINVAL;
		}

		err = mark_dynptr_read(env, reg);
	}
	return err;
}
7534 
7535 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
7536 {
7537 	struct bpf_func_state *state = bpf_func(env, reg);
7538 
7539 	return state->stack[spi].spilled_ptr.ref_obj_id;
7540 }
7541 
7542 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7543 {
7544 	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7545 }
7546 
7547 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7548 {
7549 	return meta->kfunc_flags & KF_ITER_NEW;
7550 }
7551 
7552 
7553 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7554 {
7555 	return meta->kfunc_flags & KF_ITER_DESTROY;
7556 }
7557 
7558 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
7559 			      const struct btf_param *arg)
7560 {
7561 	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
7562 	 * kfunc is iter state pointer
7563 	 */
7564 	if (is_iter_kfunc(meta))
7565 		return arg_idx == 0;
7566 
7567 	/* iter passed as an argument to a generic kfunc */
7568 	return btf_param_match_suffix(meta->btf, arg, "__iter");
7569 }
7570 
/* Validate an iterator argument at register @regno (kfunc arg #regno-1),
 * which must be a PTR_TO_STACK pointing at on-stack iter state.
 * For KF_ITER_NEW kfuncs the stack slots must be uninitialized and are
 * written and marked as a fresh iterator; for next/destroy kfuncs and
 * generic kfuncs taking an "__iter" argument the slots must already hold
 * an initialized iterator of the right BTF type. Destroy kfuncs
 * additionally unmark the slots.
 */
static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
			    struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	const struct btf_type *t;
	int spi, err, i, nr_slots, btf_id;

	if (reg->type != PTR_TO_STACK) {
		verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1);
		return -EINVAL;
	}

	/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
	 * ensures struct convention, so we wouldn't need to do any BTF
	 * validation here. But given iter state can be passed as a parameter
	 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
	 * conservative here.
	 */
	btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
	if (btf_id < 0) {
		verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1);
		return -EINVAL;
	}
	t = btf_type_by_id(meta->btf, btf_id);
	nr_slots = t->size / BPF_REG_SIZE;

	if (is_iter_new_kfunc(meta)) {
		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
			verbose(env, "expected uninitialized iter_%s as arg #%d\n",
				iter_type_str(meta->btf, btf_id), regno - 1);
			return -EINVAL;
		}

		/* we write BPF_DW (8 bytes) at a time to every slot */
		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
			err = check_mem_access(env, insn_idx, regno,
					       i, BPF_DW, BPF_WRITE, -1, false, false);
			if (err)
				return err;
		}

		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
		if (err)
			return err;
	} else {
		/* iter_next() or iter_destroy(), as well as any kfunc
		 * accepting iter argument, expect initialized iter state
		 */
		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
		switch (err) {
		case 0:
			break;
		case -EINVAL:
			verbose(env, "expected an initialized iter_%s as arg #%d\n",
				iter_type_str(meta->btf, btf_id), regno - 1);
			return err;
		case -EPROTO:
			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
			return err;
		default:
			return err;
		}

		spi = iter_get_spi(env, reg, nr_slots);
		if (spi < 0)
			return spi;

		err = mark_iter_read(env, reg, spi, nr_slots);
		if (err)
			return err;

		/* remember meta->iter info for process_iter_next_call() */
		meta->iter.spi = spi;
		meta->iter.frameno = reg->frameno;
		meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);

		if (is_iter_destroy_kfunc(meta)) {
			err = unmark_stack_slots_iter(env, reg, nr_slots);
			if (err)
				return err;
		}
	}

	return 0;
}
7656 
7657 /* Look for a previous loop entry at insn_idx: nearest parent state
7658  * stopped at insn_idx with callsites matching those in cur->frame.
7659  */
7660 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7661 						  struct bpf_verifier_state *cur,
7662 						  int insn_idx)
7663 {
7664 	struct bpf_verifier_state_list *sl;
7665 	struct bpf_verifier_state *st;
7666 	struct list_head *pos, *head;
7667 
7668 	/* Explored states are pushed in stack order, most recent states come first */
7669 	head = bpf_explored_state(env, insn_idx);
7670 	list_for_each(pos, head) {
7671 		sl = container_of(pos, struct bpf_verifier_state_list, node);
7672 		/* If st->branches != 0 state is a part of current DFS verification path,
7673 		 * hence cur & st for a loop.
7674 		 */
7675 		st = &sl->state;
7676 		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
7677 		    st->dfs_depth < cur->dfs_depth)
7678 			return st;
7679 	}
7680 
7681 	return NULL;
7682 }
7683 
7684 /*
7685  * Check if scalar registers are exact for the purpose of not widening.
7686  * More lenient than regs_exact()
7687  */
7688 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold,
7689 				    const struct bpf_reg_state *rcur)
7690 {
7691 	return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id));
7692 }
7693 
7694 static void maybe_widen_reg(struct bpf_verifier_env *env,
7695 			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur)
7696 {
7697 	if (rold->type != SCALAR_VALUE)
7698 		return;
7699 	if (rold->type != rcur->type)
7700 		return;
7701 	if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur))
7702 		return;
7703 	__mark_reg_unknown(env, rcur);
7704 }
7705 
/* Widen (mark unknown) all eligible imprecise scalars of @cur that differ
 * from the matching registers/spilled slots in the previous loop entry
 * state @old: every frame's registers plus the stack slots both frames
 * have allocated. Always returns 0.
 */
static int widen_imprecise_scalars(struct bpf_verifier_env *env,
				   struct bpf_verifier_state *old,
				   struct bpf_verifier_state *cur)
{
	struct bpf_func_state *fold, *fcur;
	int i, fr, num_slots;

	for (fr = old->curframe; fr >= 0; fr--) {
		fold = old->frame[fr];
		fcur = cur->frame[fr];

		for (i = 0; i < MAX_BPF_REG; i++)
			maybe_widen_reg(env,
					&fold->regs[i],
					&fcur->regs[i]);

		/* only slots present in both states can be compared */
		num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
				fcur->allocated_stack / BPF_REG_SIZE);
		for (i = 0; i < num_slots; i++) {
			if (!bpf_is_spilled_reg(&fold->stack[i]) ||
			    !bpf_is_spilled_reg(&fcur->stack[i]))
				continue;

			maybe_widen_reg(env,
					&fold->stack[i].spilled_ptr,
					&fcur->stack[i].spilled_ptr);
		}
	}
	return 0;
}
7736 
7737 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
7738 						 struct bpf_kfunc_call_arg_meta *meta)
7739 {
7740 	int iter_frameno = meta->iter.frameno;
7741 	int iter_spi = meta->iter.spi;
7742 
7743 	return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7744 }
7745 
7746 /* process_iter_next_call() is called when verifier gets to iterator's next
7747  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7748  * to it as just "iter_next()" in comments below.
7749  *
7750  * BPF verifier relies on a crucial contract for any iter_next()
7751  * implementation: it should *eventually* return NULL, and once that happens
7752  * it should keep returning NULL. That is, once iterator exhausts elements to
7753  * iterate, it should never reset or spuriously return new elements.
7754  *
7755  * With the assumption of such contract, process_iter_next_call() simulates
7756  * a fork in the verifier state to validate loop logic correctness and safety
7757  * without having to simulate infinite amount of iterations.
7758  *
7759  * In current state, we first assume that iter_next() returned NULL and
7760  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7761  * conditions we should not form an infinite loop and should eventually reach
7762  * exit.
7763  *
7764  * Besides that, we also fork current state and enqueue it for later
7765  * verification. In a forked state we keep iterator state as ACTIVE
7766  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7767  * also bump iteration depth to prevent erroneous infinite loop detection
7768  * later on (see iter_active_depths_differ() comment for details). In this
7769  * state we assume that we'll eventually loop back to another iter_next()
7770  * calls (it could be in exactly same location or in some other instruction,
7771  * it doesn't matter, we don't make any unnecessary assumptions about this,
7772  * everything revolves around iterator state in a stack slot, not which
7773  * instruction is calling iter_next()). When that happens, we either will come
7774  * to iter_next() with equivalent state and can conclude that next iteration
7775  * will proceed in exactly the same way as we just verified, so it's safe to
7776  * assume that loop converges. If not, we'll go on another iteration
7777  * simulation with a different input state, until all possible starting states
7778  * are validated or we reach maximum number of instructions limit.
7779  *
7780  * This way, we will either exhaustively discover all possible input states
7781  * that iterator loop can start with and eventually will converge, or we'll
7782  * effectively regress into bounded loop simulation logic and either reach
7783  * maximum number of instructions if loop is not provably convergent, or there
7784  * is some statically known limit on number of iterations (e.g., if there is
7785  * an explicit `if n > 100 then break;` statement somewhere in the loop).
7786  *
7787  * Iteration convergence logic in is_state_visited() relies on exact
7788  * states comparison, which ignores read and precision marks.
7789  * This is necessary because read and precision marks are not finalized
7790  * while in the loop. Exact comparison might preclude convergence for
7791  * simple programs like below:
7792  *
7793  *     i = 0;
7794  *     while(iter_next(&it))
7795  *       i++;
7796  *
7797  * At each iteration step i++ would produce a new distinct state and
7798  * eventually instruction processing limit would be reached.
7799  *
7800  * To avoid such behavior speculatively forget (widen) range for
7801  * imprecise scalar registers, if those registers were not precise at the
7802  * end of the previous iteration and do not match exactly.
7803  *
7804  * This is a conservative heuristic that allows to verify wide range of programs,
7805  * however it precludes verification of programs that conjure an
7806  * imprecise value on the first loop iteration and use it as precise on a second.
7807  * For example, the following safe program would fail to verify:
7808  *
7809  *     struct bpf_num_iter it;
7810  *     int arr[10];
7811  *     int i = 0, a = 0;
7812  *     bpf_iter_num_new(&it, 0, 10);
7813  *     while (bpf_iter_num_next(&it)) {
7814  *       if (a == 0) {
7815  *         a = 1;
7816  *         i = 7; // Because i changed verifier would forget
7817  *                // it's range on second loop entry.
7818  *       } else {
7819  *         arr[i] = 42; // This would fail to verify.
7820  *       }
7821  *     }
7822  *     bpf_iter_num_destroy(&it);
7823  */
static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
				  struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
	struct bpf_reg_state *cur_iter, *queued_iter;

	BTF_TYPE_EMIT(struct bpf_iter);

	cur_iter = get_iter_from_state(cur_st, meta);

	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
		verifier_bug(env, "unexpected iterator state %d (%s)",
			     cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
		return -EFAULT;
	}

	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
		/* Because iter_next() call is a checkpoint is_state_visited()
		 * should guarantee parent state with same call sites and insn_idx.
		 */
		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
		    !same_callsites(cur_st->parent, cur_st)) {
			verifier_bug(env, "bad parent state for iter next call");
			return -EFAULT;
		}
		/* Note cur_st->parent in the call below, it is necessary to skip
		 * checkpoint created for cur_st by is_state_visited()
		 * right at this instruction.
		 */
		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
		/* branch out active iter state */
		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
		if (IS_ERR(queued_st))
			return PTR_ERR(queued_st);

		queued_iter = get_iter_from_state(queued_st, meta);
		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
		/* bump depth to avoid spurious infinite-loop detection; see the
		 * iter_active_depths_differ() discussion in the comment above
		 */
		queued_iter->iter.depth++;
		if (prev_st)
			widen_imprecise_scalars(env, prev_st, queued_st);

		/* in the queued (ACTIVE) branch iter_next() returned non-NULL */
		queued_fr = queued_st->frame[queued_st->curframe];
		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
	}

	/* Mark current iter state as drained (keeping the depth unchanged)
	 * and assume iter_next() returned NULL.
	 */
	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
	__mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);

	return 0;
}
7878 
7879 static bool arg_type_is_mem_size(enum bpf_arg_type type)
7880 {
7881 	return type == ARG_CONST_SIZE ||
7882 	       type == ARG_CONST_SIZE_OR_ZERO;
7883 }
7884 
7885 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
7886 {
7887 	return base_type(type) == ARG_PTR_TO_MEM &&
7888 	       type & MEM_UNINIT;
7889 }
7890 
7891 static bool arg_type_is_release(enum bpf_arg_type type)
7892 {
7893 	return type & OBJ_RELEASE;
7894 }
7895 
7896 static bool arg_type_is_dynptr(enum bpf_arg_type type)
7897 {
7898 	return base_type(type) == ARG_PTR_TO_DYNPTR;
7899 }
7900 
/* Some helpers' declared ARG_PTR_TO_MAP_VALUE actually depends on the map
 * type supplied at runtime: sockmap/sockhash update helpers take a socket
 * pointer instead of a plain value, and bloom filter "peek" takes a value
 * to test for membership. Rewrite *arg_type accordingly.
 */
static int resolve_map_arg_type(struct bpf_verifier_env *env,
				 const struct bpf_call_arg_meta *meta,
				 enum bpf_arg_type *arg_type)
{
	if (!meta->map.ptr) {
		/* kernel subsystem misconfigured verifier */
		verifier_bug(env, "invalid map_ptr to access map->type");
		return -EFAULT;
	}

	switch (meta->map.ptr->map_type) {
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
		} else {
			verbose(env, "invalid arg_type for sockmap/sockhash\n");
			return -EINVAL;
		}
		break;
	case BPF_MAP_TYPE_BLOOM_FILTER:
		if (meta->func_id == BPF_FUNC_map_peek_elem)
			*arg_type = ARG_PTR_TO_MAP_VALUE;
		break;
	default:
		break;
	}
	return 0;
}
7930 
/* Set of register types acceptable for one helper argument, plus an
 * optional default BTF ID used when the helper prototype doesn't supply
 * one. Unused trailing entries are zero, which check_reg_type() treats as
 * the end-of-list sentinel (NOT_INIT).
 */
struct bpf_reg_types {
	const enum bpf_reg_type types[10];
	u32 *btf_id;
};

static const struct bpf_reg_types sock_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
	},
};

#ifdef CONFIG_NET
static const struct bpf_reg_types btf_id_sock_common_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
	},
	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
#endif

static const struct bpf_reg_types mem_types = {
	.types = {
		PTR_TO_STACK,
		PTR_TO_PACKET,
		PTR_TO_PACKET_META,
		PTR_TO_MAP_KEY,
		PTR_TO_MAP_VALUE,
		PTR_TO_MEM,
		PTR_TO_MEM | MEM_RINGBUF,
		PTR_TO_BUF,
		PTR_TO_BTF_ID | PTR_TRUSTED,
		PTR_TO_CTX,
	},
};

static const struct bpf_reg_types spin_lock_types = {
	.types = {
		PTR_TO_MAP_VALUE,
		PTR_TO_BTF_ID | MEM_ALLOC,
	}
};

static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = {
	.types = {
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
		PTR_TO_BTF_ID | MEM_RCU,
	},
};
static const struct bpf_reg_types percpu_btf_ptr_types = {
	.types = {
		PTR_TO_BTF_ID | MEM_PERCPU,
		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
	}
};
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types kptr_xchg_dest_types = {
	.types = {
		PTR_TO_MAP_VALUE,
		PTR_TO_BTF_ID | MEM_ALLOC,
		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF,
		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU,
	}
};
static const struct bpf_reg_types dynptr_types = {
	.types = {
		PTR_TO_STACK,
		CONST_PTR_TO_DYNPTR,
	}
};

/* Map each base helper argument type to its set of acceptable register
 * types; consulted by check_reg_type().
 */
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
	[ARG_CONST_SIZE]		= &scalar_types,
	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
	[ARG_PTR_TO_CTX]		= &context_types,
	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
#ifdef CONFIG_NET
	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
#endif
	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
	[ARG_PTR_TO_MEM]		= &mem_types,
	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
	[ARG_PTR_TO_TIMER]		= &timer_types,
	[ARG_KPTR_XCHG_DEST]		= &kptr_xchg_dest_types,
	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
};
8044 
/* Check that register @regno is acceptable for helper argument @arg_type:
 * its (flag-folded) type must appear in the compatible_reg_types entry for
 * the base arg type, and PTR_TO_BTF_ID variants must additionally match
 * the expected BTF type ID (@arg_btf_id, or the table's default). Returns
 * 0 on success, -EACCES on a type mismatch, -EFAULT on internal verifier
 * bugs.
 */
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
			  enum bpf_arg_type arg_type,
			  const u32 *arg_btf_id,
			  struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	enum bpf_reg_type expected, type = reg->type;
	const struct bpf_reg_types *compatible;
	int i, j, err;

	compatible = compatible_reg_types[base_type(arg_type)];
	if (!compatible) {
		verifier_bug(env, "unsupported arg type %d", arg_type);
		return -EFAULT;
	}

	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
	 *
	 * Same for MAYBE_NULL:
	 *
	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
	 *
	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
	 *
	 * Therefore we fold these flags depending on the arg_type before comparison.
	 */
	if (arg_type & MEM_RDONLY)
		type &= ~MEM_RDONLY;
	if (arg_type & PTR_MAYBE_NULL)
		type &= ~PTR_MAYBE_NULL;
	if (base_type(arg_type) == ARG_PTR_TO_MEM)
		type &= ~DYNPTR_TYPE_FLAG_MASK;

	/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
		type &= ~MEM_ALLOC;
		type &= ~MEM_PERCPU;
	}

	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
		expected = compatible->types[i];
		if (expected == NOT_INIT)
			break;

		if (type == expected)
			goto found;
	}

	/* No match; report the register's type and all accepted types. */
	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
	for (j = 0; j + 1 < i; j++)
		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
	return -EACCES;

found:
	if (base_type(reg->type) != PTR_TO_BTF_ID)
		return 0;

	/* BTF-typed pointers matched through mem_types must be read-only args. */
	if (compatible == &mem_types) {
		if (!(arg_type & MEM_RDONLY)) {
			verbose(env,
				"%s() may write into memory pointed by R%d type=%s\n",
				func_id_name(meta->func_id),
				regno, reg_type_str(env, reg->type));
			return -EACCES;
		}
		return 0;
	}

	switch ((int)reg->type) {
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID | PTR_TRUSTED:
	case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
	case PTR_TO_BTF_ID | MEM_RCU:
	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
	{
		/* For bpf_sk_release, it needs to match against first member
		 * 'struct sock_common', hence make an exception for it. This
		 * allows bpf_sk_release to work for multiple socket types.
		 */
		bool strict_type_match = arg_type_is_release(arg_type) &&
					 meta->func_id != BPF_FUNC_sk_release;

		if (type_may_be_null(reg->type) &&
		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
			verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
			return -EACCES;
		}

		if (!arg_btf_id) {
			if (!compatible->btf_id) {
				verifier_bug(env, "missing arg compatible BTF ID");
				return -EFAULT;
			}
			arg_btf_id = compatible->btf_id;
		}

		if (meta->func_id == BPF_FUNC_kptr_xchg) {
			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
				return -EACCES;
		} else {
			if (arg_btf_id == BPF_PTR_POISON) {
				verbose(env, "verifier internal error:");
				verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
					regno);
				return -EACCES;
			}

			err = __check_ptr_off_reg(env, reg, regno, true);
			if (err)
				return err;

			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id,
						  reg->var_off.value, btf_vmlinux, *arg_btf_id,
						  strict_type_match)) {
				verbose(env, "R%d is of type %s but %s is expected\n",
					regno, btf_type_name(reg->btf, reg->btf_id),
					btf_type_name(btf_vmlinux, *arg_btf_id));
				return -EACCES;
			}
		}
		break;
	}
	case PTR_TO_BTF_ID | MEM_ALLOC:
	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
		    meta->func_id != BPF_FUNC_kptr_xchg) {
			verifier_bug(env, "unimplemented handling of MEM_ALLOC");
			return -EFAULT;
		}
		/* Check if local kptr in src arg matches kptr in dst arg */
		if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
				return -EACCES;
		}
		break;
	case PTR_TO_BTF_ID | MEM_PERCPU:
	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
		/* Handled by helper specific checks */
		break;
	default:
		verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match");
		return -EFAULT;
	}
	return 0;
}
8197 
8198 static struct btf_field *
8199 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
8200 {
8201 	struct btf_field *field;
8202 	struct btf_record *rec;
8203 
8204 	rec = reg_btf_record(reg);
8205 	if (!rec)
8206 		return NULL;
8207 
8208 	field = btf_record_find(rec, off, fields);
8209 	if (!field)
8210 		return NULL;
8211 
8212 	return field;
8213 }
8214 
/* Validate the offset (fixed and variable) carried by register @regno,
 * whose state is @reg, against what argument type @arg_type permits for
 * the register's pointer type. Returns 0 if acceptable, negative error
 * otherwise.
 */
static int check_func_arg_reg_off(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg, int regno,
				  enum bpf_arg_type arg_type)
{
	u32 type = reg->type;

	/* When referenced register is passed to release function, its fixed
	 * offset must be 0.
	 *
	 * We will check arg_type_is_release reg has ref_obj_id when storing
	 * meta->release_regno.
	 */
	if (arg_type_is_release(arg_type)) {
		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
		 * may not directly point to the object being released, but to
		 * dynptr pointing to such object, which might be at some offset
		 * on the stack. In that case, we simply fall back to the
		 * default handling.
		 */
		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
			return 0;

		/* Doing check_ptr_off_reg check for the offset will catch this
		 * because fixed_off_ok is false, but checking here allows us
		 * to give the user a better error message.
		 */
		if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) {
			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
				regno);
			return -EINVAL;
		}
	}

	switch (type) {
	/* Pointer types where both fixed and variable offset is explicitly allowed: */
	case PTR_TO_STACK:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_MAP_KEY:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MEM:
	case PTR_TO_MEM | MEM_RDONLY:
	case PTR_TO_MEM | MEM_RINGBUF:
	case PTR_TO_BUF:
	case PTR_TO_BUF | MEM_RDONLY:
	case PTR_TO_ARENA:
	case SCALAR_VALUE:
		return 0;
	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
	 * fixed offset.
	 */
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID | MEM_ALLOC:
	case PTR_TO_BTF_ID | PTR_TRUSTED:
	case PTR_TO_BTF_ID | MEM_RCU:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
		/* When referenced PTR_TO_BTF_ID is passed to release function,
		 * its fixed offset must be 0. In the other cases, fixed offset
		 * can be non-zero. This was already checked above. So pass
		 * fixed_off_ok as true to allow fixed offset for all other
		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
		 * still need to do checks instead of returning.
		 */
		return __check_ptr_off_reg(env, reg, regno, true);
	case PTR_TO_CTX:
		/*
		 * Allow fixed and variable offsets for syscall context, but
		 * only when the argument is passed as memory, not ctx,
		 * otherwise we may get modified ctx in tail called programs and
		 * global subprogs (that may act as extension prog hooks).
		 */
		if (arg_type != ARG_PTR_TO_CTX && is_var_ctx_off_allowed(env->prog))
			return 0;
		fallthrough;
	default:
		return __check_ptr_off_reg(env, reg, regno, false);
	}
}
8294 
8295 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
8296 						const struct bpf_func_proto *fn,
8297 						struct bpf_reg_state *regs)
8298 {
8299 	struct bpf_reg_state *state = NULL;
8300 	int i;
8301 
8302 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
8303 		if (arg_type_is_dynptr(fn->arg_type[i])) {
8304 			if (state) {
8305 				verbose(env, "verifier internal error: multiple dynptr args\n");
8306 				return NULL;
8307 			}
8308 			state = &regs[BPF_REG_1 + i];
8309 		}
8310 
8311 	if (!state)
8312 		verbose(env, "verifier internal error: no dynptr arg found\n");
8313 
8314 	return state;
8315 }
8316 
8317 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8318 {
8319 	struct bpf_func_state *state = bpf_func(env, reg);
8320 	int spi;
8321 
8322 	if (reg->type == CONST_PTR_TO_DYNPTR)
8323 		return reg->id;
8324 	spi = dynptr_get_spi(env, reg);
8325 	if (spi < 0)
8326 		return spi;
8327 	return state->stack[spi].spilled_ptr.id;
8328 }
8329 
8330 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8331 {
8332 	struct bpf_func_state *state = bpf_func(env, reg);
8333 	int spi;
8334 
8335 	if (reg->type == CONST_PTR_TO_DYNPTR)
8336 		return reg->ref_obj_id;
8337 	spi = dynptr_get_spi(env, reg);
8338 	if (spi < 0)
8339 		return spi;
8340 	return state->stack[spi].spilled_ptr.ref_obj_id;
8341 }
8342 
/* Return the dynptr type of @reg: read directly for CONST_PTR_TO_DYNPTR,
 * otherwise from the stack slot backing a stack-allocated dynptr.
 */
static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
					    struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi;

	if (reg->type == CONST_PTR_TO_DYNPTR)
		return reg->dynptr.type;

	/* NOTE(review): unlike dynptr_id()/dynptr_ref_obj_id() above, this
	 * computes the spi from the constant var_off directly; presumably
	 * callers have already validated the dynptr, so a failure here is a
	 * verifier-internal inconsistency.
	 */
	spi = bpf_get_spi(reg->var_off.value);
	if (spi < 0) {
		verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
		return BPF_DYNPTR_TYPE_INVALID;
	}

	return state->stack[spi].spilled_ptr.dynptr.type;
}
8360 
8361 static int check_reg_const_str(struct bpf_verifier_env *env,
8362 			       struct bpf_reg_state *reg, u32 regno)
8363 {
8364 	struct bpf_map *map = reg->map_ptr;
8365 	int err;
8366 	int map_off;
8367 	u64 map_addr;
8368 	char *str_ptr;
8369 
8370 	if (reg->type != PTR_TO_MAP_VALUE)
8371 		return -EINVAL;
8372 
8373 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
8374 		verbose(env, "R%d points to insn_array map which cannot be used as const string\n", regno);
8375 		return -EACCES;
8376 	}
8377 
8378 	if (!bpf_map_is_rdonly(map)) {
8379 		verbose(env, "R%d does not point to a readonly map'\n", regno);
8380 		return -EACCES;
8381 	}
8382 
8383 	if (!tnum_is_const(reg->var_off)) {
8384 		verbose(env, "R%d is not a constant address'\n", regno);
8385 		return -EACCES;
8386 	}
8387 
8388 	if (!map->ops->map_direct_value_addr) {
8389 		verbose(env, "no direct value access support for this map type\n");
8390 		return -EACCES;
8391 	}
8392 
8393 	err = check_map_access(env, regno, 0,
8394 			       map->value_size - reg->var_off.value, false,
8395 			       ACCESS_HELPER);
8396 	if (err)
8397 		return err;
8398 
8399 	map_off = reg->var_off.value;
8400 	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8401 	if (err) {
8402 		verbose(env, "direct value access on string failed\n");
8403 		return err;
8404 	}
8405 
8406 	str_ptr = (char *)(long)(map_addr);
8407 	if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8408 		verbose(env, "string is not zero-terminated\n");
8409 		return -EINVAL;
8410 	}
8411 	return 0;
8412 }
8413 
/* Returns constant key value in `value` if possible, else negative error.
 * The key must live on the stack at a constant offset and be backed either
 * by precisely tracked STACK_ZERO bytes or by a constant scalar spill of
 * exactly @key_size bytes.
 */
static int get_constant_map_key(struct bpf_verifier_env *env,
				struct bpf_reg_state *key,
				u32 key_size,
				s64 *value)
{
	struct bpf_func_state *state = bpf_func(env, key);
	struct bpf_reg_state *reg;
	int slot, spi, off;
	int spill_size = 0;
	int zero_size = 0;
	int stack_off;
	int i, err;
	u8 *stype;

	if (!env->bpf_capable)
		return -EOPNOTSUPP;
	/* Only stack-resident keys at a constant offset can be analyzed */
	if (key->type != PTR_TO_STACK)
		return -EOPNOTSUPP;
	if (!tnum_is_const(key->var_off))
		return -EOPNOTSUPP;

	/* Translate the (negative) stack offset into a stack slot index
	 * (spi) and a byte offset within that slot.
	 */
	stack_off = key->var_off.value;
	slot = -stack_off - 1;
	spi = slot / BPF_REG_SIZE;
	off = slot % BPF_REG_SIZE;
	stype = state->stack[spi].slot_type;

	/* First handle precisely tracked STACK_ZERO */
	for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
		zero_size++;
	if (zero_size >= key_size) {
		*value = 0;
		return 0;
	}

	/* Check that stack contains a scalar spill of expected size */
	if (!bpf_is_spilled_scalar_reg(&state->stack[spi]))
		return -EOPNOTSUPP;
	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
		spill_size++;
	if (spill_size != key_size)
		return -EOPNOTSUPP;

	reg = &state->stack[spi].spilled_ptr;
	if (!tnum_is_const(reg->var_off))
		/* Stack value not statically known */
		return -EOPNOTSUPP;

	/* We are relying on a constant value. So mark as precise
	 * to prevent pruning on it.
	 */
	bpf_bt_set_frame_slot(&env->bt, key->frameno, spi);
	err = mark_chain_precision_batch(env, env->cur_state);
	if (err < 0)
		return err;

	*value = reg->var_off.value;
	return 0;
}
8474 
8475 static bool can_elide_value_nullness(enum bpf_map_type type);
8476 
8477 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8478 			  struct bpf_call_arg_meta *meta,
8479 			  const struct bpf_func_proto *fn,
8480 			  int insn_idx)
8481 {
8482 	u32 regno = BPF_REG_1 + arg;
8483 	struct bpf_reg_state *reg = reg_state(env, regno);
8484 	enum bpf_arg_type arg_type = fn->arg_type[arg];
8485 	enum bpf_reg_type type = reg->type;
8486 	u32 *arg_btf_id = NULL;
8487 	u32 key_size;
8488 	int err = 0;
8489 
8490 	if (arg_type == ARG_DONTCARE)
8491 		return 0;
8492 
8493 	err = check_reg_arg(env, regno, SRC_OP);
8494 	if (err)
8495 		return err;
8496 
8497 	if (arg_type == ARG_ANYTHING) {
8498 		if (is_pointer_value(env, regno)) {
8499 			verbose(env, "R%d leaks addr into helper function\n",
8500 				regno);
8501 			return -EACCES;
8502 		}
8503 		return 0;
8504 	}
8505 
8506 	if (type_is_pkt_pointer(type) &&
8507 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8508 		verbose(env, "helper access to the packet is not allowed\n");
8509 		return -EACCES;
8510 	}
8511 
8512 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8513 		err = resolve_map_arg_type(env, meta, &arg_type);
8514 		if (err)
8515 			return err;
8516 	}
8517 
8518 	if (bpf_register_is_null(reg) && type_may_be_null(arg_type))
8519 		/* A NULL register has a SCALAR_VALUE type, so skip
8520 		 * type checking.
8521 		 */
8522 		goto skip_type_check;
8523 
8524 	/* arg_btf_id and arg_size are in a union. */
8525 	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8526 	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8527 		arg_btf_id = fn->arg_btf_id[arg];
8528 
8529 	err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
8530 	if (err)
8531 		return err;
8532 
8533 	err = check_func_arg_reg_off(env, reg, regno, arg_type);
8534 	if (err)
8535 		return err;
8536 
8537 skip_type_check:
8538 	if (arg_type_is_release(arg_type)) {
8539 		if (arg_type_is_dynptr(arg_type)) {
8540 			struct bpf_func_state *state = bpf_func(env, reg);
8541 			int spi;
8542 
8543 			/* Only dynptr created on stack can be released, thus
8544 			 * the get_spi and stack state checks for spilled_ptr
8545 			 * should only be done before process_dynptr_func for
8546 			 * PTR_TO_STACK.
8547 			 */
8548 			if (reg->type == PTR_TO_STACK) {
8549 				spi = dynptr_get_spi(env, reg);
8550 				if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
8551 					verbose(env, "arg %d is an unacquired reference\n", regno);
8552 					return -EINVAL;
8553 				}
8554 			} else {
8555 				verbose(env, "cannot release unowned const bpf_dynptr\n");
8556 				return -EINVAL;
8557 			}
8558 		} else if (!reg->ref_obj_id && !bpf_register_is_null(reg)) {
8559 			verbose(env, "R%d must be referenced when passed to release function\n",
8560 				regno);
8561 			return -EINVAL;
8562 		}
8563 		if (meta->release_regno) {
8564 			verifier_bug(env, "more than one release argument");
8565 			return -EFAULT;
8566 		}
8567 		meta->release_regno = regno;
8568 	}
8569 
8570 	if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
8571 		if (meta->ref_obj_id) {
8572 			verbose(env, "more than one arg with ref_obj_id R%d %u %u",
8573 				regno, reg->ref_obj_id,
8574 				meta->ref_obj_id);
8575 			return -EACCES;
8576 		}
8577 		meta->ref_obj_id = reg->ref_obj_id;
8578 	}
8579 
8580 	switch (base_type(arg_type)) {
8581 	case ARG_CONST_MAP_PTR:
8582 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8583 		if (meta->map.ptr) {
8584 			/* Use map_uid (which is unique id of inner map) to reject:
8585 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8586 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8587 			 * if (inner_map1 && inner_map2) {
8588 			 *     timer = bpf_map_lookup_elem(inner_map1);
8589 			 *     if (timer)
8590 			 *         // mismatch would have been allowed
8591 			 *         bpf_timer_init(timer, inner_map2);
8592 			 * }
8593 			 *
8594 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
8595 			 */
8596 			if (meta->map.ptr != reg->map_ptr ||
8597 			    meta->map.uid != reg->map_uid) {
8598 				verbose(env,
8599 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8600 					meta->map.uid, reg->map_uid);
8601 				return -EINVAL;
8602 			}
8603 		}
8604 		meta->map.ptr = reg->map_ptr;
8605 		meta->map.uid = reg->map_uid;
8606 		break;
8607 	case ARG_PTR_TO_MAP_KEY:
8608 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
8609 		 * check that [key, key + map->key_size) are within
8610 		 * stack limits and initialized
8611 		 */
8612 		if (!meta->map.ptr) {
8613 			/* in function declaration map_ptr must come before
8614 			 * map_key, so that it's verified and known before
8615 			 * we have to check map_key here. Otherwise it means
8616 			 * that kernel subsystem misconfigured verifier
8617 			 */
8618 			verifier_bug(env, "invalid map_ptr to access map->key");
8619 			return -EFAULT;
8620 		}
8621 		key_size = meta->map.ptr->key_size;
8622 		err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
8623 		if (err)
8624 			return err;
8625 		if (can_elide_value_nullness(meta->map.ptr->map_type)) {
8626 			err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
8627 			if (err < 0) {
8628 				meta->const_map_key = -1;
8629 				if (err == -EOPNOTSUPP)
8630 					err = 0;
8631 				else
8632 					return err;
8633 			}
8634 		}
8635 		break;
8636 	case ARG_PTR_TO_MAP_VALUE:
8637 		if (type_may_be_null(arg_type) && bpf_register_is_null(reg))
8638 			return 0;
8639 
8640 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
8641 		 * check [value, value + map->value_size) validity
8642 		 */
8643 		if (!meta->map.ptr) {
8644 			/* kernel subsystem misconfigured verifier */
8645 			verifier_bug(env, "invalid map_ptr to access map->value");
8646 			return -EFAULT;
8647 		}
8648 		meta->raw_mode = arg_type & MEM_UNINIT;
8649 		err = check_helper_mem_access(env, regno, meta->map.ptr->value_size,
8650 					      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8651 					      false, meta);
8652 		break;
8653 	case ARG_PTR_TO_PERCPU_BTF_ID:
8654 		if (!reg->btf_id) {
8655 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8656 			return -EACCES;
8657 		}
8658 		meta->ret_btf = reg->btf;
8659 		meta->ret_btf_id = reg->btf_id;
8660 		break;
8661 	case ARG_PTR_TO_SPIN_LOCK:
8662 		if (in_rbtree_lock_required_cb(env)) {
8663 			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8664 			return -EACCES;
8665 		}
8666 		if (meta->func_id == BPF_FUNC_spin_lock) {
8667 			err = process_spin_lock(env, regno, PROCESS_SPIN_LOCK);
8668 			if (err)
8669 				return err;
8670 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
8671 			err = process_spin_lock(env, regno, 0);
8672 			if (err)
8673 				return err;
8674 		} else {
8675 			verifier_bug(env, "spin lock arg on unexpected helper");
8676 			return -EFAULT;
8677 		}
8678 		break;
8679 	case ARG_PTR_TO_TIMER:
8680 		err = process_timer_helper(env, regno, meta);
8681 		if (err)
8682 			return err;
8683 		break;
8684 	case ARG_PTR_TO_FUNC:
8685 		meta->subprogno = reg->subprogno;
8686 		break;
8687 	case ARG_PTR_TO_MEM:
8688 		/* The access to this pointer is only checked when we hit the
8689 		 * next is_mem_size argument below.
8690 		 */
8691 		meta->raw_mode = arg_type & MEM_UNINIT;
8692 		if (arg_type & MEM_FIXED_SIZE) {
8693 			err = check_helper_mem_access(env, regno, fn->arg_size[arg],
8694 						      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8695 						      false, meta);
8696 			if (err)
8697 				return err;
8698 			if (arg_type & MEM_ALIGNED)
8699 				err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
8700 		}
8701 		break;
8702 	case ARG_CONST_SIZE:
8703 		err = check_mem_size_reg(env, reg, regno,
8704 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8705 					 BPF_WRITE : BPF_READ,
8706 					 false, meta);
8707 		break;
8708 	case ARG_CONST_SIZE_OR_ZERO:
8709 		err = check_mem_size_reg(env, reg, regno,
8710 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8711 					 BPF_WRITE : BPF_READ,
8712 					 true, meta);
8713 		break;
8714 	case ARG_PTR_TO_DYNPTR:
8715 		err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
8716 		if (err)
8717 			return err;
8718 		break;
8719 	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8720 		if (!tnum_is_const(reg->var_off)) {
8721 			verbose(env, "R%d is not a known constant'\n",
8722 				regno);
8723 			return -EACCES;
8724 		}
8725 		meta->mem_size = reg->var_off.value;
8726 		err = mark_chain_precision(env, regno);
8727 		if (err)
8728 			return err;
8729 		break;
8730 	case ARG_PTR_TO_CONST_STR:
8731 	{
8732 		err = check_reg_const_str(env, reg, regno);
8733 		if (err)
8734 			return err;
8735 		break;
8736 	}
8737 	case ARG_KPTR_XCHG_DEST:
8738 		err = process_kptr_func(env, regno, meta);
8739 		if (err)
8740 			return err;
8741 		break;
8742 	}
8743 
8744 	return err;
8745 }
8746 
/* Decide whether a sockmap/sockhash update or delete via the generic map
 * helpers (@func_id) is allowed for the current program type. Returns true
 * only for program types/attach types where the callee cannot hold a
 * locked struct sock; logs a message and returns false otherwise.
 */
static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
{
	enum bpf_attach_type eatype = env->prog->expected_attach_type;
	enum bpf_prog_type type = resolve_prog_type(env->prog);

	/* Only map_update_elem/map_delete_elem are subject to this check */
	if (func_id != BPF_FUNC_map_update_elem &&
	    func_id != BPF_FUNC_map_delete_elem)
		return false;

	/* It's not possible to get access to a locked struct sock in these
	 * contexts, so updating is safe.
	 */
	switch (type) {
	case BPF_PROG_TYPE_TRACING:
		if (eatype == BPF_TRACE_ITER)
			return true;
		break;
	case BPF_PROG_TYPE_SOCK_OPS:
		/* map_update allowed only via dedicated helpers with event type checks */
		if (func_id == BPF_FUNC_map_delete_elem)
			return true;
		break;
	case BPF_PROG_TYPE_SOCKET_FILTER:
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_SK_LOOKUP:
		return true;
	default:
		break;
	}

	verbose(env, "cannot update sockmap in this context\n");
	return false;
}
8784 
8785 bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8786 {
8787 	return env->prog->jit_requested &&
8788 	       bpf_jit_supports_subprog_tailcalls();
8789 }
8790 
/* Enforce the pairing between map types and the helpers allowed to operate
 * on them, checked in both directions: first which helpers a given map
 * type accepts, then which map types a given helper accepts. A NULL @map
 * trivially passes. Returns 0 if compatible, -EINVAL otherwise.
 */
static int check_map_func_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map, int func_id)
{
	if (!map)
		return 0;

	/* We need a two way check, first is from map perspective ... */
	switch (map->map_type) {
	case BPF_MAP_TYPE_PROG_ARRAY:
		if (func_id != BPF_FUNC_tail_call)
			goto error;
		break;
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
		if (func_id != BPF_FUNC_perf_event_read &&
		    func_id != BPF_FUNC_perf_event_output &&
		    func_id != BPF_FUNC_skb_output &&
		    func_id != BPF_FUNC_perf_event_read_value &&
		    func_id != BPF_FUNC_xdp_output)
			goto error;
		break;
	case BPF_MAP_TYPE_RINGBUF:
		if (func_id != BPF_FUNC_ringbuf_output &&
		    func_id != BPF_FUNC_ringbuf_reserve &&
		    func_id != BPF_FUNC_ringbuf_query &&
		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
			goto error;
		break;
	case BPF_MAP_TYPE_USER_RINGBUF:
		if (func_id != BPF_FUNC_user_ringbuf_drain)
			goto error;
		break;
	case BPF_MAP_TYPE_STACK_TRACE:
		if (func_id != BPF_FUNC_get_stackid)
			goto error;
		break;
	case BPF_MAP_TYPE_CGROUP_ARRAY:
		if (func_id != BPF_FUNC_skb_under_cgroup &&
		    func_id != BPF_FUNC_current_task_under_cgroup)
			goto error;
		break;
	case BPF_MAP_TYPE_CGROUP_STORAGE:
	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
		if (func_id != BPF_FUNC_get_local_storage)
			goto error;
		break;
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
		if (func_id != BPF_FUNC_redirect_map &&
		    func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	/* Restrict bpf side of cpumap and xskmap, open when use-cases
	 * appear.
	 */
	case BPF_MAP_TYPE_CPUMAP:
		if (func_id != BPF_FUNC_redirect_map)
			goto error;
		break;
	case BPF_MAP_TYPE_XSKMAP:
		if (func_id != BPF_FUNC_redirect_map &&
		    func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		if (func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_SOCKMAP:
		if (func_id != BPF_FUNC_sk_redirect_map &&
		    func_id != BPF_FUNC_sock_map_update &&
		    func_id != BPF_FUNC_msg_redirect_map &&
		    func_id != BPF_FUNC_sk_select_reuseport &&
		    func_id != BPF_FUNC_map_lookup_elem &&
		    !may_update_sockmap(env, func_id))
			goto error;
		break;
	case BPF_MAP_TYPE_SOCKHASH:
		if (func_id != BPF_FUNC_sk_redirect_hash &&
		    func_id != BPF_FUNC_sock_hash_update &&
		    func_id != BPF_FUNC_msg_redirect_hash &&
		    func_id != BPF_FUNC_sk_select_reuseport &&
		    func_id != BPF_FUNC_map_lookup_elem &&
		    !may_update_sockmap(env, func_id))
			goto error;
		break;
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
		if (func_id != BPF_FUNC_sk_select_reuseport)
			goto error;
		break;
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
		if (func_id != BPF_FUNC_map_peek_elem &&
		    func_id != BPF_FUNC_map_pop_elem &&
		    func_id != BPF_FUNC_map_push_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_SK_STORAGE:
		if (func_id != BPF_FUNC_sk_storage_get &&
		    func_id != BPF_FUNC_sk_storage_delete &&
		    func_id != BPF_FUNC_kptr_xchg)
			goto error;
		break;
	case BPF_MAP_TYPE_INODE_STORAGE:
		if (func_id != BPF_FUNC_inode_storage_get &&
		    func_id != BPF_FUNC_inode_storage_delete &&
		    func_id != BPF_FUNC_kptr_xchg)
			goto error;
		break;
	case BPF_MAP_TYPE_TASK_STORAGE:
		if (func_id != BPF_FUNC_task_storage_get &&
		    func_id != BPF_FUNC_task_storage_delete &&
		    func_id != BPF_FUNC_kptr_xchg)
			goto error;
		break;
	case BPF_MAP_TYPE_CGRP_STORAGE:
		if (func_id != BPF_FUNC_cgrp_storage_get &&
		    func_id != BPF_FUNC_cgrp_storage_delete &&
		    func_id != BPF_FUNC_kptr_xchg)
			goto error;
		break;
	case BPF_MAP_TYPE_BLOOM_FILTER:
		if (func_id != BPF_FUNC_map_peek_elem &&
		    func_id != BPF_FUNC_map_push_elem)
			goto error;
		break;
	/* insn_array maps cannot be accessed through any helper */
	case BPF_MAP_TYPE_INSN_ARRAY:
		goto error;
	default:
		break;
	}

	/* ... and second from the function itself. */
	switch (func_id) {
	case BPF_FUNC_tail_call:
		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
			goto error;
		if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) {
			verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
			return -EINVAL;
		}
		break;
	case BPF_FUNC_perf_event_read:
	case BPF_FUNC_perf_event_output:
	case BPF_FUNC_perf_event_read_value:
	case BPF_FUNC_skb_output:
	case BPF_FUNC_xdp_output:
		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
			goto error;
		break;
	case BPF_FUNC_ringbuf_output:
	case BPF_FUNC_ringbuf_reserve:
	case BPF_FUNC_ringbuf_query:
	case BPF_FUNC_ringbuf_reserve_dynptr:
	case BPF_FUNC_ringbuf_submit_dynptr:
	case BPF_FUNC_ringbuf_discard_dynptr:
		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
			goto error;
		break;
	case BPF_FUNC_user_ringbuf_drain:
		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
			goto error;
		break;
	case BPF_FUNC_get_stackid:
		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
			goto error;
		break;
	case BPF_FUNC_current_task_under_cgroup:
	case BPF_FUNC_skb_under_cgroup:
		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
			goto error;
		break;
	case BPF_FUNC_redirect_map:
		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
		    map->map_type != BPF_MAP_TYPE_XSKMAP)
			goto error;
		break;
	case BPF_FUNC_sk_redirect_map:
	case BPF_FUNC_msg_redirect_map:
	case BPF_FUNC_sock_map_update:
		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
			goto error;
		break;
	case BPF_FUNC_sk_redirect_hash:
	case BPF_FUNC_msg_redirect_hash:
	case BPF_FUNC_sock_hash_update:
		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
			goto error;
		break;
	case BPF_FUNC_get_local_storage:
		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
			goto error;
		break;
	case BPF_FUNC_sk_select_reuseport:
		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
			goto error;
		break;
	case BPF_FUNC_map_pop_elem:
		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
		    map->map_type != BPF_MAP_TYPE_STACK)
			goto error;
		break;
	case BPF_FUNC_map_peek_elem:
	case BPF_FUNC_map_push_elem:
		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
		    map->map_type != BPF_MAP_TYPE_STACK &&
		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
			goto error;
		break;
	case BPF_FUNC_map_lookup_percpu_elem:
		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
			goto error;
		break;
	case BPF_FUNC_sk_storage_get:
	case BPF_FUNC_sk_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
			goto error;
		break;
	case BPF_FUNC_inode_storage_get:
	case BPF_FUNC_inode_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
			goto error;
		break;
	case BPF_FUNC_task_storage_get:
	case BPF_FUNC_task_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
			goto error;
		break;
	case BPF_FUNC_cgrp_storage_get:
	case BPF_FUNC_cgrp_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
			goto error;
		break;
	default:
		break;
	}

	return 0;
error:
	verbose(env, "cannot pass map_type %d into func %s#%d\n",
		map->map_type, func_id_name(func_id), func_id);
	return -EINVAL;
}
9043 
9044 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
9045 {
9046 	int count = 0;
9047 
9048 	if (arg_type_is_raw_mem(fn->arg1_type))
9049 		count++;
9050 	if (arg_type_is_raw_mem(fn->arg2_type))
9051 		count++;
9052 	if (arg_type_is_raw_mem(fn->arg3_type))
9053 		count++;
9054 	if (arg_type_is_raw_mem(fn->arg4_type))
9055 		count++;
9056 	if (arg_type_is_raw_mem(fn->arg5_type))
9057 		count++;
9058 
9059 	/* We only support one arg being in raw mode at the moment,
9060 	 * which is sufficient for the helper functions we have
9061 	 * right now.
9062 	 */
9063 	return count <= 1;
9064 }
9065 
/* Return true if argument @arg of @fn is inconsistently paired with its
 * successor.
 *
 * A valid ARG_PTR_TO_MEM argument takes exactly one of two shapes:
 *  - followed by a mem-size argument, with no MEM_FIXED_SIZE flag and a
 *    zero arg_size; or
 *  - carrying MEM_FIXED_SIZE with a non-zero arg_size, and not followed
 *    by a mem-size argument.
 * Any other combination is rejected. For non-mem argument types, being
 * followed by a mem-size argument is itself invalid, since a size must
 * describe a preceding memory argument.
 */
static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
{
	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
	bool has_size = fn->arg_size[arg] != 0;
	bool is_next_size = false;

	/* The last argument has no successor to pair with */
	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);

	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
		return is_next_size;

	return has_size == is_next_size || is_next_size == is_fixed;
}
9080 
9081 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
9082 {
9083 	/* bpf_xxx(..., buf, len) call will access 'len'
9084 	 * bytes from memory 'buf'. Both arg types need
9085 	 * to be paired, so make sure there's no buggy
9086 	 * helper function specification.
9087 	 */
9088 	if (arg_type_is_mem_size(fn->arg1_type) ||
9089 	    check_args_pair_invalid(fn, 0) ||
9090 	    check_args_pair_invalid(fn, 1) ||
9091 	    check_args_pair_invalid(fn, 2) ||
9092 	    check_args_pair_invalid(fn, 3) ||
9093 	    check_args_pair_invalid(fn, 4))
9094 		return false;
9095 
9096 	return true;
9097 }
9098 
/* Sanity-check the arg_btf_id annotations of helper prototype @fn.
 *
 * Note: the loop returns on the first ARG_PTR_TO_BTF_ID or
 * ARG_PTR_TO_SPIN_LOCK argument encountered, so only that argument's
 * annotation is validated; arguments before it are only checked for a
 * stray arg_btf_id on a non-BTF-ID argument type.
 */
static bool check_btf_id_ok(const struct bpf_func_proto *fn)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
		/* A BTF ID pointer argument must carry a target BTF ID */
		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
			return !!fn->arg_btf_id[i];
		/* Spin-lock args must use the poison marker, resolved later */
		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
			return fn->arg_btf_id[i] == BPF_PTR_POISON;
		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
		    /* arg_btf_id and arg_size are in a union. */
		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
			return false;
	}

	return true;
}
9117 
9118 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn)
9119 {
9120 	int i;
9121 
9122 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
9123 		enum bpf_arg_type arg_type = fn->arg_type[i];
9124 
9125 		if (base_type(arg_type) != ARG_PTR_TO_MEM)
9126 			continue;
9127 		if (!(arg_type & (MEM_WRITE | MEM_RDONLY)))
9128 			return false;
9129 	}
9130 
9131 	return true;
9132 }
9133 
9134 static int check_func_proto(const struct bpf_func_proto *fn)
9135 {
9136 	return check_raw_mode_ok(fn) &&
9137 	       check_arg_pair_ok(fn) &&
9138 	       check_mem_arg_rw_flag_ok(fn) &&
9139 	       check_btf_id_ok(fn) ? 0 : -EINVAL;
9140 }
9141 
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
 * are now invalid, so turn them into unknown SCALAR_VALUE.
 *
 * This also applies to dynptr slices belonging to skb and xdp dynptrs,
 * since these slices point to packet data.
 */
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;

	/* Walk every register and stack slot across all frames of the
	 * current verifier state and invalidate anything packet-derived.
	 */
	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
			mark_reg_invalid(env, reg);
	}));
}
9158 
/* Special negative 'range' values for a PTR_TO_PACKET register that has been
 * compared against pkt_end; see mark_pkt_end().
 */
enum {
	AT_PKT_END = -1,	/* pointer is exactly at pkt_end (pkt >= pkt_end taken) */
	BEYOND_PKT_END = -2,	/* pointer is at least 1 byte past pkt_end (pkt > pkt_end taken) */
};
9163 
9164 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
9165 {
9166 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
9167 	struct bpf_reg_state *reg = &state->regs[regn];
9168 
9169 	if (reg->type != PTR_TO_PACKET)
9170 		/* PTR_TO_PACKET_META is not supported yet */
9171 		return;
9172 
9173 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
9174 	 * How far beyond pkt_end it goes is unknown.
9175 	 * if (!range_open) it's the case of pkt >= pkt_end
9176 	 * if (range_open) it's the case of pkt > pkt_end
9177 	 * hence this pointer is at least 1 byte bigger than pkt_end
9178 	 */
9179 	if (range_open)
9180 		reg->range = BEYOND_PKT_END;
9181 	else
9182 		reg->range = AT_PKT_END;
9183 }
9184 
9185 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id)
9186 {
9187 	int i;
9188 
9189 	for (i = 0; i < state->acquired_refs; i++) {
9190 		if (state->refs[i].type != REF_TYPE_PTR)
9191 			continue;
9192 		if (state->refs[i].id == ref_obj_id) {
9193 			release_reference_state(state, i);
9194 			return 0;
9195 		}
9196 	}
9197 	return -EINVAL;
9198 }
9199 
9200 /* The pointer with the specified id has released its reference to kernel
9201  * resources. Identify all copies of the same pointer and clear the reference.
9202  *
9203  * This is the release function corresponding to acquire_reference(). Idempotent.
9204  */
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;
	int err;

	err = release_reference_nomark(vstate, ref_obj_id);
	if (err)
		return err;

	/* Invalidate every register copy (in all frames and stack slots)
	 * still carrying this reference id.
	 */
	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
		if (reg->ref_obj_id == ref_obj_id)
			mark_reg_invalid(env, reg);
	}));

	return 0;
}
9223 
/* Invalidate every register in the current state that holds a non-owning
 * reference.
 */
static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
{
	struct bpf_func_state *unused;
	struct bpf_reg_state *reg;

	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
		if (type_is_non_owning_ref(reg->type))
			mark_reg_invalid(env, reg);
	}));
}
9234 
9235 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9236 				    struct bpf_reg_state *regs)
9237 {
9238 	int i;
9239 
9240 	/* after the call registers r0 - r5 were scratched */
9241 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9242 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
9243 		__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9244 	}
9245 }
9246 
/* Callback used by setup_func_entry() and push_callback_call() to seed the
 * callee frame's registers from the caller's state at instruction @insn_idx.
 */
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee,
				   int insn_idx);

/* Forward declaration: default register setup for direct subprog calls. */
static int set_callee_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *caller,
			    struct bpf_func_state *callee, int insn_idx);
9255 
/* Allocate and initialize a new callee frame for a call at @callsite into
 * @subprog, using @set_callee_state_cb to seed the callee's registers from
 * the caller's.  On success @state->curframe points at the new frame.
 * Returns -E2BIG when the call chain is too deep, -ENOMEM on allocation
 * failure, -EFAULT on an internal verifier bug.
 */
static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
			    set_callee_state_fn set_callee_state_cb,
			    struct bpf_verifier_state *state)
{
	struct bpf_func_state *caller, *callee;
	int err;

	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
		verbose(env, "the call stack of %d frames is too deep\n",
			state->curframe + 2);
		return -E2BIG;
	}

	if (state->frame[state->curframe + 1]) {
		verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
		return -EFAULT;
	}

	caller = state->frame[state->curframe];
	callee = kzalloc_obj(*callee, GFP_KERNEL_ACCOUNT);
	if (!callee)
		return -ENOMEM;
	state->frame[state->curframe + 1] = callee;

	/* callee cannot access r0, r6 - r9 for reading and has to write
	 * into its own stack before reading from it.
	 * callee can read/write into caller's stack
	 */
	init_func_state(env, callee,
			/* remember the callsite, it will be used by bpf_exit */
			callsite,
			state->curframe + 1 /* frameno within this callchain */,
			subprog /* subprog number within this prog */);
	err = set_callee_state_cb(env, caller, callee, callsite);
	if (err)
		goto err_out;

	/* only increment it after check_reg_arg() finished */
	state->curframe++;

	return 0;

err_out:
	free_func_state(callee);
	state->frame[state->curframe + 1] = NULL;
	return err;
}
9303 
/* Validate the registers the caller passes into @subprog against the
 * subprog's BTF-derived argument expectations (prepared by
 * btf_prepare_func_args()).  Returns 0 on match, a negative error on
 * mismatch, -EFAULT on an unrecognized (verifier-bug) argument type.
 */
static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
				    const struct btf *btf,
				    struct bpf_reg_state *regs)
{
	struct bpf_subprog_info *sub = subprog_info(env, subprog);
	struct bpf_verifier_log *log = &env->log;
	u32 i;
	int ret;

	ret = btf_prepare_func_args(env, subprog);
	if (ret)
		return ret;

	/* check that BTF function arguments match actual types that the
	 * verifier sees.
	 */
	for (i = 0; i < sub->arg_cnt; i++) {
		u32 regno = i + 1;	/* BPF arguments live in R1..R5 */
		struct bpf_reg_state *reg = &regs[regno];
		struct bpf_subprog_arg_info *arg = &sub->args[i];

		if (arg->arg_type == ARG_ANYTHING) {
			if (reg->type != SCALAR_VALUE) {
				bpf_log(log, "R%d is not a scalar\n", regno);
				return -EINVAL;
			}
		} else if (arg->arg_type & PTR_UNTRUSTED) {
			/*
			 * Anything is allowed for untrusted arguments, as these are
			 * read-only and probe read instructions would protect against
			 * invalid memory access.
			 */
		} else if (arg->arg_type == ARG_PTR_TO_CTX) {
			ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_CTX);
			if (ret < 0)
				return ret;
			/* If function expects ctx type in BTF check that caller
			 * is passing PTR_TO_CTX.
			 */
			if (reg->type != PTR_TO_CTX) {
				bpf_log(log, "arg#%d expects pointer to ctx\n", i);
				return -EINVAL;
			}
		} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
			ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
			if (ret < 0)
				return ret;
			if (check_mem_reg(env, reg, regno, arg->mem_size))
				return -EINVAL;
			if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
				bpf_log(log, "arg#%d is expected to be non-NULL\n", i);
				return -EINVAL;
			}
		} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
			/*
			 * Can pass any value and the kernel won't crash, but
			 * only PTR_TO_ARENA or SCALAR make sense. Everything
			 * else is a bug in the bpf program. Point it out to
			 * the user at the verification time instead of
			 * run-time debug nightmare.
			 */
			if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
				bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno);
				return -EINVAL;
			}
		} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
			ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR);
			if (ret)
				return ret;

			ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
			if (ret)
				return ret;
		} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
			struct bpf_call_arg_meta meta;
			int err;

			if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type))
				continue;

			memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
			err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta);
			err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type);
			if (err)
				return err;
		} else {
			verifier_bug(env, "unrecognized arg#%d type %d", i, arg->arg_type);
			return -EFAULT;
		}
	}

	return 0;
}
9397 
9398 /* Compare BTF of a function call with given bpf_reg_state.
9399  * Returns:
9400  * EFAULT - there is a verifier bug. Abort verification.
9401  * EINVAL - there is a type mismatch or BTF is not available.
9402  * 0 - BTF matches with what bpf_reg_state expects.
9403  * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
9404  */
9405 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
9406 				  struct bpf_reg_state *regs)
9407 {
9408 	struct bpf_prog *prog = env->prog;
9409 	struct btf *btf = prog->aux->btf;
9410 	u32 btf_id;
9411 	int err;
9412 
9413 	if (!prog->aux->func_info)
9414 		return -EINVAL;
9415 
9416 	btf_id = prog->aux->func_info[subprog].type_id;
9417 	if (!btf_id)
9418 		return -EFAULT;
9419 
9420 	if (prog->aux->func_info_aux[subprog].unreliable)
9421 		return -EINVAL;
9422 
9423 	err = btf_check_func_arg_match(env, subprog, btf, regs);
9424 	/* Compiler optimizations can remove arguments from static functions
9425 	 * or mismatched type can be passed into a global function.
9426 	 * In such cases mark the function as unreliable from BTF point of view.
9427 	 */
9428 	if (err)
9429 		prog->aux->func_info_aux[subprog].unreliable = true;
9430 	return err;
9431 }
9432 
/* Handle a helper or kfunc call at @insn_idx that takes subprog @subprog as
 * a callback.  Async callbacks (see bpf_is_async_callback_calling_insn())
 * are pushed as a separate verification state rooted at the callback body;
 * synchronous callbacks get a new frame in a branched state, while the
 * current path continues with the next instruction after the call.
 */
static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			      int insn_idx, int subprog,
			      set_callee_state_fn set_callee_state_cb)
{
	struct bpf_verifier_state *state = env->cur_state, *callback_state;
	struct bpf_func_state *caller, *callee;
	int err;

	caller = state->frame[state->curframe];
	err = btf_check_subprog_call(env, subprog, caller->regs);
	if (err == -EFAULT)
		return err;

	/* set_callee_state is used for direct subprog calls, but we are
	 * interested in validating only BPF helpers that can call subprogs as
	 * callbacks
	 */
	env->subprog_info[subprog].is_cb = true;
	if (bpf_pseudo_kfunc_call(insn) &&
	    !is_callback_calling_kfunc(insn->imm)) {
		verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
			     func_id_name(insn->imm), insn->imm);
		return -EFAULT;
	} else if (!bpf_pseudo_kfunc_call(insn) &&
		   !is_callback_calling_function(insn->imm)) { /* helper */
		verifier_bug(env, "helper %s#%d not marked as callback-calling",
			     func_id_name(insn->imm), insn->imm);
		return -EFAULT;
	}

	if (bpf_is_async_callback_calling_insn(insn)) {
		struct bpf_verifier_state *async_cb;

		/* there is no real recursion here. timer and workqueue callbacks are async */
		env->subprog_info[subprog].is_async_cb = true;
		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
					 insn_idx, subprog,
					 is_async_cb_sleepable(env, insn));
		if (IS_ERR(async_cb))
			return PTR_ERR(async_cb);
		callee = async_cb->frame[0];
		callee->async_entry_cnt = caller->async_entry_cnt + 1;

		/* Convert bpf_timer_set_callback() args into timer callback args */
		err = set_callee_state_cb(env, caller, callee, insn_idx);
		if (err)
			return err;

		return 0;
	}

	/* for callback functions enqueue entry to callback and
	 * proceed with next instruction within current frame.
	 */
	callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
	if (IS_ERR(callback_state))
		return PTR_ERR(callback_state);

	err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
			       callback_state);
	if (err)
		return err;

	/* Track callback nesting so repeated callback visits can be bounded
	 * and eventually pruned.
	 */
	callback_state->callback_unroll_depth++;
	callback_state->frame[callback_state->curframe - 1]->callback_depth++;
	caller->callback_depth = 0;
	return 0;
}
9501 
/* Verify a BPF-to-BPF call instruction.  Global subprogs are verified
 * separately: their args are checked against BTF here, caller-saved regs
 * are scratched and the return value (if any) becomes an unknown scalar.
 * Static subprogs are verified inline by entering a new frame and
 * continuing at the callee's first instruction (*insn_idx is updated).
 */
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			   int *insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_func_state *caller;
	int err, subprog, target_insn;

	target_insn = *insn_idx + insn->imm + 1;
	subprog = bpf_find_subprog(env, target_insn);
	if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
			    target_insn))
		return -EFAULT;

	caller = state->frame[state->curframe];
	err = btf_check_subprog_call(env, subprog, caller->regs);
	if (err == -EFAULT)
		return err;
	if (bpf_subprog_is_global(env, subprog)) {
		const char *sub_name = subprog_name(env, subprog);

		if (env->cur_state->active_locks) {
			verbose(env, "global function calls are not allowed while holding a lock,\n"
				     "use static function instead\n");
			return -EINVAL;
		}

		if (env->subprog_info[subprog].might_sleep && !in_sleepable_context(env)) {
			verbose(env, "sleepable global function %s() called in %s\n",
				sub_name, non_sleepable_context_description(env));
			return -EINVAL;
		}

		if (err) {
			verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
				subprog, sub_name);
			return err;
		}

		if (env->log.level & BPF_LOG_LEVEL)
			verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
				subprog, sub_name);
		if (env->subprog_info[subprog].changes_pkt_data)
			clear_all_pkt_pointers(env);
		/* mark global subprog for verifying after main prog */
		subprog_aux(env, subprog)->called = true;
		clear_caller_saved_regs(env, caller->regs);

		/* All non-void global functions return a 64-bit SCALAR_VALUE. */
		if (!subprog_returns_void(env, subprog)) {
			mark_reg_unknown(env, caller->regs, BPF_REG_0);
			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
		}

		/* continue with next insn after call */
		return 0;
	}

	/* for regular function entry setup new frame and continue
	 * from that frame.
	 */
	err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
	if (err)
		return err;

	clear_caller_saved_regs(env, caller->regs);

	/* and go analyze first insn of the callee */
	*insn_idx = env->subprog_info[subprog].start - 1;

	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "caller:\n");
		print_verifier_state(env, state, caller->frameno, true);
		verbose(env, "callee:\n");
		print_verifier_state(env, state, state->curframe, true);
	}

	return 0;
}
9580 
9581 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9582 				   struct bpf_func_state *caller,
9583 				   struct bpf_func_state *callee)
9584 {
9585 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9586 	 *      void *callback_ctx, u64 flags);
9587 	 * callback_fn(struct bpf_map *map, void *key, void *value,
9588 	 *      void *callback_ctx);
9589 	 */
9590 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9591 
9592 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9593 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9594 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9595 
9596 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9597 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9598 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9599 
9600 	/* pointer to stack or null */
9601 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9602 
9603 	/* unused */
9604 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9605 	return 0;
9606 }
9607 
9608 static int set_callee_state(struct bpf_verifier_env *env,
9609 			    struct bpf_func_state *caller,
9610 			    struct bpf_func_state *callee, int insn_idx)
9611 {
9612 	int i;
9613 
9614 	/* copy r1 - r5 args that callee can access.  The copy includes parent
9615 	 * pointers, which connects us up to the liveness chain
9616 	 */
9617 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9618 		callee->regs[i] = caller->regs[i];
9619 	return 0;
9620 }
9621 
/* Seed the callback frame for bpf_for_each_map_elem(): the map recorded for
 * this call site must support callbacks; the actual register setup is
 * delegated to the map's map_set_for_each_callback_args() op.  The callback
 * must return 0 or 1.
 */
static int set_map_elem_callback_state(struct bpf_verifier_env *env,
				       struct bpf_func_state *caller,
				       struct bpf_func_state *callee,
				       int insn_idx)
{
	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
	struct bpf_map *map;
	int err;

	/* valid map_ptr and poison value does not matter */
	map = insn_aux->map_ptr_state.map_ptr;
	if (!map->ops->map_set_for_each_callback_args ||
	    !map->ops->map_for_each_callback) {
		verbose(env, "callback function not allowed for map\n");
		return -ENOTSUPP;
	}

	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
	if (err)
		return err;

	callee->in_callback_fn = true;
	callee->callback_ret_range = retval_range(0, 1);
	return 0;
}
9647 
9648 static int set_loop_callback_state(struct bpf_verifier_env *env,
9649 				   struct bpf_func_state *caller,
9650 				   struct bpf_func_state *callee,
9651 				   int insn_idx)
9652 {
9653 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9654 	 *	    u64 flags);
9655 	 * callback_fn(u64 index, void *callback_ctx);
9656 	 */
9657 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9658 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9659 
9660 	/* unused */
9661 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9662 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9663 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9664 
9665 	callee->in_callback_fn = true;
9666 	callee->callback_ret_range = retval_range(0, 1);
9667 	return 0;
9668 }
9669 
static int set_timer_callback_state(struct bpf_verifier_env *env,
				    struct bpf_func_state *caller,
				    struct bpf_func_state *callee,
				    int insn_idx)
{
	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;

	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
	 * callback_fn(struct bpf_map *map, void *key, void *value);
	 */
	/* R1: the map the timer lives in */
	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
	callee->regs[BPF_REG_1].map_ptr = map_ptr;

	/* R2: key of the map element */
	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
	callee->regs[BPF_REG_2].map_ptr = map_ptr;

	/* R3: value of the map element */
	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
	callee->regs[BPF_REG_3].map_ptr = map_ptr;

	/* unused */
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
	/* async callback; it must return exactly 0 */
	callee->in_async_callback_fn = true;
	callee->callback_ret_range = retval_range(0, 0);
	return 0;
}
9699 
static int set_find_vma_callback_state(struct bpf_verifier_env *env,
				       struct bpf_func_state *caller,
				       struct bpf_func_state *callee,
				       int insn_idx)
{
	/* bpf_find_vma(struct task_struct *task, u64 addr,
	 *               void *callback_fn, void *callback_ctx, u64 flags)
	 * (callback_fn)(struct task_struct *task,
	 *               struct vm_area_struct *vma, void *callback_ctx);
	 */
	/* R1: task, passed through from bpf_find_vma()'s first argument */
	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];

	/* R2: PTR_TO_BTF_ID to struct vm_area_struct (vmlinux BTF) */
	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];

	/* pointer to stack or null */
	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];

	/* unused */
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
	callee->in_callback_fn = true;
	callee->callback_ret_range = retval_range(0, 1);
	return 0;
}
9727 
9728 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9729 					   struct bpf_func_state *caller,
9730 					   struct bpf_func_state *callee,
9731 					   int insn_idx)
9732 {
9733 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
9734 	 *			  callback_ctx, u64 flags);
9735 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9736 	 */
9737 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9738 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9739 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9740 
9741 	/* unused */
9742 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9743 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9744 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9745 
9746 	callee->in_callback_fn = true;
9747 	callee->callback_ret_range = retval_range(0, 1);
9748 	return 0;
9749 }
9750 
static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
					 struct bpf_func_state *caller,
					 struct bpf_func_state *callee,
					 int insn_idx)
{
	/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
	 *                     bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
	 *
	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
	 * by this point, so look at 'root'
	 */
	struct btf_field *field;

	field = reg_find_field_offset(&caller->regs[BPF_REG_1],
				      caller->regs[BPF_REG_1].var_off.value,
				      BPF_RB_ROOT);
	if (!field || !field->graph_root.value_btf_id)
		return -EFAULT;

	/* Both 'less' args are non-owning references to the node type stored
	 * in the rb_root.
	 */
	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);

	/* R3-R5 are unused by the callback */
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
	callee->in_callback_fn = true;
	callee->callback_ret_range = retval_range(0, 1);
	return 0;
}
9783 
/* Seed the async callback frame for the task-work scheduling kfunc: the
 * callback receives the map (taken from the scheduling call's R3), the
 * element key and the element value.  Any s32 return value is accepted.
 */
static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
						 struct bpf_func_state *caller,
						 struct bpf_func_state *callee,
						 int insn_idx)
{
	struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;

	/*
	 * callback_fn(struct bpf_map *map, void *key, void *value);
	 */
	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
	callee->regs[BPF_REG_1].map_ptr = map_ptr;

	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
	callee->regs[BPF_REG_2].map_ptr = map_ptr;

	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
	callee->regs[BPF_REG_3].map_ptr = map_ptr;

	/* unused */
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
	callee->in_async_callback_fn = true;
	callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
	return 0;
}
9813 
9814 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9815 
9816 /* Are we currently verifying the callback for a rbtree helper that must
9817  * be called with lock held? If so, no need to complain about unreleased
9818  * lock
9819  */
9820 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9821 {
9822 	struct bpf_verifier_state *state = env->cur_state;
9823 	struct bpf_insn *insn = env->prog->insnsi;
9824 	struct bpf_func_state *callee;
9825 	int kfunc_btf_id;
9826 
9827 	if (!state->curframe)
9828 		return false;
9829 
9830 	callee = state->frame[state->curframe];
9831 
9832 	if (!callee->in_callback_fn)
9833 		return false;
9834 
9835 	kfunc_btf_id = insn[callee->callsite].imm;
9836 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9837 }
9838 
9839 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
9840 {
9841 	if (range.return_32bit)
9842 		return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
9843 	else
9844 		return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
9845 }
9846 
/* Process a bpf_exit from a callee frame: validate R0, propagate it to the
 * caller (for direct subprog calls), free and pop the callee frame, and set
 * *insn_idx to where verification resumes in the caller.  For callbacks,
 * resumption is at the callsite itself so the call logic can schedule
 * another callback iteration.
 */
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state, *prev_st;
	struct bpf_func_state *caller, *callee;
	struct bpf_reg_state *r0;
	bool in_callback_fn;
	int err;

	callee = state->frame[state->curframe];
	r0 = &callee->regs[BPF_REG_0];
	if (r0->type == PTR_TO_STACK) {
		/* technically it's ok to return caller's stack pointer
		 * (or caller's caller's pointer) back to the caller,
		 * since these pointers are valid. Only current stack
		 * pointer will be invalid as soon as function exits,
		 * but let's be conservative
		 */
		verbose(env, "cannot return stack pointer to the caller\n");
		return -EINVAL;
	}

	caller = state->frame[state->curframe - 1];
	if (callee->in_callback_fn) {
		if (r0->type != SCALAR_VALUE) {
			verbose(env, "R0 not a scalar value\n");
			return -EACCES;
		}

		/* we are going to rely on register's precise value */
		err = mark_chain_precision(env, BPF_REG_0);
		if (err)
			return err;

		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
		if (!retval_range_within(callee->callback_ret_range, r0)) {
			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
					       "At callback return", "R0");
			return -EINVAL;
		}
		if (!bpf_calls_callback(env, callee->callsite)) {
			verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
				     *insn_idx, callee->callsite);
			return -EFAULT;
		}
	} else {
		/* return to the caller whatever r0 had in the callee */
		caller->regs[BPF_REG_0] = *r0;
	}

	/* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
	 * there function call logic would reschedule callback visit. If iteration
	 * converges is_state_visited() would prune that visit eventually.
	 */
	in_callback_fn = callee->in_callback_fn;
	if (in_callback_fn)
		*insn_idx = callee->callsite;
	else
		*insn_idx = callee->callsite + 1;

	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "returning from callee:\n");
		print_verifier_state(env, state, callee->frameno, true);
		verbose(env, "to caller at %d:\n", *insn_idx);
		print_verifier_state(env, state, caller->frameno, true);
	}
	/* clear everything in the callee. In case of exceptional exits using
	 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
	free_func_state(callee);
	state->frame[state->curframe--] = NULL;

	/* for callbacks widen imprecise scalars to make programs like below verify:
	 *
	 *   struct ctx { int i; }
	 *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
	 *   ...
	 *   struct ctx = { .i = 0; }
	 *   bpf_loop(100, cb, &ctx, 0);
	 *
	 * This is similar to what is done in process_iter_next_call() for open
	 * coded iterators.
	 */
	prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
	if (prev_st) {
		err = widen_imprecise_scalars(env, prev_st, state);
		if (err)
			return err;
	}
	return 0;
}
9936 
/* Tighten the bounds of R0 after a helper call, for helpers whose integer
 * return value has a known range: string/stack readers return at most the
 * supplied buffer size (and at least -MAX_ERRNO), and
 * bpf_get_smp_processor_id() returns a CPU number below nr_cpu_ids.
 */
static int do_refine_retval_range(struct bpf_verifier_env *env,
				  struct bpf_reg_state *regs, int ret_type,
				  int func_id,
				  struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];

	if (ret_type != RET_INTEGER)
		return 0;

	switch (func_id) {
	case BPF_FUNC_get_stack:
	case BPF_FUNC_get_task_stack:
	case BPF_FUNC_probe_read_str:
	case BPF_FUNC_probe_read_kernel_str:
	case BPF_FUNC_probe_read_user_str:
		ret_reg->smax_value = meta->msize_max_value;
		ret_reg->s32_max_value = meta->msize_max_value;
		ret_reg->smin_value = -MAX_ERRNO;
		ret_reg->s32_min_value = -MAX_ERRNO;
		reg_bounds_sync(ret_reg);
		break;
	case BPF_FUNC_get_smp_processor_id:
		ret_reg->umax_value = nr_cpu_ids - 1;
		ret_reg->u32_max_value = nr_cpu_ids - 1;
		ret_reg->smax_value = nr_cpu_ids - 1;
		ret_reg->s32_max_value = nr_cpu_ids - 1;
		ret_reg->umin_value = 0;
		ret_reg->u32_min_value = 0;
		ret_reg->smin_value = 0;
		ret_reg->s32_min_value = 0;
		reg_bounds_sync(ret_reg);
		break;
	}

	return reg_bounds_sanity_check(env, ret_reg, "retval");
}
9974 
9975 static int
9976 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9977 		int func_id, int insn_idx)
9978 {
9979 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9980 	struct bpf_map *map = meta->map.ptr;
9981 
9982 	if (func_id != BPF_FUNC_tail_call &&
9983 	    func_id != BPF_FUNC_map_lookup_elem &&
9984 	    func_id != BPF_FUNC_map_update_elem &&
9985 	    func_id != BPF_FUNC_map_delete_elem &&
9986 	    func_id != BPF_FUNC_map_push_elem &&
9987 	    func_id != BPF_FUNC_map_pop_elem &&
9988 	    func_id != BPF_FUNC_map_peek_elem &&
9989 	    func_id != BPF_FUNC_for_each_map_elem &&
9990 	    func_id != BPF_FUNC_redirect_map &&
9991 	    func_id != BPF_FUNC_map_lookup_percpu_elem)
9992 		return 0;
9993 
9994 	if (map == NULL) {
9995 		verifier_bug(env, "expected map for helper call");
9996 		return -EFAULT;
9997 	}
9998 
9999 	/* In case of read-only, some additional restrictions
10000 	 * need to be applied in order to prevent altering the
10001 	 * state of the map from program side.
10002 	 */
10003 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
10004 	    (func_id == BPF_FUNC_map_delete_elem ||
10005 	     func_id == BPF_FUNC_map_update_elem ||
10006 	     func_id == BPF_FUNC_map_push_elem ||
10007 	     func_id == BPF_FUNC_map_pop_elem)) {
10008 		verbose(env, "write into map forbidden\n");
10009 		return -EACCES;
10010 	}
10011 
10012 	if (!aux->map_ptr_state.map_ptr)
10013 		bpf_map_ptr_store(aux, meta->map.ptr,
10014 				  !meta->map.ptr->bypass_spec_v1, false);
10015 	else if (aux->map_ptr_state.map_ptr != meta->map.ptr)
10016 		bpf_map_ptr_store(aux, meta->map.ptr,
10017 				  !meta->map.ptr->bypass_spec_v1, true);
10018 	return 0;
10019 }
10020 
/* For bpf_tail_call(): track whether the index (R3) is a known constant
 * below the map's max_entries across all paths reaching this call site.
 * A usable constant is recorded in insn_aux; an unknown or conflicting
 * index poisons the recorded key.
 */
static int
record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
		int func_id, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
	struct bpf_reg_state *reg;
	struct bpf_map *map = meta->map.ptr;
	u64 val, max;
	int err;

	if (func_id != BPF_FUNC_tail_call)
		return 0;
	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
		verbose(env, "expected prog array map for tail call");
		return -EINVAL;
	}

	reg = reg_state(env, BPF_REG_3);
	val = reg->var_off.value;
	max = map->max_entries;

	if (!(is_reg_const(reg, false) && val < max)) {
		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
		return 0;
	}

	/* we are relying on the exact constant, make it precise */
	err = mark_chain_precision(env, BPF_REG_3);
	if (err)
		return err;
	if (bpf_map_key_unseen(aux))
		bpf_map_key_store(aux, val);
	else if (!bpf_map_key_poisoned(aux) &&
		  bpf_map_key_immediate(aux) != val)
		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
	return 0;
}
10057 
/* Check that no acquired REF_TYPE_PTR references are still outstanding.
 * Leaks are only checked when exiting the outermost frame (frameno == 0),
 * or unconditionally on an exception exit. Returns -EINVAL if at least
 * one unreleased reference was found (all of them are logged).
 */
static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
{
	struct bpf_verifier_state *state = env->cur_state;
	enum bpf_prog_type type = resolve_prog_type(env->prog);
	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
	bool refs_lingering = false;
	int i;

	/* Subprogram exits (frameno != 0) are not checked unless this is an
	 * exception exit.
	 */
	if (!exception_exit && cur_func(env)->frameno)
		return 0;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type != REF_TYPE_PTR)
			continue;
		/* Allow struct_ops programs to return a referenced kptr back to
		 * kernel. Type checks are performed later in check_return_code.
		 */
		if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
		    reg->ref_obj_id == state->refs[i].id)
			continue;
		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
			state->refs[i].id, state->refs[i].insn_idx);
		refs_lingering = true;
	}
	return refs_lingering ? -EINVAL : 0;
}
10084 
10085 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
10086 {
10087 	int err;
10088 
10089 	if (check_lock && env->cur_state->active_locks) {
10090 		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
10091 		return -EINVAL;
10092 	}
10093 
10094 	err = check_reference_leak(env, exception_exit);
10095 	if (err) {
10096 		verbose(env, "%s would lead to reference leak\n", prefix);
10097 		return err;
10098 	}
10099 
10100 	if (check_lock && env->cur_state->active_irq_id) {
10101 		verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
10102 		return -EINVAL;
10103 	}
10104 
10105 	if (check_lock && env->cur_state->active_rcu_locks) {
10106 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
10107 		return -EINVAL;
10108 	}
10109 
10110 	if (check_lock && env->cur_state->active_preempt_locks) {
10111 		verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
10112 		return -EINVAL;
10113 	}
10114 
10115 	return 0;
10116 }
10117 
/* Validate a bpf_snprintf() call at verification time: the data array
 * length (R5) must be a multiple of 8 (an array of u64), and the format
 * string (R3, ARG_PTR_TO_CONST_STR pointing into a map's direct value
 * area) must parse cleanly for that number of arguments.
 */
static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs)
{
	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
	struct bpf_map *fmt_map = fmt_reg->map_ptr;
	struct bpf_bprintf_data data = {};
	int err, fmt_map_off, num_args;
	u64 fmt_addr;
	char *fmt;

	/* data must be an array of u64 */
	if (data_len_reg->var_off.value % 8)
		return -EINVAL;
	num_args = data_len_reg->var_off.value / 8;

	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
	 * and map_direct_value_addr is set.
	 */
	fmt_map_off = fmt_reg->var_off.value;
	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
						  fmt_map_off);
	if (err) {
		verbose(env, "failed to retrieve map value address\n");
		return -EFAULT;
	}
	fmt = (char *)(long)fmt_addr + fmt_map_off;

	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
	 * can focus on validating the format specifiers.
	 */
	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
	if (err < 0)
		verbose(env, "Invalid format string\n");

	return err;
}
10155 
10156 static int check_get_func_ip(struct bpf_verifier_env *env)
10157 {
10158 	enum bpf_prog_type type = resolve_prog_type(env->prog);
10159 	int func_id = BPF_FUNC_get_func_ip;
10160 
10161 	if (type == BPF_PROG_TYPE_TRACING) {
10162 		if (!bpf_prog_has_trampoline(env->prog)) {
10163 			verbose(env, "func %s#%d supported only for fentry/fexit/fsession/fmod_ret programs\n",
10164 				func_id_name(func_id), func_id);
10165 			return -ENOTSUPP;
10166 		}
10167 		return 0;
10168 	} else if (type == BPF_PROG_TYPE_KPROBE) {
10169 		return 0;
10170 	}
10171 
10172 	verbose(env, "func %s#%d not supported for program type %d\n",
10173 		func_id_name(func_id), func_id, type);
10174 	return -ENOTSUPP;
10175 }
10176 
10177 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
10178 {
10179 	return &env->insn_aux_data[env->insn_idx];
10180 }
10181 
/* True when R4 (the flags argument of bpf_loop()) is known to be zero.
 * If so, R4 is marked precise since inlining depends on its exact value.
 */
static bool loop_flag_is_zero(struct bpf_verifier_env *env)
{
	struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
	bool reg_is_null = bpf_register_is_null(reg);

	/* NOTE(review): mark_chain_precision() may fail but its return value
	 * is ignored here — presumably benign, worth confirming.
	 */
	if (reg_is_null)
		mark_chain_precision(env, BPF_REG_4);

	return reg_is_null;
}
10192 
10193 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
10194 {
10195 	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10196 
10197 	if (!state->initialized) {
10198 		state->initialized = 1;
10199 		state->fit_for_inline = loop_flag_is_zero(env);
10200 		state->callback_subprogno = subprogno;
10201 		return;
10202 	}
10203 
10204 	if (!state->fit_for_inline)
10205 		return;
10206 
10207 	state->fit_for_inline = (loop_flag_is_zero(env) &&
10208 				 state->callback_subprogno == subprogno);
10209 }
10210 
10211 /* Returns whether or not the given map type can potentially elide
10212  * lookup return value nullness check. This is possible if the key
10213  * is statically known.
10214  */
10215 static bool can_elide_value_nullness(enum bpf_map_type type)
10216 {
10217 	switch (type) {
10218 	case BPF_MAP_TYPE_ARRAY:
10219 	case BPF_MAP_TYPE_PERCPU_ARRAY:
10220 		return true;
10221 	default:
10222 		return false;
10223 	}
10224 }
10225 
10226 int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
10227 			 const struct bpf_func_proto **ptr)
10228 {
10229 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
10230 		return -ERANGE;
10231 
10232 	if (!env->ops->get_func_proto)
10233 		return -EINVAL;
10234 
10235 	*ptr = env->ops->get_func_proto(func_id, env->prog);
10236 	return *ptr && (*ptr)->func ? 0 : -EINVAL;
10237 }
10238 
10239 /* Check if we're in a sleepable context. */
10240 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
10241 {
10242 	return !env->cur_state->active_rcu_locks &&
10243 	       !env->cur_state->active_preempt_locks &&
10244 	       !env->cur_state->active_locks &&
10245 	       !env->cur_state->active_irq_id &&
10246 	       in_sleepable(env);
10247 }
10248 
/* Human-readable name of the construct making the current state
 * non-sleepable, for use in error messages. The check order decides
 * which region is reported when several are active at once.
 */
static const char *non_sleepable_context_description(struct bpf_verifier_env *env)
{
	if (env->cur_state->active_rcu_locks)
		return "rcu_read_lock region";
	if (env->cur_state->active_preempt_locks)
		return "non-preemptible region";
	if (env->cur_state->active_irq_id)
		return "IRQ-disabled region";
	if (env->cur_state->active_locks)
		return "lock region";
	return "non-sleepable prog";
}
10261 
/* Verify a call to a BPF helper (BPF_CALL with insn->imm naming the
 * helper): resolve the helper proto for this program type, enforce GPL
 * and context restrictions, check each argument, apply helper-specific
 * checks and side effects, then model the call's effect on register
 * state (caller-saved regs clobbered, R0 set per the proto's ret_type).
 * Returns 0 on success, negative error otherwise.
 */
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			     int *insn_idx_p)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	bool returns_cpu_specific_alloc_ptr = false;
	const struct bpf_func_proto *fn = NULL;
	enum bpf_return_type ret_type;
	enum bpf_type_flag ret_flag;
	struct bpf_reg_state *regs;
	struct bpf_call_arg_meta meta;
	int insn_idx = *insn_idx_p;
	bool changes_data;
	int i, err, func_id;

	/* find function prototype */
	func_id = insn->imm;
	err = bpf_get_helper_proto(env, insn->imm, &fn);
	if (err == -ERANGE) {
		verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
		return -EINVAL;
	}

	if (err) {
		verbose(env, "program of this type cannot use helper %s#%d\n",
			func_id_name(func_id), func_id);
		return err;
	}

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	if (!env->prog->gpl_compatible && fn->gpl_only) {
		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
		return -EINVAL;
	}

	if (fn->allowed && !fn->allowed(env->prog)) {
		verbose(env, "helper call is not allowed in probe\n");
		return -EINVAL;
	}

	/* With LD_ABS/IND some JITs save/restore skb from r1. */
	changes_data = bpf_helper_changes_pkt_data(func_id);
	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
		verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id);
		return -EFAULT;
	}

	memset(&meta, 0, sizeof(meta));
	meta.pkt_access = fn->pkt_access;

	err = check_func_proto(fn);
	if (err) {
		verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
		return err;
	}

	if (fn->might_sleep && !in_sleepable_context(env)) {
		verbose(env, "sleepable helper %s#%d in %s\n", func_id_name(func_id), func_id,
			non_sleepable_context_description(env));
		return -EINVAL;
	}

	/* Track non-sleepable context for helpers. */
	if (!in_sleepable_context(env))
		env->insn_aux_data[insn_idx].non_sleepable = true;

	meta.func_id = func_id;
	/* check args */
	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
		err = check_func_arg(env, i, &meta, fn, insn_idx);
		if (err)
			return err;
	}

	err = record_func_map(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	err = record_func_key(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	/* Mark slots with STACK_MISC in case of raw mode, stack offset
	 * is inferred from register state.
	 */
	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;
	}

	regs = cur_regs(env);

	/* Release the reference (or dynptr slots) consumed by the helper. */
	if (meta.release_regno) {
		err = -EINVAL;
		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
		} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
			u32 ref_obj_id = meta.ref_obj_id;
			bool in_rcu = in_rcu_cs(env);
			struct bpf_func_state *state;
			struct bpf_reg_state *reg;

			err = release_reference_nomark(env->cur_state, ref_obj_id);
			if (!err) {
				bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
					if (reg->ref_obj_id == ref_obj_id) {
						if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
							reg->ref_obj_id = 0;
							reg->type &= ~MEM_ALLOC;
							reg->type |= MEM_RCU;
						} else {
							mark_reg_invalid(env, reg);
						}
					}
				}));
			}
		} else if (meta.ref_obj_id) {
			err = release_reference(env, meta.ref_obj_id);
		} else if (bpf_register_is_null(&regs[meta.release_regno])) {
			/* meta.ref_obj_id can only be 0 if register that is meant to be
			 * released is NULL, which must be > R0.
			 */
			err = 0;
		}
		if (err) {
			verbose(env, "func %s#%d reference has not been acquired before\n",
				func_id_name(func_id), func_id);
			return err;
		}
	}

	/* Helper-specific argument checks and side effects. */
	switch (func_id) {
	case BPF_FUNC_tail_call:
		err = check_resource_leak(env, false, true, "tail_call");
		if (err)
			return err;
		break;
	case BPF_FUNC_get_local_storage:
		/* check that flags argument in get_local_storage(map, flags) is 0,
		 * this is required because get_local_storage() can't return an error.
		 */
		if (!bpf_register_is_null(&regs[BPF_REG_2])) {
			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
			return -EINVAL;
		}
		break;
	case BPF_FUNC_for_each_map_elem:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_map_elem_callback_state);
		break;
	case BPF_FUNC_timer_set_callback:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_timer_callback_state);
		break;
	case BPF_FUNC_find_vma:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_find_vma_callback_state);
		break;
	case BPF_FUNC_snprintf:
		err = check_bpf_snprintf_call(env, regs);
		break;
	case BPF_FUNC_loop:
		update_loop_inline_state(env, meta.subprogno);
		/* Verifier relies on R1 value to determine if bpf_loop() iteration
		 * is finished, thus mark it precise.
		 */
		err = mark_chain_precision(env, BPF_REG_1);
		if (err)
			return err;
		if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
						 set_loop_callback_state);
		} else {
			cur_func(env)->callback_depth = 0;
			if (env->log.level & BPF_LOG_LEVEL2)
				verbose(env, "frame%d bpf_loop iteration limit reached\n",
					env->cur_state->curframe);
		}
		break;
	case BPF_FUNC_dynptr_from_mem:
		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
				reg_type_str(env, regs[BPF_REG_1].type));
			return -EACCES;
		}
		break;
	case BPF_FUNC_set_retval:
		if (prog_type == BPF_PROG_TYPE_LSM &&
		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
			if (!env->prog->aux->attach_func_proto->type) {
				/* Make sure programs that attach to void
				 * hooks don't try to modify return value.
				 */
				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
				return -EINVAL;
			}
		}
		break;
	case BPF_FUNC_dynptr_data:
	{
		struct bpf_reg_state *reg;
		int id, ref_obj_id;

		reg = get_dynptr_arg_reg(env, fn, regs);
		if (!reg)
			return -EFAULT;


		if (meta.dynptr_id) {
			verifier_bug(env, "meta.dynptr_id already set");
			return -EFAULT;
		}
		if (meta.ref_obj_id) {
			verifier_bug(env, "meta.ref_obj_id already set");
			return -EFAULT;
		}

		id = dynptr_id(env, reg);
		if (id < 0) {
			verifier_bug(env, "failed to obtain dynptr id");
			return id;
		}

		ref_obj_id = dynptr_ref_obj_id(env, reg);
		if (ref_obj_id < 0) {
			verifier_bug(env, "failed to obtain dynptr ref_obj_id");
			return ref_obj_id;
		}

		meta.dynptr_id = id;
		meta.ref_obj_id = ref_obj_id;

		break;
	}
	case BPF_FUNC_dynptr_write:
	{
		enum bpf_dynptr_type dynptr_type;
		struct bpf_reg_state *reg;

		reg = get_dynptr_arg_reg(env, fn, regs);
		if (!reg)
			return -EFAULT;

		dynptr_type = dynptr_get_type(env, reg);
		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
			return -EFAULT;

		if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
		    dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
			/* this will trigger clear_all_pkt_pointers(), which will
			 * invalidate all dynptr slices associated with the skb
			 */
			changes_data = true;

		break;
	}
	case BPF_FUNC_per_cpu_ptr:
	case BPF_FUNC_this_cpu_ptr:
	{
		struct bpf_reg_state *reg = &regs[BPF_REG_1];
		const struct btf_type *type;

		if (reg->type & MEM_RCU) {
			type = btf_type_by_id(reg->btf, reg->btf_id);
			if (!type || !btf_type_is_struct(type)) {
				verbose(env, "Helper has invalid btf/btf_id in R1\n");
				return -EFAULT;
			}
			returns_cpu_specific_alloc_ptr = true;
			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
		}
		break;
	}
	case BPF_FUNC_user_ringbuf_drain:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_user_ringbuf_callback_state);
		break;
	}

	if (err)
		return err;

	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* helper call returns 64-bit value. */
	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;

	/* update return register (already marked as written above) */
	ret_type = fn->ret_type;
	ret_flag = type_flag(ret_type);

	switch (base_type(ret_type)) {
	case RET_INTEGER:
		/* sets type to SCALAR_VALUE */
		mark_reg_unknown(env, regs, BPF_REG_0);
		break;
	case RET_VOID:
		regs[BPF_REG_0].type = NOT_INIT;
		break;
	case RET_PTR_TO_MAP_VALUE:
		/* There is no offset yet applied, variable or fixed */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		/* remember map_ptr, so that check_map_access()
		 * can check 'value_size' boundary of memory access
		 * to map element returned from bpf_map_lookup_elem()
		 */
		if (meta.map.ptr == NULL) {
			verifier_bug(env, "unexpected null map_ptr");
			return -EFAULT;
		}

		/* A constant in-bounds key into an (percpu) array lookup can
		 * never return NULL, so the NULL check can be elided.
		 */
		if (func_id == BPF_FUNC_map_lookup_elem &&
		    can_elide_value_nullness(meta.map.ptr->map_type) &&
		    meta.const_map_key >= 0 &&
		    meta.const_map_key < meta.map.ptr->max_entries)
			ret_flag &= ~PTR_MAYBE_NULL;

		regs[BPF_REG_0].map_ptr = meta.map.ptr;
		regs[BPF_REG_0].map_uid = meta.map.uid;
		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
		if (!type_may_be_null(ret_flag) &&
		    btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
			regs[BPF_REG_0].id = ++env->id_gen;
		}
		break;
	case RET_PTR_TO_SOCKET:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
		break;
	case RET_PTR_TO_SOCK_COMMON:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
		break;
	case RET_PTR_TO_TCP_SOCK:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
		break;
	case RET_PTR_TO_MEM:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
		regs[BPF_REG_0].mem_size = meta.mem_size;
		break;
	case RET_PTR_TO_MEM_OR_BTF_ID:
	{
		const struct btf_type *t;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
		if (!btf_type_is_struct(t)) {
			u32 tsize;
			const struct btf_type *ret;
			const char *tname;

			/* resolve the type size of ksym. */
			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
			if (IS_ERR(ret)) {
				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
				verbose(env, "unable to resolve the size of type '%s': %ld\n",
					tname, PTR_ERR(ret));
				return -EINVAL;
			}
			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
			regs[BPF_REG_0].mem_size = tsize;
		} else {
			if (returns_cpu_specific_alloc_ptr) {
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
			} else {
				/* MEM_RDONLY may be carried from ret_flag, but it
				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
				 * it will confuse the check of PTR_TO_BTF_ID in
				 * check_mem_access().
				 */
				ret_flag &= ~MEM_RDONLY;
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
			}

			regs[BPF_REG_0].btf = meta.ret_btf;
			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
		}
		break;
	}
	case RET_PTR_TO_BTF_ID:
	{
		struct btf *ret_btf;
		int ret_btf_id;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
		if (func_id == BPF_FUNC_kptr_xchg) {
			ret_btf = meta.kptr_field->kptr.btf;
			ret_btf_id = meta.kptr_field->kptr.btf_id;
			if (!btf_is_kernel(ret_btf)) {
				regs[BPF_REG_0].type |= MEM_ALLOC;
				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
					regs[BPF_REG_0].type |= MEM_PERCPU;
			}
		} else {
			if (fn->ret_btf_id == BPF_PTR_POISON) {
				verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type",
					     func_id_name(func_id));
				return -EFAULT;
			}
			ret_btf = btf_vmlinux;
			ret_btf_id = *fn->ret_btf_id;
		}
		if (ret_btf_id == 0) {
			verbose(env, "invalid return type %u of func %s#%d\n",
				base_type(ret_type), func_id_name(func_id),
				func_id);
			return -EINVAL;
		}
		regs[BPF_REG_0].btf = ret_btf;
		regs[BPF_REG_0].btf_id = ret_btf_id;
		break;
	}
	default:
		verbose(env, "unknown return type %u of func %s#%d\n",
			base_type(ret_type), func_id_name(func_id), func_id);
		return -EINVAL;
	}

	if (type_may_be_null(regs[BPF_REG_0].type))
		regs[BPF_REG_0].id = ++env->id_gen;

	if (helper_multiple_ref_obj_use(func_id, meta.map.ptr)) {
		verifier_bug(env, "func %s#%d sets ref_obj_id more than once",
			     func_id_name(func_id), func_id);
		return -EFAULT;
	}

	if (is_dynptr_ref_function(func_id))
		regs[BPF_REG_0].dynptr_id = meta.dynptr_id;

	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
	} else if (is_acquire_function(func_id, meta.map.ptr)) {
		int id = acquire_reference(env, insn_idx);

		if (id < 0)
			return id;
		/* For mark_ptr_or_null_reg() */
		regs[BPF_REG_0].id = id;
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = id;
	}

	err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
	if (err)
		return err;

	err = check_map_func_compatibility(env, meta.map.ptr, func_id);
	if (err)
		return err;

	if ((func_id == BPF_FUNC_get_stack ||
	     func_id == BPF_FUNC_get_task_stack) &&
	    !env->prog->has_callchain_buf) {
		const char *err_str;

#ifdef CONFIG_PERF_EVENTS
		err = get_callchain_buffers(sysctl_perf_event_max_stack);
		err_str = "cannot get callchain buffer for func %s#%d\n";
#else
		err = -ENOTSUPP;
		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
#endif
		if (err) {
			verbose(env, err_str, func_id_name(func_id), func_id);
			return err;
		}

		env->prog->has_callchain_buf = true;
	}

	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
		env->prog->call_get_stack = true;

	if (func_id == BPF_FUNC_get_func_ip) {
		if (check_get_func_ip(env))
			return -ENOTSUPP;
		env->prog->call_get_func_ip = true;
	}

	/* A tail call inside a subprogram: push a fall-through branch at the
	 * next insn, then unwind the current frame (NOTE(review): this appears
	 * to model both the failed and the successful tail-call outcome —
	 * confirm against prepare_func_exit() semantics).
	 */
	if (func_id == BPF_FUNC_tail_call) {
		if (env->cur_state->curframe) {
			struct bpf_verifier_state *branch;

			mark_reg_scratched(env, BPF_REG_0);
			branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
			if (IS_ERR(branch))
				return PTR_ERR(branch);
			clear_all_pkt_pointers(env);
			mark_reg_unknown(env, regs, BPF_REG_0);
			err = prepare_func_exit(env, &env->insn_idx);
			if (err)
				return err;
			env->insn_idx--;
		} else {
			changes_data = false;
		}
	}

	if (changes_data)
		clear_all_pkt_pointers(env);
	return 0;
}
10774 
/* mark_btf_func_reg_size() is used when the reg size is determined by
 * the BTF func_proto's return value size and argument.
 */
static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs,
				     u32 regno, size_t reg_size)
{
	struct bpf_reg_state *reg = &regs[regno];

	if (regno == BPF_REG_0) {
		/* Function return value: a sub-64-bit return records the
		 * insn after the call in subreg_def, presumably so a later
		 * pass can insert zero-extension — see mark_insn_zext().
		 */
		reg->subreg_def = reg_size == sizeof(u64) ?
			DEF_NOT_SUBREG : env->insn_idx + 1;
	} else if (reg_size == sizeof(u64)) {
		/* Function argument */
		mark_insn_zext(env, reg);
	}
}
10792 
10793 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
10794 				   size_t reg_size)
10795 {
10796 	return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size);
10797 }
10798 
10799 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10800 {
10801 	return meta->kfunc_flags & KF_ACQUIRE;
10802 }
10803 
10804 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10805 {
10806 	return meta->kfunc_flags & KF_RELEASE;
10807 }
10808 
10809 
10810 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10811 {
10812 	return meta->kfunc_flags & KF_DESTRUCTIVE;
10813 }
10814 
10815 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10816 {
10817 	return meta->kfunc_flags & KF_RCU;
10818 }
10819 
10820 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10821 {
10822 	return meta->kfunc_flags & KF_RCU_PROTECTED;
10823 }
10824 
10825 static bool is_kfunc_arg_mem_size(const struct btf *btf,
10826 				  const struct btf_param *arg,
10827 				  const struct bpf_reg_state *reg)
10828 {
10829 	const struct btf_type *t;
10830 
10831 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10832 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10833 		return false;
10834 
10835 	return btf_param_match_suffix(btf, arg, "__sz");
10836 }
10837 
10838 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10839 					const struct btf_param *arg,
10840 					const struct bpf_reg_state *reg)
10841 {
10842 	const struct btf_type *t;
10843 
10844 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10845 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10846 		return false;
10847 
10848 	return btf_param_match_suffix(btf, arg, "__szk");
10849 }
10850 
/* Naming-convention predicates: the semantics of a kfunc argument are
 * encoded as a suffix on the parameter name in the kernel source
 * (e.g. "foo__sz"); each helper below matches one such suffix.
 */
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__k");
}

static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__ign");
}

static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__map");
}

static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__alloc");
}

static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__uninit");
}

static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__refcounted_kptr");
}

static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__nullable");
}

static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__str");
}

static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__irq_flag");
}
10895 
10896 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10897 					  const struct btf_param *arg,
10898 					  const char *name)
10899 {
10900 	int len, target_len = strlen(name);
10901 	const char *param_name;
10902 
10903 	param_name = btf_name_by_offset(btf, arg->name_off);
10904 	if (str_is_empty(param_name))
10905 		return false;
10906 	len = strlen(param_name);
10907 	if (len != target_len)
10908 		return false;
10909 	if (strcmp(param_name, name))
10910 		return false;
10911 
10912 	return true;
10913 }
10914 
/* Special kernel types recognized in kfunc arguments; values index
 * kf_arg_btf_ids below.
 */
enum {
	KF_ARG_DYNPTR_ID,
	KF_ARG_LIST_HEAD_ID,
	KF_ARG_LIST_NODE_ID,
	KF_ARG_RB_ROOT_ID,
	KF_ARG_RB_NODE_ID,
	KF_ARG_WORKQUEUE_ID,
	KF_ARG_RES_SPIN_LOCK_ID,
	KF_ARG_TASK_WORK_ID,
	KF_ARG_PROG_AUX_ID,
	KF_ARG_TIMER_ID
};

/* BTF ids for the types above. Entries MUST stay in the same order as
 * the KF_ARG_*_ID enum: __is_kfunc_ptr_arg_type() indexes this list
 * with those enum values.
 */
BTF_ID_LIST(kf_arg_btf_ids)
BTF_ID(struct, bpf_dynptr)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
BTF_ID(struct, bpf_rb_root)
BTF_ID(struct, bpf_rb_node)
BTF_ID(struct, bpf_wq)
BTF_ID(struct, bpf_res_spin_lock)
BTF_ID(struct, bpf_task_work)
BTF_ID(struct, bpf_prog_aux)
BTF_ID(struct, bpf_timer)
10939 
/* True when kfunc argument 'arg' is a pointer to the kernel type whose
 * vmlinux BTF id is kf_arg_btf_ids[type], ignoring type modifiers on
 * both the pointer and the pointee.
 */
static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
				    const struct btf_param *arg, int type)
{
	const struct btf_type *t;
	u32 res_id;

	t = btf_type_skip_modifiers(btf, arg->type, NULL);
	if (!t)
		return false;
	if (!btf_type_is_ptr(t))
		return false;
	/* Resolve the pointee and compare it against the target type. */
	t = btf_type_skip_modifiers(btf, t->type, &res_id);
	if (!t)
		return false;
	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
}
10956 
10957 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10958 {
10959 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
10960 }
10961 
10962 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
10963 {
10964 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
10965 }
10966 
10967 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
10968 {
10969 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
10970 }
10971 
10972 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
10973 {
10974 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
10975 }
10976 
/* True if @arg is a pointer to struct bpf_rb_node. */
static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
}
10981 
/* True if @arg is a pointer to struct bpf_timer. */
static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
}
10986 
/* True if @arg is a pointer to struct bpf_wq. */
static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
}
10991 
/* True if @arg is a pointer to struct bpf_task_work. */
static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
}
10996 
/* True if @arg is a pointer to struct bpf_res_spin_lock. */
static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
}
11001 
/* True if @t is vmlinux's struct bpf_rb_node type (pointer identity check). */
static bool is_rbtree_node_type(const struct btf_type *t)
{
	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
}
11006 
/* True if @t is vmlinux's struct bpf_list_node type (pointer identity check). */
static bool is_list_node_type(const struct btf_type *t)
{
	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
}
11011 
11012 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
11013 				  const struct btf_param *arg)
11014 {
11015 	const struct btf_type *t;
11016 
11017 	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
11018 	if (!t)
11019 		return false;
11020 
11021 	return true;
11022 }
11023 
/* True if @arg is a pointer to struct bpf_prog_aux. */
static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
}
11028 
11029 /*
11030  * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF:
11031  *   - the _impl prototype with full arg list (meta->func_proto)
11032  *   - the BPF API prototype w/o implicit args (func->type in BTF)
11033  * To determine whether an argument is implicit, we compare its position
11034  * against the number of arguments in the prototype w/o implicit args.
11035  */
11036 static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx)
11037 {
11038 	const struct btf_type *func, *func_proto;
11039 	u32 argn;
11040 
11041 	if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS))
11042 		return false;
11043 
11044 	func = btf_type_by_id(meta->btf, meta->func_id);
11045 	func_proto = btf_type_by_id(meta->btf, func->type);
11046 	argn = btf_type_vlen(func_proto);
11047 
11048 	return argn <= arg_idx;
11049 }
11050 
11051 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
11052 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
11053 					const struct btf *btf,
11054 					const struct btf_type *t, int rec)
11055 {
11056 	const struct btf_type *member_type;
11057 	const struct btf_member *member;
11058 	u32 i;
11059 
11060 	if (!btf_type_is_struct(t))
11061 		return false;
11062 
11063 	for_each_member(i, t, member) {
11064 		const struct btf_array *array;
11065 
11066 		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
11067 		if (btf_type_is_struct(member_type)) {
11068 			if (rec >= 3) {
11069 				verbose(env, "max struct nesting depth exceeded\n");
11070 				return false;
11071 			}
11072 			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
11073 				return false;
11074 			continue;
11075 		}
11076 		if (btf_type_is_array(member_type)) {
11077 			array = btf_array(member_type);
11078 			if (!array->nelems)
11079 				return false;
11080 			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
11081 			if (!btf_type_is_scalar(member_type))
11082 				return false;
11083 			continue;
11084 		}
11085 		if (!btf_type_is_scalar(member_type))
11086 			return false;
11087 	}
11088 	return true;
11089 }
11090 
/* Classification of kfunc pointer arguments, produced by
 * get_kfunc_ptr_arg_type() below and consumed by its caller.
 */
enum kfunc_ptr_arg_type {
	KF_ARG_PTR_TO_CTX,
	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
	KF_ARG_PTR_TO_DYNPTR,
	KF_ARG_PTR_TO_ITER,
	KF_ARG_PTR_TO_LIST_HEAD,
	KF_ARG_PTR_TO_LIST_NODE,
	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
	KF_ARG_PTR_TO_MEM,
	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
	KF_ARG_PTR_TO_CALLBACK,
	KF_ARG_PTR_TO_RB_ROOT,
	KF_ARG_PTR_TO_RB_NODE,
	KF_ARG_PTR_TO_NULL,
	KF_ARG_PTR_TO_CONST_STR,
	KF_ARG_PTR_TO_MAP,
	KF_ARG_PTR_TO_TIMER,
	KF_ARG_PTR_TO_WORKQUEUE,
	KF_ARG_PTR_TO_IRQ_FLAG,
	KF_ARG_PTR_TO_RES_SPIN_LOCK,
	KF_ARG_PTR_TO_TASK_WORK,
};
11114 
/* Indices into special_kfunc_list below; the enum and the BTF_ID_LIST must
 * be kept in sync (same order, same length).
 */
enum special_kfunc_type {
	KF_bpf_obj_new_impl,
	KF_bpf_obj_new,
	KF_bpf_obj_drop_impl,
	KF_bpf_obj_drop,
	KF_bpf_refcount_acquire_impl,
	KF_bpf_refcount_acquire,
	KF_bpf_list_push_front_impl,
	KF_bpf_list_push_front,
	KF_bpf_list_push_back_impl,
	KF_bpf_list_push_back,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_list_front,
	KF_bpf_list_back,
	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
	KF_bpf_rcu_read_unlock,
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_add,
	KF_bpf_rbtree_first,
	KF_bpf_rbtree_root,
	KF_bpf_rbtree_left,
	KF_bpf_rbtree_right,
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_from_skb_meta,
	KF_bpf_xdp_pull_data,
	KF_bpf_dynptr_slice,
	KF_bpf_dynptr_slice_rdwr,
	KF_bpf_dynptr_clone,
	KF_bpf_percpu_obj_new_impl,
	KF_bpf_percpu_obj_new,
	KF_bpf_percpu_obj_drop_impl,
	KF_bpf_percpu_obj_drop,
	KF_bpf_throw,
	KF_bpf_wq_set_callback,
	KF_bpf_preempt_disable,
	KF_bpf_preempt_enable,
	KF_bpf_iter_css_task_new,
	KF_bpf_session_cookie,
	KF_bpf_get_kmem_cache,
	KF_bpf_local_irq_save,
	KF_bpf_local_irq_restore,
	KF_bpf_iter_num_new,
	KF_bpf_iter_num_next,
	KF_bpf_iter_num_destroy,
	KF_bpf_set_dentry_xattr,
	KF_bpf_remove_dentry_xattr,
	KF_bpf_res_spin_lock,
	KF_bpf_res_spin_unlock,
	KF_bpf_res_spin_lock_irqsave,
	KF_bpf_res_spin_unlock_irqrestore,
	KF_bpf_dynptr_from_file,
	KF_bpf_dynptr_file_discard,
	KF___bpf_trap,
	KF_bpf_task_work_schedule_signal,
	KF_bpf_task_work_schedule_resume,
	KF_bpf_arena_alloc_pages,
	KF_bpf_arena_free_pages,
	KF_bpf_arena_reserve_pages,
	KF_bpf_session_is_return,
	KF_bpf_stream_vprintk,
	KF_bpf_stream_print_stack,
};
11182 
/* BTF IDs of the kfuncs named in enum special_kfunc_type, in the same order.
 * BTF_ID_UNUSED entries keep the indices stable when a kfunc is compiled out.
 */
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_new)
BTF_ID(func, bpf_obj_drop_impl)
BTF_ID(func, bpf_obj_drop)
BTF_ID(func, bpf_refcount_acquire_impl)
BTF_ID(func, bpf_refcount_acquire)
BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_front)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_push_back)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_list_front)
BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_from_skb_meta)
BTF_ID(func, bpf_xdp_pull_data)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_new)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_percpu_obj_drop)
BTF_ID(func, bpf_throw)
BTF_ID(func, bpf_wq_set_callback)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#else
BTF_ID_UNUSED
#endif
#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_cookie)
#else
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_get_kmem_cache)
BTF_ID(func, bpf_local_irq_save)
BTF_ID(func, bpf_local_irq_restore)
BTF_ID(func, bpf_iter_num_new)
BTF_ID(func, bpf_iter_num_next)
BTF_ID(func, bpf_iter_num_destroy)
#ifdef CONFIG_BPF_LSM
BTF_ID(func, bpf_set_dentry_xattr)
BTF_ID(func, bpf_remove_dentry_xattr)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_res_spin_lock)
BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
BTF_ID(func, bpf_dynptr_from_file)
BTF_ID(func, bpf_dynptr_file_discard)
BTF_ID(func, __bpf_trap)
BTF_ID(func, bpf_task_work_schedule_signal)
BTF_ID(func, bpf_task_work_schedule_resume)
BTF_ID(func, bpf_arena_alloc_pages)
BTF_ID(func, bpf_arena_free_pages)
BTF_ID(func, bpf_arena_reserve_pages)
BTF_ID(func, bpf_session_is_return)
BTF_ID(func, bpf_stream_vprintk)
BTF_ID(func, bpf_stream_print_stack)
11269 
11270 static bool is_bpf_obj_new_kfunc(u32 func_id)
11271 {
11272 	return func_id == special_kfunc_list[KF_bpf_obj_new] ||
11273 	       func_id == special_kfunc_list[KF_bpf_obj_new_impl];
11274 }
11275 
11276 static bool is_bpf_percpu_obj_new_kfunc(u32 func_id)
11277 {
11278 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] ||
11279 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl];
11280 }
11281 
11282 static bool is_bpf_obj_drop_kfunc(u32 func_id)
11283 {
11284 	return func_id == special_kfunc_list[KF_bpf_obj_drop] ||
11285 	       func_id == special_kfunc_list[KF_bpf_obj_drop_impl];
11286 }
11287 
11288 static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id)
11289 {
11290 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] ||
11291 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl];
11292 }
11293 
11294 static bool is_bpf_refcount_acquire_kfunc(u32 func_id)
11295 {
11296 	return func_id == special_kfunc_list[KF_bpf_refcount_acquire] ||
11297 	       func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11298 }
11299 
11300 static bool is_bpf_list_push_kfunc(u32 func_id)
11301 {
11302 	return func_id == special_kfunc_list[KF_bpf_list_push_front] ||
11303 	       func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11304 	       func_id == special_kfunc_list[KF_bpf_list_push_back] ||
11305 	       func_id == special_kfunc_list[KF_bpf_list_push_back_impl];
11306 }
11307 
11308 static bool is_bpf_rbtree_add_kfunc(u32 func_id)
11309 {
11310 	return func_id == special_kfunc_list[KF_bpf_rbtree_add] ||
11311 	       func_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11312 }
11313 
11314 static bool is_task_work_add_kfunc(u32 func_id)
11315 {
11316 	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
11317 	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
11318 }
11319 
11320 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11321 {
11322 	if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref)
11323 		return false;
11324 
11325 	return meta->kfunc_flags & KF_RET_NULL;
11326 }
11327 
/* True if the call in @meta is bpf_rcu_read_lock(). */
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
}
11332 
/* True if the call in @meta is bpf_rcu_read_unlock(). */
static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
}
11337 
/* True if the call in @meta is bpf_preempt_disable(). */
static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
}
11342 
/* True if the call in @meta is bpf_preempt_enable(). */
static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
}
11347 
/* True for kfuncs treated as packet-changing by the verifier; currently only
 * bpf_xdp_pull_data() is in this category.
 */
bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
}
11352 
/*
 * Classify kfunc pointer argument #argno (0-based) into a kfunc_ptr_arg_type,
 * based mainly on the kfunc's BTF declaration and, for a few cases, on the
 * current register state.  The order of the checks is significant: specific
 * argument kinds must be recognized before the KF_ARG_PTR_TO_MEM catch-all
 * at the end.  On unsupported pointer types a negative errno is returned
 * through the enum return type; the caller checks for ret < 0.
 */
static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
		       struct bpf_kfunc_call_arg_meta *meta,
		       const struct btf_type *t, const struct btf_type *ref_t,
		       const char *ref_tname, const struct btf_param *args,
		       int argno, int nargs)
{
	/* R0 holds the return value, so argument #argno lives in R(argno+1). */
	u32 regno = argno + 1;
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	bool arg_mem_size = false;

	/* These kfuncs take the program context regardless of BTF types. */
	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
	    meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
	    meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
		return KF_ARG_PTR_TO_CTX;

	/* A following size argument turns this pointer into a mem+size pair. */
	if (argno + 1 < nargs &&
	    (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
	     is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
		arg_mem_size = true;

	/* In this function, we verify the kfunc's BTF as per the argument type,
	 * leaving the rest of the verification with respect to the register
	 * type to our caller. When a set of conditions hold in the BTF type of
	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
	 */
	if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
		return KF_ARG_PTR_TO_CTX;

	if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && bpf_register_is_null(reg) &&
	    !arg_mem_size)
		return KF_ARG_PTR_TO_NULL;

	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_ALLOC_BTF_ID;

	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;

	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_DYNPTR;

	if (is_kfunc_arg_iter(meta, argno, &args[argno]))
		return KF_ARG_PTR_TO_ITER;

	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_LIST_HEAD;

	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_LIST_NODE;

	if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RB_ROOT;

	if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RB_NODE;

	if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_CONST_STR;

	if (is_kfunc_arg_map(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_MAP;

	if (is_kfunc_arg_wq(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_WORKQUEUE;

	if (is_kfunc_arg_timer(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_TIMER;

	if (is_kfunc_arg_task_work(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_TASK_WORK;

	if (is_kfunc_arg_irq_flag(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_IRQ_FLAG;

	if (is_kfunc_arg_res_spin_lock(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RES_SPIN_LOCK;

	/* Registers that already carry a BTF-typed object only accept struct
	 * pointee types.
	 */
	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
		if (!btf_type_is_struct(ref_t)) {
			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
			return -EINVAL;
		}
		return KF_ARG_PTR_TO_BTF_ID;
	}

	if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_CALLBACK;

	/* This is the catch all argument type of register types supported by
	 * check_helper_mem_access. However, we only allow when argument type is
	 * pointer to scalar, or struct composed (recursively) of scalars. When
	 * arg_mem_size is true, the pointer can be void *.
	 */
	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
		return -EINVAL;
	}
	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
}
11457 
/*
 * Verify that the BTF type of the object in @reg matches the struct type the
 * kfunc declared for argument #argno (@ref_id / @ref_t / @ref_tname in
 * @meta->btf).  Exact matching is only required for release kfuncs on
 * referenced objects and for no-cast aliases; otherwise
 * btf_struct_ids_match() may also match a struct embedded at offset 0.
 * Returns 0 on match, -EINVAL otherwise.
 */
static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
					struct bpf_reg_state *reg,
					const struct btf_type *ref_t,
					const char *ref_tname, u32 ref_id,
					struct bpf_kfunc_call_arg_meta *meta,
					int argno)
{
	const struct btf_type *reg_ref_t;
	bool strict_type_match = false;
	const struct btf *reg_btf;
	const char *reg_ref_tname;
	bool taking_projection;
	bool struct_same;
	u32 reg_ref_id;

	/* Non-PTR_TO_BTF_ID register types map to vmlinux BTF IDs via
	 * reg2btf_ids.
	 */
	if (base_type(reg->type) == PTR_TO_BTF_ID) {
		reg_btf = reg->btf;
		reg_ref_id = reg->btf_id;
	} else {
		reg_btf = btf_vmlinux;
		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
	}

	/* Enforce strict type matching for calls to kfuncs that are acquiring
	 * or releasing a reference, or are no-cast aliases. We do _not_
	 * enforce strict matching for kfuncs by default,
	 * as we want to enable BPF programs to pass types that are bitwise
	 * equivalent without forcing them to explicitly cast with something
	 * like bpf_cast_to_kern_ctx().
	 *
	 * For example, say we had a type like the following:
	 *
	 * struct bpf_cpumask {
	 *	cpumask_t cpumask;
	 *	refcount_t usage;
	 * };
	 *
	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
	 * to a struct cpumask, so it would be safe to pass a struct
	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
	 *
	 * The philosophy here is similar to how we allow scalars of different
	 * types to be passed to kfuncs as long as the size is the same. The
	 * only difference here is that we're simply allowing
	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
	 * resolve types.
	 */
	if ((is_kfunc_release(meta) && reg->ref_obj_id) ||
	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
		strict_type_match = true;

	WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off));

	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value,
					   meta->btf, ref_id, strict_type_match);
	/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
	 * actually use it -- it must cast to the underlying type. So we allow
	 * caller to pass in the underlying type.
	 */
	taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
	if (!taking_projection && !struct_same) {
		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
			btf_type_str(reg_ref_t), reg_ref_tname);
		return -EINVAL;
	}
	return 0;
}
11528 
/*
 * Verify the irq-flag stack slot argument (register @regno) of the four
 * irq save/restore kfuncs: the save variants require an uninitialized slot
 * and mark it as holding irq flags; the restore variants require an
 * initialized slot and unmark it.  The resilient spin lock variants use a
 * distinct kfunc class so save/restore pairs cannot be mixed across APIs.
 */
static int process_irq_flag(struct bpf_verifier_env *env, int regno,
			     struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	int err, kfunc_class = IRQ_NATIVE_KFUNC;
	bool irq_save;

	if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] ||
	    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) {
		irq_save = true;
		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
			kfunc_class = IRQ_LOCK_KFUNC;
	} else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] ||
		   meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) {
		irq_save = false;
		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
			kfunc_class = IRQ_LOCK_KFUNC;
	} else {
		verifier_bug(env, "unknown irq flags kfunc");
		return -EFAULT;
	}

	if (irq_save) {
		if (!is_irq_flag_reg_valid_uninit(env, reg)) {
			verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1);
			return -EINVAL;
		}

		/* The kfunc will write the saved flags into the slot. */
		err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false);
		if (err)
			return err;

		err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class);
		if (err)
			return err;
	} else {
		err = is_irq_flag_reg_valid_init(env, reg);
		if (err) {
			verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1);
			return err;
		}

		err = mark_irq_flag_read(env, reg);
		if (err)
			return err;

		err = unmark_stack_slot_irq_flag(env, reg, kfunc_class);
		if (err)
			return err;
	}
	return 0;
}
11581 
11582 
/*
 * Mark @reg as a non-owning reference.  Must only be called while a lock is
 * active; the reference is additionally marked MEM_RCU when the object is
 * refcounted (rec->refcount_off >= 0).  Returns -EFAULT on verifier bugs.
 */
static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct btf_record *rec = reg_btf_record(reg);

	if (!env->cur_state->active_locks) {
		verifier_bug(env, "%s w/o active lock", __func__);
		return -EFAULT;
	}

	if (type_flag(reg->type) & NON_OWN_REF) {
		verifier_bug(env, "NON_OWN_REF already set");
		return -EFAULT;
	}

	reg->type |= NON_OWN_REF;
	if (rec->refcount_off >= 0)
		reg->type |= MEM_RCU;

	return 0;
}
11603 
/*
 * Convert every register holding the acquired reference @ref_obj_id from an
 * owning into a non-owning reference: the acquired-reference table is
 * searched to validate the id, then each matching register gets ref_obj_id
 * cleared and NON_OWN_REF set.  Returns -EFAULT if the id is zero or has no
 * matching reference state (both verifier bugs).
 */
static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_func_state *unused;
	struct bpf_reg_state *reg;
	int i;

	if (!ref_obj_id) {
		verifier_bug(env, "ref_obj_id is zero for owning -> non-owning conversion");
		return -EFAULT;
	}

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id != ref_obj_id)
			continue;

		/* Clear ref_obj_id here so release_reference doesn't clobber
		 * the whole reg
		 */
		bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
			if (reg->ref_obj_id == ref_obj_id) {
				reg->ref_obj_id = 0;
				ref_set_non_owning(env, reg);
			}
		}));
		return 0;
	}

	verifier_bug(env, "ref state missing for ref_obj_id");
	return -EFAULT;
}
11635 
11636 /* Implementation details:
11637  *
11638  * Each register points to some region of memory, which we define as an
11639  * allocation. Each allocation may embed a bpf_spin_lock which protects any
11640  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
11641  * allocation. The lock and the data it protects are colocated in the same
11642  * memory region.
11643  *
11644  * Hence, everytime a register holds a pointer value pointing to such
11645  * allocation, the verifier preserves a unique reg->id for it.
11646  *
11647  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
11648  * bpf_spin_lock is called.
11649  *
11650  * To enable this, lock state in the verifier captures two values:
11651  *	active_lock.ptr = Register's type specific pointer
11652  *	active_lock.id  = A unique ID for each register pointer value
11653  *
11654  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
11655  * supported register types.
11656  *
11657  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
11658  * allocated objects is the reg->btf pointer.
11659  *
11660  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
11661  * can establish the provenance of the map value statically for each distinct
11662  * lookup into such maps. They always contain a single map value hence unique
11663  * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
11664  *
11665  * So, in case of global variables, they use array maps with max_entries = 1,
11666  * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
11667  * into the same map value as max_entries is 1, as described above).
11668  *
11669  * In case of inner map lookups, the inner map pointer has same map_ptr as the
11670  * outer map pointer (in verifier context), but each lookup into an inner map
11671  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
11672  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
11673  * will get different reg->id assigned to each lookup, hence different
11674  * active_lock.id.
11675  *
11676  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
11677  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
11678  * returned from bpf_obj_new. Each allocation receives a new reg->id.
11679  */
/*
 * Check that the bpf_spin_lock protecting @reg's allocation is currently
 * held, by matching the register's (ptr, id) pair (see the comment above)
 * against the acquired lock states.  Returns 0 when held, -EINVAL when no
 * matching lock is held, -EFAULT on an unexpected register type.
 */
static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_reference_state *s;
	void *ptr;
	u32 id;

	switch ((int)reg->type) {
	case PTR_TO_MAP_VALUE:
		ptr = reg->map_ptr;
		break;
	case PTR_TO_BTF_ID | MEM_ALLOC:
		ptr = reg->btf;
		break;
	default:
		verifier_bug(env, "unknown reg type for lock check");
		return -EFAULT;
	}
	id = reg->id;

	if (!env->cur_state->active_locks)
		return -EINVAL;
	s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr);
	if (!s) {
		verbose(env, "held lock and object are not in the same allocation\n");
		return -EINVAL;
	}
	return 0;
}
11708 
11709 static bool is_bpf_list_api_kfunc(u32 btf_id)
11710 {
11711 	return is_bpf_list_push_kfunc(btf_id) ||
11712 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11713 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
11714 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
11715 	       btf_id == special_kfunc_list[KF_bpf_list_back];
11716 }
11717 
11718 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11719 {
11720 	return is_bpf_rbtree_add_kfunc(btf_id) ||
11721 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11722 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
11723 	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
11724 	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11725 	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
11726 }
11727 
11728 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
11729 {
11730 	return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
11731 	       btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
11732 	       btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
11733 }
11734 
11735 static bool is_bpf_graph_api_kfunc(u32 btf_id)
11736 {
11737 	return is_bpf_list_api_kfunc(btf_id) ||
11738 	       is_bpf_rbtree_api_kfunc(btf_id) ||
11739 	       is_bpf_refcount_acquire_kfunc(btf_id);
11740 }
11741 
11742 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
11743 {
11744 	return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
11745 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
11746 	       btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
11747 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
11748 }
11749 
11750 static bool is_bpf_arena_kfunc(u32 btf_id)
11751 {
11752 	return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] ||
11753 	       btf_id == special_kfunc_list[KF_bpf_arena_free_pages] ||
11754 	       btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages];
11755 }
11756 
11757 static bool is_bpf_stream_kfunc(u32 btf_id)
11758 {
11759 	return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] ||
11760 	       btf_id == special_kfunc_list[KF_bpf_stream_print_stack];
11761 }
11762 
11763 static bool kfunc_spin_allowed(u32 btf_id)
11764 {
11765 	return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
11766 	       is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) ||
11767 	       is_bpf_stream_kfunc(btf_id);
11768 }
11769 
/* Kfuncs whose callback runs synchronously within the call (bpf_rbtree_add). */
static bool is_sync_callback_calling_kfunc(u32 btf_id)
{
	return is_bpf_rbtree_add_kfunc(btf_id);
}
11774 
/* Kfuncs that schedule their callback to run later (wq, task_work). */
static bool is_async_callback_calling_kfunc(u32 btf_id)
{
	return is_bpf_wq_set_callback_kfunc(btf_id) ||
	       is_task_work_add_kfunc(btf_id);
}
11780 
/* True if @insn is a kfunc call (with off == 0) to bpf_throw(). */
static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
{
	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
	       insn->imm == special_kfunc_list[KF_bpf_throw];
}
11786 
/* True if @btf_id is bpf_wq_set_callback(). */
static bool is_bpf_wq_set_callback_kfunc(u32 btf_id)
{
	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback];
}
11791 
/* True for any kfunc that invokes a callback, sync or async. */
static bool is_callback_calling_kfunc(u32 btf_id)
{
	return is_sync_callback_calling_kfunc(btf_id) ||
	       is_async_callback_calling_kfunc(btf_id);
}
11797 
/* All rbtree API kfuncs require the protecting lock to be held. */
static bool is_rbtree_lock_required_kfunc(u32 btf_id)
{
	return is_bpf_rbtree_api_kfunc(btf_id);
}
11802 
/*
 * Sanity check that a graph root argument (bpf_list_head / bpf_rb_root) is
 * only passed to a kfunc of the matching graph family.  A mismatch means
 * a verifier-internal inconsistency, so false is returned with an internal
 * error message rather than a user-facing one.
 */
static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
					  enum btf_field_type head_field_type,
					  u32 kfunc_btf_id)
{
	bool ret;

	switch (head_field_type) {
	case BPF_LIST_HEAD:
		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
		break;
	case BPF_RB_ROOT:
		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
		break;
	default:
		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
			btf_field_type_name(head_field_type));
		return false;
	}

	if (!ret)
		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
			btf_field_type_name(head_field_type));
	return ret;
}
11827 
/*
 * Sanity check that a graph node argument (bpf_list_node / bpf_rb_node) is
 * only passed to a kfunc of the matching graph family.  Mirrors
 * check_kfunc_is_graph_root_api() for node-typed arguments.
 */
static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
					  enum btf_field_type node_field_type,
					  u32 kfunc_btf_id)
{
	bool ret;

	switch (node_field_type) {
	case BPF_LIST_NODE:
		ret = is_bpf_list_push_kfunc(kfunc_btf_id);
		break;
	case BPF_RB_NODE:
		ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
		break;
	default:
		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
			btf_field_type_name(node_field_type));
		return false;
	}

	if (!ret)
		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
			btf_field_type_name(node_field_type));
	return ret;
}
11855 
/*
 * Common handling for a kfunc argument that points at a graph root
 * (bpf_list_head or bpf_rb_root) inside a map value or allocated object.
 *
 * Verifies that:
 *  - the argument comes from vmlinux BTF and the kfunc is a known
 *    graph-root API for @head_field_type;
 *  - the register offset is a known constant and a matching btf_field
 *    exists at that offset in the object's btf_record;
 *  - the associated bpf_spin_lock is held.
 *
 * On success stores the located field in *@head_field (must be NULL on
 * entry; a repeated graph-root arg is a verifier bug). Returns 0 on
 * success, -EINVAL on a program error, -EFAULT on internal errors.
 */
static int
__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, u32 regno,
				   struct bpf_kfunc_call_arg_meta *meta,
				   enum btf_field_type head_field_type,
				   struct btf_field **head_field)
{
	const char *head_type_name;
	struct btf_field *field;
	struct btf_record *rec;
	u32 head_off;

	if (meta->btf != btf_vmlinux) {
		verifier_bug(env, "unexpected btf mismatch in kfunc call");
		return -EFAULT;
	}

	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
		return -EFAULT;

	head_type_name = btf_field_type_name(head_field_type);
	/* The root must live at a compile-time-known offset. */
	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
			regno, head_type_name);
		return -EINVAL;
	}

	rec = reg_btf_record(reg);
	head_off = reg->var_off.value;
	field = btf_record_find(rec, head_off, head_field_type);
	if (!field) {
		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
		return -EINVAL;
	}

	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
	if (check_reg_allocation_locked(env, reg)) {
		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
			rec->spin_lock_off, head_type_name);
		return -EINVAL;
	}

	if (*head_field) {
		verifier_bug(env, "repeating %s arg", head_type_name);
		return -EFAULT;
	}
	*head_field = field;
	return 0;
}
11906 
11907 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11908 					   struct bpf_reg_state *reg, u32 regno,
11909 					   struct bpf_kfunc_call_arg_meta *meta)
11910 {
11911 	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
11912 							  &meta->arg_list_head.field);
11913 }
11914 
11915 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11916 					     struct bpf_reg_state *reg, u32 regno,
11917 					     struct bpf_kfunc_call_arg_meta *meta)
11918 {
11919 	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
11920 							  &meta->arg_rbtree_root.field);
11921 }
11922 
/*
 * Common handling for a kfunc argument that points at a graph node
 * (bpf_list_node or bpf_rb_node) embedded in an allocated object.
 *
 * @node_field must already have been populated by the matching graph-root
 * argument's processing (the root carries the expected node type and
 * offset in its graph_root info). Verifies that the register points at a
 * node of the type and at the offset the root expects. Returns 0 on
 * success, -EINVAL on a program error, -EFAULT on internal errors.
 */
static int
__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, u32 regno,
				   struct bpf_kfunc_call_arg_meta *meta,
				   enum btf_field_type head_field_type,
				   enum btf_field_type node_field_type,
				   struct btf_field **node_field)
{
	const char *node_type_name;
	const struct btf_type *et, *t;
	struct btf_field *field;
	u32 node_off;

	if (meta->btf != btf_vmlinux) {
		verifier_bug(env, "unexpected btf mismatch in kfunc call");
		return -EFAULT;
	}

	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
		return -EFAULT;

	node_type_name = btf_field_type_name(node_field_type);
	/* The node must live at a compile-time-known offset. */
	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
			regno, node_type_name);
		return -EINVAL;
	}

	node_off = reg->var_off.value;
	/* Existence check only: the result is discarded below. */
	field = reg_find_field_offset(reg, node_off, node_field_type);
	if (!field) {
		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
		return -EINVAL;
	}

	/* From here on validate against what the graph *root* expects, not
	 * the field found at node_off.
	 */
	field = *node_field;

	/* et: expected node owner type per the root; t: actual pointee type */
	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
	t = btf_type_by_id(reg->btf, reg->btf_id);
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
				  field->graph_root.value_btf_id, true)) {
		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
			"in struct %s, but arg is at offset=%d in struct %s\n",
			btf_field_type_name(head_field_type),
			btf_field_type_name(node_field_type),
			field->graph_root.node_offset,
			btf_name_by_offset(field->graph_root.btf, et->name_off),
			node_off, btf_name_by_offset(reg->btf, t->name_off));
		return -EINVAL;
	}
	meta->arg_btf = reg->btf;
	meta->arg_btf_id = reg->btf_id;

	if (node_off != field->graph_root.node_offset) {
		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
			node_off, btf_field_type_name(node_field_type),
			field->graph_root.node_offset,
			btf_name_by_offset(field->graph_root.btf, et->name_off));
		return -EINVAL;
	}

	return 0;
}
11987 
11988 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11989 					   struct bpf_reg_state *reg, u32 regno,
11990 					   struct bpf_kfunc_call_arg_meta *meta)
11991 {
11992 	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11993 						  BPF_LIST_HEAD, BPF_LIST_NODE,
11994 						  &meta->arg_list_head.field);
11995 }
11996 
11997 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11998 					     struct bpf_reg_state *reg, u32 regno,
11999 					     struct bpf_kfunc_call_arg_meta *meta)
12000 {
12001 	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
12002 						  BPF_RB_ROOT, BPF_RB_NODE,
12003 						  &meta->arg_rbtree_root.field);
12004 }
12005 
/*
 * css_task iter allowlist is needed to avoid deadlocking on css_set_lock.
 * LSM hooks and iters (both sleepable and non-sleepable) are safe.
 * Any sleepable progs are also safe since bpf_check_attach_target() enforces
 * that they can only be attached to some specific hook points.
 */
12012 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
12013 {
12014 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
12015 
12016 	switch (prog_type) {
12017 	case BPF_PROG_TYPE_LSM:
12018 		return true;
12019 	case BPF_PROG_TYPE_TRACING:
12020 		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
12021 			return true;
12022 		fallthrough;
12023 	default:
12024 		return in_sleepable(env);
12025 	}
12026 }
12027 
/*
 * Type-check every argument of a kfunc call against the kfunc's BTF
 * prototype and record call-site metadata in @meta (constant argument
 * values, ref_obj_id to be released, map pointer/uid, dynptr and iter
 * state, graph root/node fields, callback subprog, ...). Registers R1..Rn
 * correspond to BTF args 0..n-1.
 *
 * Returns 0 on success, a negative error (-EINVAL/-EACCES for program
 * errors, -EFAULT for verifier-internal inconsistencies) otherwise.
 */
static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
			    int insn_idx)
{
	const char *func_name = meta->func_name, *ref_tname;
	const struct btf *btf = meta->btf;
	const struct btf_param *args;
	struct btf_record *rec;
	u32 i, nargs;
	int ret;

	/* BTF func_proto params immediately follow the proto type. */
	args = (const struct btf_param *)(meta->func_proto + 1);
	nargs = btf_type_vlen(meta->func_proto);
	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
			MAX_BPF_FUNC_REG_ARGS);
		return -EINVAL;
	}

	/* Check that BTF function arguments match actual types that the
	 * verifier sees.
	 */
	for (i = 0; i < nargs; i++) {
		struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
		const struct btf_type *t, *ref_t, *resolve_ret;
		enum bpf_arg_type arg_type = ARG_DONTCARE;
		u32 regno = i + 1, ref_id, type_size;
		bool is_ret_buf_sz = false;
		int kf_arg_type;

		/* Hidden prog->aux argument, filled in by the verifier. */
		if (is_kfunc_arg_prog_aux(btf, &args[i])) {
			/* Reject repeated use bpf_prog_aux */
			if (meta->arg_prog) {
				verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
				return -EFAULT;
			}
			meta->arg_prog = true;
			cur_aux(env)->arg_prog = regno;
			continue;
		}

		if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i))
			continue;

		t = btf_type_skip_modifiers(btf, args[i].type, NULL);

		/* Scalar argument: may be a required-constant (__k) or one of
		 * the special rdonly/rdwr_buf_size args that set r0's size.
		 */
		if (btf_type_is_scalar(t)) {
			if (reg->type != SCALAR_VALUE) {
				verbose(env, "R%d is not a scalar\n", regno);
				return -EINVAL;
			}

			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
				if (meta->arg_constant.found) {
					verifier_bug(env, "only one constant argument permitted");
					return -EFAULT;
				}
				if (!tnum_is_const(reg->var_off)) {
					verbose(env, "R%d must be a known constant\n", regno);
					return -EINVAL;
				}
				/* The constant's value matters for correctness,
				 * so force precise tracking of it.
				 */
				ret = mark_chain_precision(env, regno);
				if (ret < 0)
					return ret;
				meta->arg_constant.found = true;
				meta->arg_constant.value = reg->var_off.value;
			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
				meta->r0_rdonly = true;
				is_ret_buf_sz = true;
			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
				is_ret_buf_sz = true;
			}

			if (is_ret_buf_sz) {
				if (meta->r0_size) {
					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
					return -EINVAL;
				}

				if (!tnum_is_const(reg->var_off)) {
					verbose(env, "R%d is not a const\n", regno);
					return -EINVAL;
				}

				meta->r0_size = reg->var_off.value;
				ret = mark_chain_precision(env, regno);
				if (ret)
					return ret;
			}
			continue;
		}

		if (!btf_type_is_ptr(t)) {
			verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
			return -EINVAL;
		}

		/* Pointer args are trusted (non-NULL) unless marked __nullable. */
		if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) &&
		    !is_kfunc_arg_nullable(meta->btf, &args[i])) {
			verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
			return -EACCES;
		}

		/* Remember which (single) referenced register a release kfunc
		 * will release.
		 */
		if (reg->ref_obj_id) {
			if (is_kfunc_release(meta) && meta->ref_obj_id) {
				verifier_bug(env, "more than one arg with ref_obj_id R%d %u %u",
					     regno, reg->ref_obj_id,
					     meta->ref_obj_id);
				return -EFAULT;
			}
			meta->ref_obj_id = reg->ref_obj_id;
			if (is_kfunc_release(meta))
				meta->release_regno = regno;
		}

		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
		ref_tname = btf_name_by_offset(btf, ref_t->name_off);

		/* Classify the pointer argument from its BTF type/suffix. */
		kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
		if (kf_arg_type < 0)
			return kf_arg_type;

		/* First switch: preliminary checks per arg kind, and pick the
		 * arg_type for check_func_arg_reg_off() below.
		 */
		switch (kf_arg_type) {
		case KF_ARG_PTR_TO_NULL:
			continue;
		case KF_ARG_PTR_TO_MAP:
			if (!reg->map_ptr) {
				verbose(env, "pointer in R%d isn't map pointer\n", regno);
				return -EINVAL;
			}
			if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
					      reg->map_ptr->record->task_work_off >= 0)) {
				/* Use map_uid (which is unique id of inner map) to reject:
				 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
				 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
				 * if (inner_map1 && inner_map2) {
				 *     wq = bpf_map_lookup_elem(inner_map1);
				 *     if (wq)
				 *         // mismatch would have been allowed
				 *         bpf_wq_init(wq, inner_map2);
				 * }
				 *
				 * Comparing map_ptr is enough to distinguish normal and outer maps.
				 */
				if (meta->map.ptr != reg->map_ptr ||
				    meta->map.uid != reg->map_uid) {
					if (reg->map_ptr->record->task_work_off >= 0) {
						verbose(env,
							"bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
							meta->map.uid, reg->map_uid);
						return -EINVAL;
					}
					verbose(env,
						"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
						meta->map.uid, reg->map_uid);
					return -EINVAL;
				}
			}
			meta->map.ptr = reg->map_ptr;
			meta->map.uid = reg->map_uid;
			fallthrough;
		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
		case KF_ARG_PTR_TO_BTF_ID:
			if (!is_trusted_reg(reg)) {
				if (!is_kfunc_rcu(meta)) {
					verbose(env, "R%d must be referenced or trusted\n", regno);
					return -EINVAL;
				}
				/* RCU-protected pointers are OK for KF_RCU kfuncs. */
				if (!is_rcu_reg(reg)) {
					verbose(env, "R%d must be a rcu pointer\n", regno);
					return -EINVAL;
				}
			}
			fallthrough;
		case KF_ARG_PTR_TO_DYNPTR:
		case KF_ARG_PTR_TO_ITER:
		case KF_ARG_PTR_TO_LIST_HEAD:
		case KF_ARG_PTR_TO_LIST_NODE:
		case KF_ARG_PTR_TO_RB_ROOT:
		case KF_ARG_PTR_TO_RB_NODE:
		case KF_ARG_PTR_TO_MEM:
		case KF_ARG_PTR_TO_MEM_SIZE:
		case KF_ARG_PTR_TO_CALLBACK:
		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
		case KF_ARG_PTR_TO_CONST_STR:
		case KF_ARG_PTR_TO_WORKQUEUE:
		case KF_ARG_PTR_TO_TIMER:
		case KF_ARG_PTR_TO_TASK_WORK:
		case KF_ARG_PTR_TO_IRQ_FLAG:
		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
			break;
		case KF_ARG_PTR_TO_CTX:
			arg_type = ARG_PTR_TO_CTX;
			break;
		default:
			verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
			return -EFAULT;
		}

		if (is_kfunc_release(meta) && reg->ref_obj_id)
			arg_type |= OBJ_RELEASE;
		ret = check_func_arg_reg_off(env, reg, regno, arg_type);
		if (ret < 0)
			return ret;

		/* Second switch: full per-kind processing. */
		switch (kf_arg_type) {
		case KF_ARG_PTR_TO_CTX:
			if (reg->type != PTR_TO_CTX) {
				verbose(env, "arg#%d expected pointer to ctx, but got %s\n",
					i, reg_type_str(env, reg->type));
				return -EINVAL;
			}

			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
				if (ret < 0)
					return -EINVAL;
				meta->ret_btf_id  = ret;
			}
			break;
		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
				if (!is_bpf_obj_drop_kfunc(meta->func_id)) {
					verbose(env, "arg#%d expected for bpf_obj_drop()\n", i);
					return -EINVAL;
				}
			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
				if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) {
					verbose(env, "arg#%d expected for bpf_percpu_obj_drop()\n", i);
					return -EINVAL;
				}
			} else {
				verbose(env, "arg#%d expected pointer to allocated object\n", i);
				return -EINVAL;
			}
			if (!reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			if (meta->btf == btf_vmlinux) {
				meta->arg_btf = reg->btf;
				meta->arg_btf_id = reg->btf_id;
			}
			break;
		case KF_ARG_PTR_TO_DYNPTR:
		{
			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
			int clone_ref_obj_id = 0;

			if (reg->type == CONST_PTR_TO_DYNPTR)
				dynptr_arg_type |= MEM_RDONLY;

			if (is_kfunc_arg_uninit(btf, &args[i]))
				dynptr_arg_type |= MEM_UNINIT;

			/* Tag the dynptr flavor being constructed/consumed. */
			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
				dynptr_arg_type |= DYNPTR_TYPE_SKB;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
				dynptr_arg_type |= DYNPTR_TYPE_XDP;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
				dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
				dynptr_arg_type |= DYNPTR_TYPE_FILE;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) {
				dynptr_arg_type |= DYNPTR_TYPE_FILE;
				meta->release_regno = regno;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
				   (dynptr_arg_type & MEM_UNINIT)) {
				/* A clone inherits flavor and refcount from its
				 * (already-initialized) parent dynptr.
				 */
				enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;

				if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
					verifier_bug(env, "no dynptr type for parent of clone");
					return -EFAULT;
				}

				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
				clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
				if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
					verifier_bug(env, "missing ref obj id for parent of clone");
					return -EFAULT;
				}
			}

			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
			if (ret < 0)
				return ret;

			/* Record initialized-dynptr state for later args (e.g.
			 * a subsequent clone destination).
			 */
			if (!(dynptr_arg_type & MEM_UNINIT)) {
				int id = dynptr_id(env, reg);

				if (id < 0) {
					verifier_bug(env, "failed to obtain dynptr id");
					return id;
				}
				meta->initialized_dynptr.id = id;
				meta->initialized_dynptr.type = dynptr_get_type(env, reg);
				meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
			}

			break;
		}
		case KF_ARG_PTR_TO_ITER:
			if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
				if (!check_css_task_iter_allowlist(env)) {
					verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
					return -EINVAL;
				}
			}
			ret = process_iter_arg(env, regno, insn_idx, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_LIST_HEAD:
			if (reg->type != PTR_TO_MAP_VALUE &&
			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
				return -EINVAL;
			}
			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_RB_ROOT:
			if (reg->type != PTR_TO_MAP_VALUE &&
			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
				return -EINVAL;
			}
			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_LIST_NODE:
			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to allocated object\n", i);
				return -EINVAL;
			}
			if (!reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_RB_NODE:
			/* rbtree_add takes an owning ref; the other rb-node
			 * kfuncs take non-owning or refcounted nodes.
			 */
			if (is_bpf_rbtree_add_kfunc(meta->func_id)) {
				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
					verbose(env, "arg#%d expected pointer to allocated object\n", i);
					return -EINVAL;
				}
				if (!reg->ref_obj_id) {
					verbose(env, "allocated object must be referenced\n");
					return -EINVAL;
				}
			} else {
				if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) {
					verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
					return -EINVAL;
				}
				if (in_rbtree_lock_required_cb(env)) {
					verbose(env, "%s not allowed in rbtree cb\n", func_name);
					return -EINVAL;
				}
			}

			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_MAP:
			/* If argument has '__map' suffix expect 'struct bpf_map *' */
			ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
			ref_t = btf_type_by_id(btf_vmlinux, ref_id);
			ref_tname = btf_name_by_offset(btf, ref_t->name_off);
			fallthrough;
		case KF_ARG_PTR_TO_BTF_ID:
			/* Only base_type is checked, further checks are done here */
			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
			    !reg2btf_ids[base_type(reg->type)]) {
				verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
				verbose(env, "expected %s or socket\n",
					reg_type_str(env, base_type(reg->type) |
							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_MEM:
			/* Size is derived from the pointed-to BTF type. */
			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
			if (IS_ERR(resolve_ret)) {
				verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
					i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
				return -EINVAL;
			}
			ret = check_mem_reg(env, reg, regno, type_size);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_MEM_SIZE:
		{
			/* Pointer + __sz/__szk size pair: size lives in the
			 * next register/arg.
			 */
			struct bpf_reg_state *buff_reg = &regs[regno];
			const struct btf_param *buff_arg = &args[i];
			struct bpf_reg_state *size_reg = &regs[regno + 1];
			const struct btf_param *size_arg = &args[i + 1];

			if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
				ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
				if (ret < 0) {
					verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
					return ret;
				}
			}

			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
				if (meta->arg_constant.found) {
					verifier_bug(env, "only one constant argument permitted");
					return -EFAULT;
				}
				if (!tnum_is_const(size_reg->var_off)) {
					verbose(env, "R%d must be a known constant\n", regno + 1);
					return -EINVAL;
				}
				meta->arg_constant.found = true;
				meta->arg_constant.value = size_reg->var_off.value;
			}

			/* Skip next '__sz' or '__szk' argument */
			i++;
			break;
		}
		case KF_ARG_PTR_TO_CALLBACK:
			if (reg->type != PTR_TO_FUNC) {
				verbose(env, "arg%d expected pointer to func\n", i);
				return -EINVAL;
			}
			meta->subprogno = reg->subprogno;
			break;
		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
			if (!type_is_ptr_alloc_obj(reg->type)) {
				verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
				return -EINVAL;
			}
			if (!type_is_non_owning_ref(reg->type))
				meta->arg_owning_ref = true;

			rec = reg_btf_record(reg);
			if (!rec) {
				verifier_bug(env, "Couldn't find btf_record");
				return -EFAULT;
			}

			if (rec->refcount_off < 0) {
				verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
				return -EINVAL;
			}

			meta->arg_btf = reg->btf;
			meta->arg_btf_id = reg->btf_id;
			break;
		case KF_ARG_PTR_TO_CONST_STR:
			if (reg->type != PTR_TO_MAP_VALUE) {
				verbose(env, "arg#%d doesn't point to a const string\n", i);
				return -EINVAL;
			}
			ret = check_reg_const_str(env, reg, regno);
			if (ret)
				return ret;
			break;
		case KF_ARG_PTR_TO_WORKQUEUE:
			if (reg->type != PTR_TO_MAP_VALUE) {
				verbose(env, "arg#%d doesn't point to a map value\n", i);
				return -EINVAL;
			}
			ret = check_map_field_pointer(env, regno, BPF_WORKQUEUE, &meta->map);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_TIMER:
			if (reg->type != PTR_TO_MAP_VALUE) {
				verbose(env, "arg#%d doesn't point to a map value\n", i);
				return -EINVAL;
			}
			ret = process_timer_kfunc(env, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_TASK_WORK:
			if (reg->type != PTR_TO_MAP_VALUE) {
				verbose(env, "arg#%d doesn't point to a map value\n", i);
				return -EINVAL;
			}
			ret = check_map_field_pointer(env, regno, BPF_TASK_WORK, &meta->map);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_IRQ_FLAG:
			if (reg->type != PTR_TO_STACK) {
				verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i);
				return -EINVAL;
			}
			ret = process_irq_flag(env, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
		{
			int flags = PROCESS_RES_LOCK;

			if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d doesn't point to map value or allocated object\n", i);
				return -EINVAL;
			}

			if (!is_bpf_res_spin_lock_kfunc(meta->func_id))
				return -EFAULT;
			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
				flags |= PROCESS_SPIN_LOCK;
			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
				flags |= PROCESS_LOCK_IRQ;
			ret = process_spin_lock(env, regno, flags);
			if (ret < 0)
				return ret;
			break;
		}
		}
	}

	/* A release kfunc must actually have received a referenced arg. */
	if (is_kfunc_release(meta) && !meta->release_regno) {
		verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
			func_name);
		return -EINVAL;
	}

	return 0;
}
12576 
12577 int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
12578 			     s32 func_id,
12579 			     s16 offset,
12580 			     struct bpf_kfunc_call_arg_meta *meta)
12581 {
12582 	struct bpf_kfunc_meta kfunc;
12583 	int err;
12584 
12585 	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
12586 	if (err)
12587 		return err;
12588 
12589 	memset(meta, 0, sizeof(*meta));
12590 	meta->btf = kfunc.btf;
12591 	meta->func_id = kfunc.id;
12592 	meta->func_proto = kfunc.proto;
12593 	meta->func_name = kfunc.name;
12594 
12595 	if (!kfunc.flags || !btf_kfunc_is_allowed(kfunc.btf, kfunc.id, env->prog))
12596 		return -EACCES;
12597 
12598 	meta->kfunc_flags = *kfunc.flags;
12599 
12600 	return 0;
12601 }
12602 
/*
 * Determine how many bytes a helper accesses through a stack pointer at
 * argument position @arg (0-based, corresponding to R1-R5).
 *
 * Returns:
 *   > 0   known read access size in bytes
 *     0   doesn't read anything directly
 * S64_MIN unknown
 *   < 0   known write access of (-return) bytes
 */
s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
				  int arg, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
	const struct bpf_func_proto *fn;
	enum bpf_arg_type at;
	s64 size;

	if (bpf_get_helper_proto(env, insn->imm, &fn) < 0)
		return S64_MIN;

	at = fn->arg_type[arg];

	switch (base_type(at)) {
	case ARG_PTR_TO_MAP_KEY:
	case ARG_PTR_TO_MAP_VALUE: {
		bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY;
		u64 val;
		int i, map_reg;

		/* Locate the ARG_CONST_MAP_PTR arg preceding this key/value
		 * arg; its map determines key_size/value_size.
		 */
		for (i = 0; i < arg; i++) {
			if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR)
				break;
		}
		if (i >= arg)
			goto scan_all_maps;

		map_reg = BPF_REG_1 + i;

		/* const_reg_map_mask: presumably marks registers whose map
		 * pointer is the same known constant on all paths reaching
		 * this insn -- TODO confirm against where aux is populated.
		 */
		if (!(aux->const_reg_map_mask & BIT(map_reg)))
			goto scan_all_maps;

		/* NOTE(review): const_reg_vals[map_reg] appears to hold an
		 * index into env->used_maps[] here -- verify at the writer.
		 */
		i = aux->const_reg_vals[map_reg];
		if (i < env->used_map_cnt) {
			size = is_key ? env->used_maps[i]->key_size
				      : env->used_maps[i]->value_size;
			goto out;
		}
scan_all_maps:
		/*
		 * Map pointer is not known at this call site (e.g. different
		 * maps on merged paths).  Conservatively return the largest
		 * key_size or value_size across all maps used by the program.
		 */
		val = 0;
		for (i = 0; i < env->used_map_cnt; i++) {
			struct bpf_map *map = env->used_maps[i];
			u32 sz = is_key ? map->key_size : map->value_size;

			if (sz > val)
				val = sz;
			/* map-in-map: inner map sizes count too */
			if (map->inner_map_meta) {
				sz = is_key ? map->inner_map_meta->key_size
					    : map->inner_map_meta->value_size;
				if (sz > val)
					val = sz;
			}
		}
		if (!val)
			return S64_MIN;
		size = val;
		goto out;
	}
	case ARG_PTR_TO_MEM:
		/* Fixed-size mem arg: size comes from the proto itself. */
		if (at & MEM_FIXED_SIZE) {
			size = fn->arg_size[arg];
			goto out;
		}
		/* mem + size pair: size is in the following register. */
		if (arg + 1 < ARRAY_SIZE(fn->arg_type) &&
		    arg_type_is_mem_size(fn->arg_type[arg + 1])) {
			int size_reg = BPF_REG_1 + arg + 1;

			if (aux->const_reg_mask & BIT(size_reg)) {
				size = (s64)aux->const_reg_vals[size_reg];
				goto out;
			}
			/*
			 * Size arg is const on each path but differs across merged
			 * paths. MAX_BPF_STACK is a safe upper bound for reads.
			 */
			if (at & MEM_UNINIT)
				return 0;
			return MAX_BPF_STACK;
		}
		return S64_MIN;
	case ARG_PTR_TO_DYNPTR:
		/* on-stack dynptr representation has a fixed size */
		size = BPF_DYNPTR_SIZE;
		break;
	case ARG_PTR_TO_STACK:
		/*
		 * Only used by bpf_calls_callback() helpers. The helper itself
		 * doesn't access stack. The callback subprog does and it's
		 * analyzed separately.
		 */
		return 0;
	default:
		return S64_MIN;
	}
out:
	/*
	 * MEM_UNINIT args are write-only: the helper initializes the
	 * buffer without reading it.
	 */
	if (at & MEM_UNINIT)
		return -size;
	return size;
}
12720 
/*
 * Determine how many bytes a kfunc accesses through a stack pointer at
 * argument position @arg (0-based, corresponding to R1-R5).
 *
 * Returns:
 *   > 0      known read access size in bytes
 *     0      doesn't access memory through that argument (ex: not a pointer)
 *   S64_MIN  unknown
 *   < 0      known write access of (-return) bytes
 */
s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
				 int arg, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
	struct bpf_kfunc_call_arg_meta meta;
	const struct btf_param *args;
	const struct btf_type *t, *ref_t;
	const struct btf *btf;
	u32 nargs, type_size;
	s64 size;

	if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
		return S64_MIN;

	btf = meta.btf;
	args = btf_params(meta.func_proto);
	nargs = btf_type_vlen(meta.func_proto);
	/* Arg beyond the prototype: no memory access through it. */
	if (arg >= nargs)
		return 0;

	t = btf_type_skip_modifiers(btf, args[arg].type, NULL);
	if (!btf_type_is_ptr(t))
		return 0;

	/* dynptr: fixed 16-byte on-stack representation */
	if (is_kfunc_arg_dynptr(btf, &args[arg])) {
		size = BPF_DYNPTR_SIZE;
		goto out;
	}

	/* ptr + __sz/__szk pair: size is in the next register */
	if (arg + 1 < nargs &&
	    (btf_param_match_suffix(btf, &args[arg + 1], "__sz") ||
	     btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) {
		int size_reg = BPF_REG_1 + arg + 1;

		if (aux->const_reg_mask & BIT(size_reg)) {
			size = (s64)aux->const_reg_vals[size_reg];
			goto out;
		}
		/* size differs across merged paths: safe upper bound */
		return MAX_BPF_STACK;
	}

	/* fixed-size pointed-to type: resolve via BTF */
	ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
	if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) {
		size = type_size;
		goto out;
	}

	return S64_MIN;
out:
	/* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */
	if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW)
		return -size;
	/* __uninit args are written, not read */
	if (is_kfunc_arg_uninit(btf, &args[arg]))
		return -size;
	return size;
}
12790 
12791 /* check special kfuncs and return:
12792  *  1  - not fall-through to 'else' branch, continue verification
12793  *  0  - fall-through to 'else' branch
12794  * < 0 - not fall-through to 'else' branch, return error
12795  */
static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
			       struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
			       const struct btf_type *ptr_type, struct btf *desc_btf)
{
	const struct btf_type *ret_t;
	int err = 0;

	/* All kfuncs special-cased below live in vmlinux BTF. */
	if (meta->btf != btf_vmlinux)
		return 0;

	if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
		struct btf_struct_meta *struct_meta;
		struct btf *ret_btf;
		u32 ret_btf_id;

		if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set)
			return -ENOMEM;

		/* The constant argument is a type ID in the program's own BTF
		 * and therefore must fit in u32.
		 */
		if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
			verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
			return -EINVAL;
		}

		ret_btf = env->prog->aux->btf;
		ret_btf_id = meta->arg_constant.value;

		/* This may be NULL due to user not supplying a BTF */
		if (!ret_btf) {
			verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
			return -EINVAL;
		}

		ret_t = btf_type_by_id(ret_btf, ret_btf_id);
		if (!ret_t || !__btf_type_is_struct(ret_t)) {
			verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
			return -EINVAL;
		}

		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
			if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
				verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
					ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
				return -EINVAL;
			}

			/* Lazily initialize the global per-cpu allocator on
			 * first use, under bpf_percpu_ma_lock.
			 */
			if (!bpf_global_percpu_ma_set) {
				mutex_lock(&bpf_percpu_ma_lock);
				if (!bpf_global_percpu_ma_set) {
					/* Charge memory allocated with bpf_global_percpu_ma to
					 * root memcg. The obj_cgroup for root memcg is NULL.
					 */
					err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
					if (!err)
						bpf_global_percpu_ma_set = true;
				}
				mutex_unlock(&bpf_percpu_ma_lock);
				if (err)
					return err;
			}

			/* Make sure a unit cache for this object size exists. */
			mutex_lock(&bpf_percpu_ma_lock);
			err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
			mutex_unlock(&bpf_percpu_ma_lock);
			if (err)
				return err;
		}

		struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
			/* Per-cpu objects must be plain scalar structs with no
			 * special BTF fields (locks, kptrs, graph nodes, ...).
			 */
			if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
				verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
				return -EINVAL;
			}

			if (struct_meta) {
				verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
				return -EINVAL;
			}
		}

		/* R0 = freshly allocated object of the requested local type. */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
		regs[BPF_REG_0].btf = ret_btf;
		regs[BPF_REG_0].btf_id = ret_btf_id;
		if (is_bpf_percpu_obj_new_kfunc(meta->func_id))
			regs[BPF_REG_0].type |= MEM_PERCPU;

		insn_aux->obj_new_size = ret_t->size;
		insn_aux->kptr_struct_meta = struct_meta;
	} else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) {
		/* R0 gets the same allocated-object type as the argument. */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
		regs[BPF_REG_0].btf = meta->arg_btf;
		regs[BPF_REG_0].btf_id = meta->arg_btf_id;

		insn_aux->kptr_struct_meta =
			btf_find_struct_meta(meta->arg_btf,
					     meta->arg_btf_id);
	} else if (is_list_node_type(ptr_type)) {
		/* Returned list node: mark R0 as a graph node of the list's root. */
		struct btf_field *field = meta->arg_list_head.field;

		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
	} else if (is_rbtree_node_type(ptr_type)) {
		/* Returned rbtree node: mark R0 as a graph node of the tree's root. */
		struct btf_field *field = meta->arg_rbtree_root.field;

		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
	} else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
		/* Cast to the kernel context type is trusted. */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
		regs[BPF_REG_0].btf = desc_btf;
		regs[BPF_REG_0].btf_id = meta->ret_btf_id;
	} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
		/* Read-only cast may target a struct (untrusted PTR_TO_BTF_ID)
		 * or void (zero-sized read-only untrusted memory).
		 */
		ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
		if (!ret_t) {
			verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
				meta->arg_constant.value);
			return -EINVAL;
		} else if (btf_type_is_struct(ret_t)) {
			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
			regs[BPF_REG_0].btf = desc_btf;
			regs[BPF_REG_0].btf_id = meta->arg_constant.value;
		} else if (btf_type_is_void(ret_t)) {
			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
			regs[BPF_REG_0].mem_size = 0;
		} else {
			verbose(env,
				"kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
			return -EINVAL;
		}
	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
		   meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
		enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);

		mark_reg_known_zero(env, regs, BPF_REG_0);

		/* The slice size must be a verified constant argument. */
		if (!meta->arg_constant.found) {
			verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size");
			return -EFAULT;
		}

		regs[BPF_REG_0].mem_size = meta->arg_constant.value;

		/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
		regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;

		if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
			regs[BPF_REG_0].type |= MEM_RDONLY;
		} else {
			/* this will set env->seen_direct_write to true */
			if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
				verbose(env, "the prog does not allow writes to packet data\n");
				return -EINVAL;
			}
		}

		if (!meta->initialized_dynptr.id) {
			verifier_bug(env, "no dynptr id");
			return -EFAULT;
		}
		/* Tie the slice's validity to the underlying dynptr. */
		regs[BPF_REG_0].dynptr_id = meta->initialized_dynptr.id;

		/* we don't need to set BPF_REG_0's ref obj id
		 * because packet slices are not refcounted (see
		 * dynptr_type_refcounted)
		 */
	} else {
		return 0;
	}

	return 1;
}
12969 
12970 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
12971 static int process_bpf_exit_full(struct bpf_verifier_env *env,
12972 				 bool *do_print_state, bool exception_exit);
12973 
/* Verify a call instruction targeting a kernel function (kfunc):
 * resolve its BTF prototype, check the arguments, maintain
 * RCU/preemption/reference bookkeeping, and set up R0 according to the
 * declared return type. Returns 0 on success, negative error otherwise.
 */
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			    int *insn_idx_p)
{
	bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
	u32 i, nargs, ptr_type_id, release_ref_obj_id;
	struct bpf_reg_state *regs = cur_regs(env);
	const char *func_name, *ptr_type_name;
	const struct btf_type *t, *ptr_type;
	struct bpf_kfunc_call_arg_meta meta;
	struct bpf_insn_aux_data *insn_aux;
	int err, insn_idx = *insn_idx_p;
	const struct btf_param *args;
	struct btf *desc_btf;

	/* skip for now, but return error when we find this in fixup_kfunc_call */
	if (!insn->imm)
		return 0;

	err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
	if (err == -EACCES && meta.func_name)
		verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
	if (err)
		return err;
	desc_btf = meta.btf;
	func_name = meta.func_name;
	insn_aux = &env->insn_aux_data[insn_idx];

	insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta);

	/* res_spin_lock may fail at runtime: fork a verification branch
	 * where it returned an error in [-MAX_ERRNO, -1].
	 */
	if (!insn->off &&
	    (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
	     insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) {
		struct bpf_verifier_state *branch;
		struct bpf_reg_state *regs;

		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
		if (IS_ERR(branch)) {
			verbose(env, "failed to push state for failed lock acquisition\n");
			return PTR_ERR(branch);
		}

		regs = branch->frame[branch->curframe]->regs;

		/* Clear r0-r5 registers in forked state */
		for (i = 0; i < CALLER_SAVED_REGS; i++)
			bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);

		mark_reg_unknown(env, regs, BPF_REG_0);
		err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
		if (err) {
			verbose(env, "failed to mark s32 range for retval in forked state for lock\n");
			return err;
		}
		__mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
	} else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
		verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
		return -EFAULT;
	}

	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
		return -EACCES;
	}

	sleepable = bpf_is_kfunc_sleepable(&meta);
	if (sleepable && !in_sleepable(env)) {
		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
		return -EACCES;
	}

	/* Track non-sleepable context for kfuncs, same as for helpers. */
	if (!in_sleepable_context(env))
		insn_aux->non_sleepable = true;

	/* Check the arguments */
	err = check_kfunc_args(env, &meta, insn_idx);
	if (err < 0)
		return err;

	/* Kfuncs taking callbacks: verify the callback subprog with the
	 * register state the kfunc will invoke it with.
	 */
	if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_rbtree_add_callback_state);
		if (err) {
			verbose(env, "kfunc %s#%d failed callback verification\n",
				func_name, meta.func_id);
			return err;
		}
	}

	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
		meta.r0_size = sizeof(u64);
		meta.r0_rdonly = false;
	}

	if (is_bpf_wq_set_callback_kfunc(meta.func_id)) {
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_timer_callback_state);
		if (err) {
			verbose(env, "kfunc %s#%d failed callback verification\n",
				func_name, meta.func_id);
			return err;
		}
	}

	if (is_task_work_add_kfunc(meta.func_id)) {
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_task_work_schedule_callback_state);
		if (err) {
			verbose(env, "kfunc %s#%d failed callback verification\n",
				func_name, meta.func_id);
			return err;
		}
	}

	/* Maintain RCU read-side and preemption nesting counters. */
	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);

	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);

	if (rcu_lock) {
		env->cur_state->active_rcu_locks++;
	} else if (rcu_unlock) {
		struct bpf_func_state *state;
		struct bpf_reg_state *reg;
		u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);

		if (env->cur_state->active_rcu_locks == 0) {
			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
			return -EINVAL;
		}
		/* On leaving the outermost RCU section, MEM_RCU pointers
		 * become untrusted.
		 */
		if (--env->cur_state->active_rcu_locks == 0) {
			bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
				if (reg->type & MEM_RCU) {
					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
					reg->type |= PTR_UNTRUSTED;
				}
			}));
		}
	} else if (preempt_disable) {
		env->cur_state->active_preempt_locks++;
	} else if (preempt_enable) {
		if (env->cur_state->active_preempt_locks == 0) {
			verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
			return -EINVAL;
		}
		env->cur_state->active_preempt_locks--;
	}

	if (sleepable && !in_sleepable_context(env)) {
		verbose(env, "kernel func %s is sleepable within %s\n",
			func_name, non_sleepable_context_description(env));
		return -EACCES;
	}

	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
		return -EACCES;
	}

	if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
		verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
		return -EACCES;
	}

	/* In case of release function, we get register number of refcounted
	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
	 */
	if (meta.release_regno) {
		struct bpf_reg_state *reg = &regs[meta.release_regno];

		if (meta.initialized_dynptr.ref_obj_id) {
			err = unmark_stack_slots_dynptr(env, reg);
		} else {
			err = release_reference(env, reg->ref_obj_id);
			if (err)
				verbose(env, "kfunc %s#%d reference has not been acquired before\n",
					func_name, meta.func_id);
		}
		if (err)
			return err;
	}

	/* Inserting a node into a list/rbtree transfers ownership: the
	 * pushed node (R2) becomes a non-owning reference.
	 */
	if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) {
		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
		insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
		err = ref_convert_owning_non_owning(env, release_ref_obj_id);
		if (err) {
			verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
				func_name, meta.func_id);
			return err;
		}

		err = release_reference(env, release_ref_obj_id);
		if (err) {
			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
				func_name, meta.func_id);
			return err;
		}
	}

	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
		if (!bpf_jit_supports_exceptions()) {
			verbose(env, "JIT does not support calling kfunc %s#%d\n",
				func_name, meta.func_id);
			return -ENOTSUPP;
		}
		env->seen_exception = true;

		/* In the case of the default callback, the cookie value passed
		 * to bpf_throw becomes the return value of the program.
		 */
		if (!env->exception_callback_subprog) {
			err = check_return_code(env, BPF_REG_1, "R1");
			if (err < 0)
				return err;
		}
	}

	/* The call clobbers all caller-saved registers (R0-R5). */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		u32 regno = caller_saved[i];

		bpf_mark_reg_not_init(env, &regs[regno]);
		regs[regno].subreg_def = DEF_NOT_SUBREG;
	}

	/* Check return type */
	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);

	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
		/* Only exception to acquire-returns-struct-ptr: obj_new /
		 * percpu_obj_new / refcount_acquire from vmlinux BTF.
		 */
		if (meta.btf != btf_vmlinux ||
		    (!is_bpf_obj_new_kfunc(meta.func_id) &&
		     !is_bpf_percpu_obj_new_kfunc(meta.func_id) &&
		     !is_bpf_refcount_acquire_kfunc(meta.func_id))) {
			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
			return -EINVAL;
		}
	}

	if (btf_type_is_scalar(t)) {
		mark_reg_unknown(env, regs, BPF_REG_0);
		/* On the success path res_spin_lock returns exactly 0. */
		if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
		    meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]))
			__mark_reg_const_zero(env, &regs[BPF_REG_0]);
		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
	} else if (btf_type_is_ptr(t)) {
		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
		err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
		if (err) {
			if (err < 0)
				return err;
		} else if (btf_type_is_void(ptr_type)) {
			/* kfunc returning 'void *' is equivalent to returning scalar */
			mark_reg_unknown(env, regs, BPF_REG_0);
		} else if (!__btf_type_is_struct(ptr_type)) {
			/* Non-struct pointer: allowed only when a fixed size is
			 * known (from BTF resolution or meta.r0_size).
			 */
			if (!meta.r0_size) {
				__u32 sz;

				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
					meta.r0_size = sz;
					meta.r0_rdonly = true;
				}
			}
			if (!meta.r0_size) {
				ptr_type_name = btf_name_by_offset(desc_btf,
								   ptr_type->name_off);
				verbose(env,
					"kernel function %s returns pointer type %s %s is not supported\n",
					func_name,
					btf_type_str(ptr_type),
					ptr_type_name);
				return -EINVAL;
			}

			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].type = PTR_TO_MEM;
			regs[BPF_REG_0].mem_size = meta.r0_size;

			if (meta.r0_rdonly)
				regs[BPF_REG_0].type |= MEM_RDONLY;

			/* Ensures we don't access the memory after a release_reference() */
			if (meta.ref_obj_id)
				regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;

			if (is_kfunc_rcu_protected(&meta))
				regs[BPF_REG_0].type |= MEM_RCU;
		} else {
			enum bpf_reg_type type = PTR_TO_BTF_ID;

			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
				type |= PTR_UNTRUSTED;
			else if (is_kfunc_rcu_protected(&meta) ||
				 (bpf_is_iter_next_kfunc(&meta) &&
				  (get_iter_from_state(env->cur_state, &meta)
					   ->type & MEM_RCU))) {
				/*
				 * If the iterator's constructor (the _new
				 * function e.g., bpf_iter_task_new) has been
				 * annotated with BPF kfunc flag
				 * KF_RCU_PROTECTED and was called within a RCU
				 * read-side critical section, also propagate
				 * the MEM_RCU flag to the pointer returned from
				 * the iterator's next function (e.g.,
				 * bpf_iter_task_next).
				 */
				type |= MEM_RCU;
			} else {
				/*
				 * Any PTR_TO_BTF_ID that is returned from a BPF
				 * kfunc should by default be treated as
				 * implicitly trusted.
				 */
				type |= PTR_TRUSTED;
			}

			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].btf = desc_btf;
			regs[BPF_REG_0].type = type;
			regs[BPF_REG_0].btf_id = ptr_type_id;
		}

		if (is_kfunc_ret_null(&meta)) {
			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
			regs[BPF_REG_0].id = ++env->id_gen;
		}
		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
		if (is_kfunc_acquire(&meta)) {
			int id = acquire_reference(env, insn_idx);

			if (id < 0)
				return id;
			if (is_kfunc_ret_null(&meta))
				regs[BPF_REG_0].id = id;
			regs[BPF_REG_0].ref_obj_id = id;
		} else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
			ref_set_non_owning(env, &regs[BPF_REG_0]);
		}

		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
			regs[BPF_REG_0].id = ++env->id_gen;
	} else if (btf_type_is_void(t)) {
		if (meta.btf == btf_vmlinux) {
			if (is_bpf_obj_drop_kfunc(meta.func_id) ||
			    is_bpf_percpu_obj_drop_kfunc(meta.func_id)) {
				insn_aux->kptr_struct_meta =
					btf_find_struct_meta(meta.arg_btf,
							     meta.arg_btf_id);
			}
		}
	}

	if (bpf_is_kfunc_pkt_changing(&meta))
		clear_all_pkt_pointers(env);

	/* Record per-argument register widths for liveness tracking. */
	nargs = btf_type_vlen(meta.func_proto);
	args = (const struct btf_param *)(meta.func_proto + 1);
	for (i = 0; i < nargs; i++) {
		u32 regno = i + 1;

		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
		if (btf_type_is_ptr(t))
			mark_btf_func_reg_size(env, regno, sizeof(void *));
		else
			/* scalar. ensured by check_kfunc_args() */
			mark_btf_func_reg_size(env, regno, t->size);
	}

	if (bpf_is_iter_next_kfunc(&meta)) {
		err = process_iter_next_call(env, insn_idx, &meta);
		if (err)
			return err;
	}

	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
		env->prog->call_session_cookie = true;

	/* bpf_throw never returns: treat the call as a program exit. */
	if (is_bpf_throw_kfunc(insn))
		return process_bpf_exit_full(env, NULL, true);

	return 0;
}
13358 
13359 static bool check_reg_sane_offset_scalar(struct bpf_verifier_env *env,
13360 					 const struct bpf_reg_state *reg,
13361 					 enum bpf_reg_type type)
13362 {
13363 	bool known = tnum_is_const(reg->var_off);
13364 	s64 val = reg->var_off.value;
13365 	s64 smin = reg->smin_value;
13366 
13367 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13368 		verbose(env, "math between %s pointer and %lld is not allowed\n",
13369 			reg_type_str(env, type), val);
13370 		return false;
13371 	}
13372 
13373 	if (smin == S64_MIN) {
13374 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
13375 			reg_type_str(env, type));
13376 		return false;
13377 	}
13378 
13379 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13380 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
13381 			smin, reg_type_str(env, type));
13382 		return false;
13383 	}
13384 
13385 	return true;
13386 }
13387 
13388 static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
13389 				      const struct bpf_reg_state *reg,
13390 				      enum bpf_reg_type type)
13391 {
13392 	bool known = tnum_is_const(reg->var_off);
13393 	s64 val = reg->var_off.value;
13394 	s64 smin = reg->smin_value;
13395 
13396 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13397 		verbose(env, "%s pointer offset %lld is not allowed\n",
13398 			reg_type_str(env, type), val);
13399 		return false;
13400 	}
13401 
13402 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13403 		verbose(env, "%s pointer offset %lld is not allowed\n",
13404 			reg_type_str(env, type), smin);
13405 		return false;
13406 	}
13407 
13408 	return true;
13409 }
13410 
/* Failure reasons for pointer-ALU sanitation; each maps to a message
 * in sanitize_err().
 */
enum {
	REASON_BOUNDS	= -1,	/* unknown scalar with mixed signed bounds */
	REASON_TYPE	= -2,	/* pointer type unsupported for this ALU op */
	REASON_PATHS	= -3,	/* conflicting sanitation state across paths */
	REASON_LIMIT	= -4,	/* pointer would move beyond its bounds */
	REASON_STACK	= -5,	/* could not push speculative verification state */
};
13418 
/* Compute the masking limit for @ptr_reg, i.e. the number of bytes the
 * pointer may still be moved in the chosen direction (@mask_to_left)
 * without leaving its object. Stores the result in *@alu_limit and
 * returns 0, or a REASON_* error when no safe limit exists.
 */
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
			      u32 *alu_limit, bool mask_to_left)
{
	u32 max = 0, ptr_limit = 0;

	switch (ptr_reg->type) {
	case PTR_TO_STACK:
		/* Offset 0 is out-of-bounds, but acceptable start for the
		 * left direction, see BPF_REG_FP. Also, unknown scalar
		 * offset where we would need to deal with min/max bounds is
		 * currently prohibited for unprivileged.
		 */
		max = MAX_BPF_STACK + mask_to_left;
		/* Negate the (non-positive) constant stack offset. */
		ptr_limit = -ptr_reg->var_off.value;
		break;
	case PTR_TO_MAP_VALUE:
		max = ptr_reg->map_ptr->value_size;
		ptr_limit = mask_to_left ? ptr_reg->smin_value : ptr_reg->umax_value;
		break;
	default:
		/* Other pointer types cannot be sanitized. */
		return REASON_TYPE;
	}

	if (ptr_limit >= max)
		return REASON_LIMIT;
	*alu_limit = ptr_limit;
	return 0;
}
13447 
13448 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
13449 				    const struct bpf_insn *insn)
13450 {
13451 	return env->bypass_spec_v1 ||
13452 		BPF_SRC(insn->code) == BPF_K ||
13453 		cur_aux(env)->nospec;
13454 }
13455 
13456 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
13457 				       u32 alu_state, u32 alu_limit)
13458 {
13459 	/* If we arrived here from different branches with different
13460 	 * state or limits to sanitize, then this won't work.
13461 	 */
13462 	if (aux->alu_state &&
13463 	    (aux->alu_state != alu_state ||
13464 	     aux->alu_limit != alu_limit))
13465 		return REASON_PATHS;
13466 
13467 	/* Corresponding fixup done in do_misc_fixups(). */
13468 	aux->alu_state = alu_state;
13469 	aux->alu_limit = alu_limit;
13470 	return 0;
13471 }
13472 
13473 static int sanitize_val_alu(struct bpf_verifier_env *env,
13474 			    struct bpf_insn *insn)
13475 {
13476 	struct bpf_insn_aux_data *aux = cur_aux(env);
13477 
13478 	if (can_skip_alu_sanitation(env, insn))
13479 		return 0;
13480 
13481 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
13482 }
13483 
13484 static bool sanitize_needed(u8 opcode)
13485 {
13486 	return opcode == BPF_ADD || opcode == BPF_SUB;
13487 }
13488 
/* Scratch state carried between the simulation and commit phases of
 * sanitize_ptr_alu().
 */
struct bpf_sanitize_info {
	struct bpf_insn_aux_data aux;	/* staged alu_state/alu_limit before commit */
	bool mask_to_left;		/* masking direction picked in the first phase */
};
13493 
13494 static int sanitize_speculative_path(struct bpf_verifier_env *env,
13495 				     const struct bpf_insn *insn,
13496 				     u32 next_idx, u32 curr_idx)
13497 {
13498 	struct bpf_verifier_state *branch;
13499 	struct bpf_reg_state *regs;
13500 
13501 	branch = push_stack(env, next_idx, curr_idx, true);
13502 	if (!IS_ERR(branch) && insn) {
13503 		regs = branch->frame[branch->curframe]->regs;
13504 		if (BPF_SRC(insn->code) == BPF_K) {
13505 			mark_reg_unknown(env, regs, insn->dst_reg);
13506 		} else if (BPF_SRC(insn->code) == BPF_X) {
13507 			mark_reg_unknown(env, regs, insn->dst_reg);
13508 			mark_reg_unknown(env, regs, insn->src_reg);
13509 		}
13510 	}
13511 	return PTR_ERR_OR_ZERO(branch);
13512 }
13513 
/* Prepare (and, with @commit_window set, finalize) the masking state
 * for a pointer ADD/SUB with a scalar offset, then simulate the
 * speculative path that results from the masking truncating the
 * offset. Returns 0 on success or a REASON_* error.
 */
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn,
			    const struct bpf_reg_state *ptr_reg,
			    const struct bpf_reg_state *off_reg,
			    struct bpf_reg_state *dst_reg,
			    struct bpf_sanitize_info *info,
			    const bool commit_window)
{
	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
	struct bpf_verifier_state *vstate = env->cur_state;
	bool off_is_imm = tnum_is_const(off_reg->var_off);
	bool off_is_neg = off_reg->smin_value < 0;
	bool ptr_is_dst_reg = ptr_reg == dst_reg;
	u8 opcode = BPF_OP(insn->code);
	u32 alu_state, alu_limit;
	struct bpf_reg_state tmp;
	int err;

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	/* We already marked aux for masking from non-speculative
	 * paths, thus we got here in the first place. We only care
	 * to explore bad access from here.
	 */
	if (vstate->speculative)
		goto do_sim;

	if (!commit_window) {
		/* Offsets whose sign is not fixed cannot be masked. */
		if (!tnum_is_const(off_reg->var_off) &&
		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
			return REASON_BOUNDS;

		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
				     (opcode == BPF_SUB && !off_is_neg);
	}

	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
	if (err < 0)
		return err;

	if (commit_window) {
		/* In commit phase we narrow the masking window based on
		 * the observed pointer move after the simulated operation.
		 */
		alu_state = info->aux.alu_state;
		alu_limit = abs(info->aux.alu_limit - alu_limit);
	} else {
		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
		alu_state |= ptr_is_dst_reg ?
			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;

		/* Limit pruning on unknown scalars to enable deep search for
		 * potential masking differences from other program paths.
		 */
		if (!off_is_imm)
			env->explore_alu_limits = true;
	}

	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
	if (err < 0)
		return err;
do_sim:
	/* If we're in commit phase, we're done here given we already
	 * pushed the truncated dst_reg into the speculative verification
	 * stack.
	 *
	 * Also, when register is a known constant, we rewrite register-based
	 * operation to immediate-based, and thus do not need masking (and as
	 * a consequence, do not need to simulate the zero-truncation either).
	 */
	if (commit_window || off_is_imm)
		return 0;

	/* Simulate and find potential out-of-bounds access under
	 * speculative execution from truncation as a result of
	 * masking when off was not within expected range. If off
	 * sits in dst, then we temporarily need to move ptr there
	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
	 * for cases where we use K-based arithmetic in one direction
	 * and truncated reg-based in the other in order to explore
	 * bad access.
	 */
	if (!ptr_is_dst_reg) {
		tmp = *dst_reg;
		copy_register_state(dst_reg, ptr_reg);
	}
	err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
	if (err < 0)
		return REASON_STACK;
	if (!ptr_is_dst_reg)
		*dst_reg = tmp;
	return 0;
}
13609 
13610 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
13611 {
13612 	struct bpf_verifier_state *vstate = env->cur_state;
13613 
13614 	/* If we simulate paths under speculation, we don't update the
13615 	 * insn as 'seen' such that when we verify unreachable paths in
13616 	 * the non-speculative domain, sanitize_dead_code() can still
13617 	 * rewrite/sanitize them.
13618 	 */
13619 	if (!vstate->speculative)
13620 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
13621 }
13622 
13623 static int sanitize_err(struct bpf_verifier_env *env,
13624 			const struct bpf_insn *insn, int reason,
13625 			const struct bpf_reg_state *off_reg,
13626 			const struct bpf_reg_state *dst_reg)
13627 {
13628 	static const char *err = "pointer arithmetic with it prohibited for !root";
13629 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
13630 	u32 dst = insn->dst_reg, src = insn->src_reg;
13631 
13632 	switch (reason) {
13633 	case REASON_BOUNDS:
13634 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
13635 			off_reg == dst_reg ? dst : src, err);
13636 		break;
13637 	case REASON_TYPE:
13638 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
13639 			off_reg == dst_reg ? src : dst, err);
13640 		break;
13641 	case REASON_PATHS:
13642 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
13643 			dst, op, err);
13644 		break;
13645 	case REASON_LIMIT:
13646 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
13647 			dst, op, err);
13648 		break;
13649 	case REASON_STACK:
13650 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
13651 			dst, err);
13652 		return -ENOMEM;
13653 	default:
13654 		verifier_bug(env, "unknown reason (%d)", reason);
13655 		break;
13656 	}
13657 
13658 	return -EACCES;
13659 }
13660 
13661 /* check that stack access falls within stack limits and that 'reg' doesn't
13662  * have a variable offset.
13663  *
13664  * Variable offset is prohibited for unprivileged mode for simplicity since it
13665  * requires corresponding support in Spectre masking for stack ALU.  See also
13666  * retrieve_ptr_limit().
13667  */
13668 static int check_stack_access_for_ptr_arithmetic(
13669 				struct bpf_verifier_env *env,
13670 				int regno,
13671 				const struct bpf_reg_state *reg,
13672 				int off)
13673 {
13674 	if (!tnum_is_const(reg->var_off)) {
13675 		char tn_buf[48];
13676 
13677 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
13678 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
13679 			regno, tn_buf, off);
13680 		return -EACCES;
13681 	}
13682 
13683 	if (off >= 0 || off < -MAX_BPF_STACK) {
13684 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
13685 			"prohibited for !root; off=%d\n", regno, off);
13686 		return -EACCES;
13687 	}
13688 
13689 	return 0;
13690 }
13691 
13692 static int sanitize_check_bounds(struct bpf_verifier_env *env,
13693 				 const struct bpf_insn *insn,
13694 				 const struct bpf_reg_state *dst_reg)
13695 {
13696 	u32 dst = insn->dst_reg;
13697 
13698 	/* For unprivileged we require that resulting offset must be in bounds
13699 	 * in order to be able to sanitize access later on.
13700 	 */
13701 	if (env->bypass_spec_v1)
13702 		return 0;
13703 
13704 	switch (dst_reg->type) {
13705 	case PTR_TO_STACK:
13706 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
13707 							  dst_reg->var_off.value))
13708 			return -EACCES;
13709 		break;
13710 	case PTR_TO_MAP_VALUE:
13711 		if (check_map_access(env, dst, 0, 1, false, ACCESS_HELPER)) {
13712 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
13713 				"prohibited for !root\n", dst);
13714 			return -EACCES;
13715 		}
13716 		break;
13717 	default:
13718 		return -EOPNOTSUPP;
13719 	}
13720 
13721 	return 0;
13722 }
13723 
/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
 * Caller should also handle BPF_MOV case separately.
 * If we return -EACCES, caller may want to try again treating pointer as a
 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
 *
 * 'ptr_reg' is the pointer operand and 'off_reg' the scalar offset; the
 * result lands in regs[insn->dst_reg], which may alias either operand
 * (e.g. scalar += pointer makes dst_reg == off_reg for BPF_SUB checks).
 */
static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn,
				   const struct bpf_reg_state *ptr_reg,
				   const struct bpf_reg_state *off_reg)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *dst_reg;
	bool known = tnum_is_const(off_reg->var_off);
	/* Snapshot operand bounds up front: dst_reg may alias ptr_reg, and
	 * its bounds are overwritten piecemeal below.
	 */
	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
	struct bpf_sanitize_info info = {};
	u8 opcode = BPF_OP(insn->code);
	u32 dst = insn->dst_reg;
	int ret, bounds_ret;

	dst_reg = &regs[dst];

	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
	    smin_val > smax_val || umin_val > umax_val) {
		/* Taint dst register if offset had invalid bounds derived from
		 * e.g. dead branches.
		 */
		__mark_reg_unknown(env, dst_reg);
		return 0;
	}

	if (BPF_CLASS(insn->code) != BPF_ALU64) {
		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
			__mark_reg_unknown(env, dst_reg);
			return 0;
		}

		verbose(env,
			"R%d 32-bit pointer arithmetic prohibited\n",
			dst);
		return -EACCES;
	}

	if (ptr_reg->type & PTR_MAYBE_NULL) {
		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
			dst, reg_type_str(env, ptr_reg->type));
		return -EACCES;
	}

	/*
	 * Accesses to untrusted PTR_TO_MEM are done through probe
	 * instructions, hence no need to track offsets.
	 */
	if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
		return 0;

	/* Whitelist of pointer types that support arithmetic at all. */
	switch (base_type(ptr_reg->type)) {
	case PTR_TO_CTX:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MAP_KEY:
	case PTR_TO_STACK:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET:
	case PTR_TO_TP_BUFFER:
	case PTR_TO_BTF_ID:
	case PTR_TO_MEM:
	case PTR_TO_BUF:
	case PTR_TO_FUNC:
	case CONST_PTR_TO_DYNPTR:
		break;
	case PTR_TO_FLOW_KEYS:
		if (known)
			break;
		fallthrough;
	case CONST_PTR_TO_MAP:
		/* smin_val represents the known value */
		if (known && smin_val == 0 && opcode == BPF_ADD)
			break;
		fallthrough;
	default:
		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
			dst, reg_type_str(env, ptr_reg->type));
		return -EACCES;
	}

	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
	 * The id may be overwritten later if we create a new variable offset.
	 */
	dst_reg->type = ptr_reg->type;
	dst_reg->id = ptr_reg->id;

	if (!check_reg_sane_offset_scalar(env, off_reg, ptr_reg->type) ||
	    !check_reg_sane_offset_ptr(env, ptr_reg, ptr_reg->type))
		return -EINVAL;

	/* pointer types do not carry 32-bit bounds at the moment. */
	__mark_reg32_unbounded(dst_reg);

	if (sanitize_needed(opcode)) {
		/* Speculation-safety pass 1 (pre-commit): derive the ALU
		 * masking limit from the pre-operation state and simulate
		 * the truncated speculative path (see sanitize_ptr_alu()).
		 */
		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
				       &info, false);
		if (ret < 0)
			return sanitize_err(env, insn, ret, off_reg, dst_reg);
	}

	switch (opcode) {
	case BPF_ADD:
		/*
		 * dst_reg gets the pointer type and since some positive
		 * integer value was added to the pointer, give it a new 'id'
		 * if it's a PTR_TO_PACKET.
		 * this creates a new 'base' pointer, off_reg (variable) gets
		 * added into the variable offset, and we copy the fixed offset
		 * from ptr_reg.
		 */
		if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) ||
		    check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) {
			/* Either endpoint overflowed: signed bounds unknown. */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		}
		if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) ||
		    check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) {
			/* Either endpoint overflowed: unsigned bounds unknown. */
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		}
		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
		dst_reg->raw = ptr_reg->raw;
		if (reg_is_pkt_pointer(ptr_reg)) {
			if (!known)
				dst_reg->id = ++env->id_gen;
			/*
			 * Clear range for unknown addends since we can't know
			 * where the pkt pointer ended up. Also clear AT_PKT_END /
			 * BEYOND_PKT_END from prior comparison as any pointer
			 * arithmetic invalidates them.
			 */
			if (!known || dst_reg->range < 0)
				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
		}
		break;
	case BPF_SUB:
		if (dst_reg == off_reg) {
			/* scalar -= pointer.  Creates an unknown scalar */
			verbose(env, "R%d tried to subtract pointer from scalar\n",
				dst);
			return -EACCES;
		}
		/* We don't allow subtraction from FP, because (according to
		 * test_verifier.c test "invalid fp arithmetic", JITs might not
		 * be able to deal with it.
		 */
		if (ptr_reg->type == PTR_TO_STACK) {
			verbose(env, "R%d subtraction from stack pointer prohibited\n",
				dst);
			return -EACCES;
		}
		/* A new variable offset is created.  If the subtrahend is known
		 * nonnegative, then any reg->range we had before is still good.
		 */
		if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) ||
		    check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) {
			/* Overflow possible, we know nothing */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		}
		if (umin_ptr < umax_val) {
			/* Overflow possible, we know nothing */
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			/* Cannot overflow (as long as bounds are consistent) */
			dst_reg->umin_value = umin_ptr - umax_val;
			dst_reg->umax_value = umax_ptr - umin_val;
		}
		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
		dst_reg->raw = ptr_reg->raw;
		if (reg_is_pkt_pointer(ptr_reg)) {
			if (!known)
				dst_reg->id = ++env->id_gen;
			/*
			 * Clear range if the subtrahend may be negative since
			 * pkt pointer could move past its bounds. A positive
			 * subtrahend moves it backwards keeping positive range
			 * intact. Also clear AT_PKT_END / BEYOND_PKT_END from
			 * prior comparison as arithmetic invalidates them.
			 */
			if ((!known && smin_val < 0) || dst_reg->range < 0)
				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
		}
		break;
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
		/* bitwise ops on pointers are troublesome, prohibit. */
		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
			dst, bpf_alu_string[opcode >> 4]);
		return -EACCES;
	default:
		/* other operators (e.g. MUL,LSH) produce non-pointer results */
		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
			dst, bpf_alu_string[opcode >> 4]);
		return -EACCES;
	}

	if (!check_reg_sane_offset_ptr(env, dst_reg, ptr_reg->type))
		return -EINVAL;
	reg_bounds_sync(dst_reg);
	/* -EACCES is fatal; -EOPNOTSUPP (pointer type unsupported by
	 * sanitize_check_bounds()) is only tolerable if retrieve_ptr_limit()
	 * also rejects it, which the verifier_bug_if() below cross-checks.
	 */
	bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
	if (bounds_ret == -EACCES)
		return bounds_ret;
	if (sanitize_needed(opcode)) {
		/* Speculation-safety pass 2 (commit window): narrow the
		 * masking limit based on the now-computed dst_reg.
		 */
		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
				       &info, true);
		if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
				    && !env->cur_state->speculative
				    && bounds_ret
				    && !ret,
				    env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
			return -EFAULT;
		}
		if (ret < 0)
			return sanitize_err(env, insn, ret, off_reg, dst_reg);
	}

	return 0;
}
13954 
/* Propagate 32-bit (subreg) bounds for dst += src.
 *
 * Signed and unsigned u32/s32 ranges are updated independently; any range
 * whose endpoint addition may overflow is widened to the full s32/u32 span.
 */
static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	u32 *dst_umin = &dst_reg->u32_min_value;
	u32 *dst_umax = &dst_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	u32 umax_val = src_reg->u32_max_value;
	bool min_overflow, max_overflow;

	/* Signed: if either endpoint addition overflows, we know nothing. */
	if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
	    check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
		*dst_smin = S32_MIN;
		*dst_smax = S32_MAX;
	}

	/* If either all additions overflow or no additions overflow, then
	 * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
	 * dst_umax + src_umax. Otherwise (some additions overflow), set
	 * the output bounds to unbounded.
	 */
	min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
	max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);

	/* min_overflow && !max_overflow cannot happen since umin <= umax
	 * holds for both operands, so the only mixed case is the one below.
	 */
	if (!min_overflow && max_overflow) {
		*dst_umin = 0;
		*dst_umax = U32_MAX;
	}
}
13985 
/* Propagate 64-bit bounds for dst += src.
 *
 * Signed and unsigned u64/s64 ranges are updated independently; any range
 * whose endpoint addition may overflow is widened to the full s64/u64 span.
 */
static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	u64 *dst_umin = &dst_reg->umin_value;
	u64 *dst_umax = &dst_reg->umax_value;
	u64 umin_val = src_reg->umin_value;
	u64 umax_val = src_reg->umax_value;
	bool min_overflow, max_overflow;

	/* Signed: if either endpoint addition overflows, we know nothing. */
	if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
	    check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
		*dst_smin = S64_MIN;
		*dst_smax = S64_MAX;
	}

	/* If either all additions overflow or no additions overflow, then
	 * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
	 * dst_umax + src_umax. Otherwise (some additions overflow), set
	 * the output bounds to unbounded.
	 */
	min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
	max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);

	/* min_overflow && !max_overflow cannot happen since umin <= umax
	 * holds for both operands, so the only mixed case is the one below.
	 */
	if (!min_overflow && max_overflow) {
		*dst_umin = 0;
		*dst_umax = U64_MAX;
	}
}
14016 
/* Propagate 32-bit (subreg) bounds for dst -= src.
 *
 * Signed bounds use the cross pairing (smin - src_smax, smax - src_smin);
 * unsigned bounds use (umin - src_umax, umax - src_umin).
 */
static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	u32 *dst_umin = &dst_reg->u32_min_value;
	u32 *dst_umax = &dst_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	u32 umax_val = src_reg->u32_max_value;
	bool min_underflow, max_underflow;

	if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
	    check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
		/* Overflow possible, we know nothing */
		*dst_smin = S32_MIN;
		*dst_smax = S32_MAX;
	}

	/* If either all subtractions underflow or no subtractions
	 * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
	 * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
	 * underflow), set the output bounds to unbounded.
	 */
	min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
	max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);

	/* max_underflow implies min_underflow (umin <= umax on both sides),
	 * so the only possible mixed case is the one below.
	 */
	if (min_underflow && !max_underflow) {
		*dst_umin = 0;
		*dst_umax = U32_MAX;
	}
}
14048 
/* Propagate 64-bit bounds for dst -= src.
 *
 * Signed bounds use the cross pairing (smin - src_smax, smax - src_smin);
 * unsigned bounds use (umin - src_umax, umax - src_umin).
 */
static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	u64 *dst_umin = &dst_reg->umin_value;
	u64 *dst_umax = &dst_reg->umax_value;
	u64 umin_val = src_reg->umin_value;
	u64 umax_val = src_reg->umax_value;
	bool min_underflow, max_underflow;

	if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
	    check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
		/* Overflow possible, we know nothing */
		*dst_smin = S64_MIN;
		*dst_smax = S64_MAX;
	}

	/* If either all subtractions underflow or no subtractions
	 * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
	 * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
	 * underflow), set the output bounds to unbounded.
	 */
	min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
	max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);

	/* max_underflow implies min_underflow (umin <= umax on both sides),
	 * so the only possible mixed case is the one below.
	 */
	if (min_underflow && !max_underflow) {
		*dst_umin = 0;
		*dst_umax = U64_MAX;
	}
}
14080 
/* Propagate 32-bit (subreg) bounds for dst *= src.
 *
 * Unsigned bounds hold as long as neither endpoint product overflows u32.
 * Signed bounds are the min/max over the four endpoint products and are
 * only valid if none of those products overflows s32.
 */
static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	u32 *dst_umin = &dst_reg->u32_min_value;
	u32 *dst_umax = &dst_reg->u32_max_value;
	s32 tmp_prod[4];

	if (check_mul_overflow(*dst_umax, src_reg->u32_max_value, dst_umax) ||
	    check_mul_overflow(*dst_umin, src_reg->u32_min_value, dst_umin)) {
		/* Overflow possible, we know nothing */
		*dst_umin = 0;
		*dst_umax = U32_MAX;
	}
	/* All four corner products are needed: sign combinations of the
	 * operands determine which product is the min and which the max.
	 */
	if (check_mul_overflow(*dst_smin, src_reg->s32_min_value, &tmp_prod[0]) ||
	    check_mul_overflow(*dst_smin, src_reg->s32_max_value, &tmp_prod[1]) ||
	    check_mul_overflow(*dst_smax, src_reg->s32_min_value, &tmp_prod[2]) ||
	    check_mul_overflow(*dst_smax, src_reg->s32_max_value, &tmp_prod[3])) {
		/* Overflow possible, we know nothing */
		*dst_smin = S32_MIN;
		*dst_smax = S32_MAX;
	} else {
		*dst_smin = min_array(tmp_prod, 4);
		*dst_smax = max_array(tmp_prod, 4);
	}
}
14108 
/* Propagate 64-bit bounds for dst *= src.
 *
 * Unsigned bounds hold as long as neither endpoint product overflows u64.
 * Signed bounds are the min/max over the four endpoint products and are
 * only valid if none of those products overflows s64.
 */
static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	u64 *dst_umin = &dst_reg->umin_value;
	u64 *dst_umax = &dst_reg->umax_value;
	s64 tmp_prod[4];

	if (check_mul_overflow(*dst_umax, src_reg->umax_value, dst_umax) ||
	    check_mul_overflow(*dst_umin, src_reg->umin_value, dst_umin)) {
		/* Overflow possible, we know nothing */
		*dst_umin = 0;
		*dst_umax = U64_MAX;
	}
	/* All four corner products are needed: sign combinations of the
	 * operands determine which product is the min and which the max.
	 */
	if (check_mul_overflow(*dst_smin, src_reg->smin_value, &tmp_prod[0]) ||
	    check_mul_overflow(*dst_smin, src_reg->smax_value, &tmp_prod[1]) ||
	    check_mul_overflow(*dst_smax, src_reg->smin_value, &tmp_prod[2]) ||
	    check_mul_overflow(*dst_smax, src_reg->smax_value, &tmp_prod[3])) {
		/* Overflow possible, we know nothing */
		*dst_smin = S64_MIN;
		*dst_smax = S64_MAX;
	} else {
		*dst_smin = min_array(tmp_prod, 4);
		*dst_smax = max_array(tmp_prod, 4);
	}
}
14136 
14137 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg,
14138 				  struct bpf_reg_state *src_reg)
14139 {
14140 	u32 *dst_umin = &dst_reg->u32_min_value;
14141 	u32 *dst_umax = &dst_reg->u32_max_value;
14142 	u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */
14143 
14144 	*dst_umin = *dst_umin / src_val;
14145 	*dst_umax = *dst_umax / src_val;
14146 
14147 	/* Reset other ranges/tnum to unbounded/unknown. */
14148 	dst_reg->s32_min_value = S32_MIN;
14149 	dst_reg->s32_max_value = S32_MAX;
14150 	reset_reg64_and_tnum(dst_reg);
14151 }
14152 
14153 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg,
14154 				struct bpf_reg_state *src_reg)
14155 {
14156 	u64 *dst_umin = &dst_reg->umin_value;
14157 	u64 *dst_umax = &dst_reg->umax_value;
14158 	u64 src_val = src_reg->umin_value; /* non-zero, const divisor */
14159 
14160 	*dst_umin = div64_u64(*dst_umin, src_val);
14161 	*dst_umax = div64_u64(*dst_umax, src_val);
14162 
14163 	/* Reset other ranges/tnum to unbounded/unknown. */
14164 	dst_reg->smin_value = S64_MIN;
14165 	dst_reg->smax_value = S64_MAX;
14166 	reset_reg32_and_tnum(dst_reg);
14167 }
14168 
/* Propagate 32-bit signed bounds for dst /= src with a known, non-zero
 * constant divisor.  min()/max() of the endpoint quotients are needed
 * because a negative divisor reverses their order.
 */
static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg,
				  struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */
	s32 res1, res2;

	/* BPF div specification: S32_MIN / -1 = S32_MIN */
	if (*dst_smin == S32_MIN && src_val == -1) {
		/*
		 * If the dividend range contains more than just S32_MIN,
		 * we cannot precisely track the result, so it becomes unbounded.
		 * e.g., [S32_MIN, S32_MIN+10]/(-1),
		 *     = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)]
		 *     = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX]
		 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN.
		 */
		if (*dst_smax != S32_MIN) {
			*dst_smin = S32_MIN;
			*dst_smax = S32_MAX;
		}
		goto reset;
	}

	/* Safe now: the S32_MIN / -1 overflow case was handled above. */
	res1 = *dst_smin / src_val;
	res2 = *dst_smax / src_val;
	*dst_smin = min(res1, res2);
	*dst_smax = max(res1, res2);

reset:
	/* Reset other ranges/tnum to unbounded/unknown. */
	dst_reg->u32_min_value = 0;
	dst_reg->u32_max_value = U32_MAX;
	reset_reg64_and_tnum(dst_reg);
}
14205 
/* Propagate 64-bit signed bounds for dst /= src with a known, non-zero
 * constant divisor.  min()/max() of the endpoint quotients are needed
 * because a negative divisor reverses their order.
 */
static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	s64 src_val = src_reg->smin_value; /* non-zero, const divisor */
	s64 res1, res2;

	/* BPF div specification: S64_MIN / -1 = S64_MIN */
	if (*dst_smin == S64_MIN && src_val == -1) {
		/*
		 * If the dividend range contains more than just S64_MIN,
		 * we cannot precisely track the result, so it becomes unbounded.
		 * e.g., [S64_MIN, S64_MIN+10]/(-1),
		 *     = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)]
		 *     = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX]
		 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN.
		 */
		if (*dst_smax != S64_MIN) {
			*dst_smin = S64_MIN;
			*dst_smax = S64_MAX;
		}
		goto reset;
	}

	/* Safe now: the S64_MIN / -1 overflow case was handled above. */
	res1 = div64_s64(*dst_smin, src_val);
	res2 = div64_s64(*dst_smax, src_val);
	*dst_smin = min(res1, res2);
	*dst_smax = max(res1, res2);

reset:
	/* Reset other ranges/tnum to unbounded/unknown. */
	dst_reg->umin_value = 0;
	dst_reg->umax_value = U64_MAX;
	reset_reg32_and_tnum(dst_reg);
}
14242 
14243 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg,
14244 				  struct bpf_reg_state *src_reg)
14245 {
14246 	u32 *dst_umin = &dst_reg->u32_min_value;
14247 	u32 *dst_umax = &dst_reg->u32_max_value;
14248 	u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */
14249 	u32 res_max = src_val - 1;
14250 
14251 	/*
14252 	 * If dst_umax <= res_max, the result remains unchanged.
14253 	 * e.g., [2, 5] % 10 = [2, 5].
14254 	 */
14255 	if (*dst_umax <= res_max)
14256 		return;
14257 
14258 	*dst_umin = 0;
14259 	*dst_umax = min(*dst_umax, res_max);
14260 
14261 	/* Reset other ranges/tnum to unbounded/unknown. */
14262 	dst_reg->s32_min_value = S32_MIN;
14263 	dst_reg->s32_max_value = S32_MAX;
14264 	reset_reg64_and_tnum(dst_reg);
14265 }
14266 
14267 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg,
14268 				struct bpf_reg_state *src_reg)
14269 {
14270 	u64 *dst_umin = &dst_reg->umin_value;
14271 	u64 *dst_umax = &dst_reg->umax_value;
14272 	u64 src_val = src_reg->umin_value; /* non-zero, const divisor */
14273 	u64 res_max = src_val - 1;
14274 
14275 	/*
14276 	 * If dst_umax <= res_max, the result remains unchanged.
14277 	 * e.g., [2, 5] % 10 = [2, 5].
14278 	 */
14279 	if (*dst_umax <= res_max)
14280 		return;
14281 
14282 	*dst_umin = 0;
14283 	*dst_umax = min(*dst_umax, res_max);
14284 
14285 	/* Reset other ranges/tnum to unbounded/unknown. */
14286 	dst_reg->smin_value = S64_MIN;
14287 	dst_reg->smax_value = S64_MAX;
14288 	reset_reg32_and_tnum(dst_reg);
14289 }
14290 
/* Propagate 32-bit signed bounds for dst %= src with a known, non-zero
 * constant divisor.  The result has the sign of the dividend and
 * |result| <= |src| - 1.
 */
static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg,
				  struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */

	/*
	 * Safe absolute value calculation:
	 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648.
	 * Here use unsigned integer to avoid overflow.
	 */
	u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val;

	/*
	 * Calculate the maximum possible absolute value of the result.
	 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives
	 * 2147483647 (S32_MAX), which fits perfectly in s32.
	 */
	s32 res_max_abs = src_abs - 1;

	/*
	 * If the dividend is already within the result range,
	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
	 */
	if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs)
		return;

	/* General case: result has the same sign as the dividend. */
	if (*dst_smin >= 0) {
		/* Non-negative dividend: result in [0, res_max_abs]. */
		*dst_smin = 0;
		*dst_smax = min(*dst_smax, res_max_abs);
	} else if (*dst_smax <= 0) {
		/* Non-positive dividend: result in [-res_max_abs, 0]. */
		*dst_smax = 0;
		*dst_smin = max(*dst_smin, -res_max_abs);
	} else {
		/* Dividend spans zero: result may take either sign. */
		*dst_smin = -res_max_abs;
		*dst_smax = res_max_abs;
	}

	/* Reset other ranges/tnum to unbounded/unknown. */
	dst_reg->u32_min_value = 0;
	dst_reg->u32_max_value = U32_MAX;
	reset_reg64_and_tnum(dst_reg);
}
14336 
/* Propagate 64-bit signed bounds for dst %= src with a known, non-zero
 * constant divisor.  The result has the sign of the dividend and
 * |result| <= |src| - 1.
 */
static void scalar_min_max_smod(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	s64 src_val = src_reg->smin_value; /* non-zero, const divisor */

	/*
	 * Safe absolute value calculation:
	 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63.
	 * Here use unsigned integer to avoid overflow.
	 */
	u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val;

	/*
	 * Calculate the maximum possible absolute value of the result.
	 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives
	 * 2^63 - 1 (S64_MAX), which fits perfectly in s64.
	 */
	s64 res_max_abs = src_abs - 1;

	/*
	 * If the dividend is already within the result range,
	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
	 */
	if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs)
		return;

	/* General case: result has the same sign as the dividend. */
	if (*dst_smin >= 0) {
		/* Non-negative dividend: result in [0, res_max_abs]. */
		*dst_smin = 0;
		*dst_smax = min(*dst_smax, res_max_abs);
	} else if (*dst_smax <= 0) {
		/* Non-positive dividend: result in [-res_max_abs, 0]. */
		*dst_smax = 0;
		*dst_smin = max(*dst_smin, -res_max_abs);
	} else {
		/* Dividend spans zero: result may take either sign. */
		*dst_smin = -res_max_abs;
		*dst_smax = res_max_abs;
	}

	/* Reset other ranges/tnum to unbounded/unknown. */
	dst_reg->umin_value = 0;
	dst_reg->umax_value = U64_MAX;
	reset_reg32_and_tnum(dst_reg);
}
14382 
/* Propagate 32-bit (subreg) bounds for dst &= src, derived from the
 * subreg tnums and the operands' u32 ranges.  Assumes the caller has
 * already updated dst_reg->var_off for the AND.
 */
static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_subreg_is_const(src_reg->var_off);
	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
	u32 umax_val = src_reg->u32_max_value;

	/* Both operands fully known: result is the constant tnum value. */
	if (src_known && dst_known) {
		__mark_reg32_known(dst_reg, var32_off.value);
		return;
	}

	/* We get our minimum from the var_off, since that's inherently
	 * bitwise.  Our maximum is the minimum of the operands' maxima.
	 */
	dst_reg->u32_min_value = var32_off.value;
	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);

	/* Safe to set s32 bounds by casting u32 result into s32 when u32
	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
	 */
	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
		dst_reg->s32_min_value = dst_reg->u32_min_value;
		dst_reg->s32_max_value = dst_reg->u32_max_value;
	} else {
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	}
}
14413 
/* Propagate 64-bit bounds for dst &= src, derived from the tnums and the
 * operands' u64 ranges.  Assumes the caller has already updated
 * dst_reg->var_off for the AND; unlike the 32-bit variant, finishes with
 * __update_reg_bounds() to refine bounds from var_off.
 */
static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);
	u64 umax_val = src_reg->umax_value;

	/* Both operands fully known: result is the constant tnum value. */
	if (src_known && dst_known) {
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get our minimum from the var_off, since that's inherently
	 * bitwise.  Our maximum is the minimum of the operands' maxima.
	 */
	dst_reg->umin_value = dst_reg->var_off.value;
	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);

	/* Safe to set s64 bounds by casting u64 result into s64 when u64
	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
	 */
	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	} else {
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	}
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}
14445 
/* Propagate 32-bit (subreg) bounds for dst |= src, derived from the
 * subreg tnums and the operands' u32 ranges.  Assumes the caller has
 * already updated dst_reg->var_off for the OR.
 */
static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_subreg_is_const(src_reg->var_off);
	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
	u32 umin_val = src_reg->u32_min_value;

	/* Both operands fully known: result is the constant tnum value. */
	if (src_known && dst_known) {
		__mark_reg32_known(dst_reg, var32_off.value);
		return;
	}

	/* We get our maximum from the var_off, and our minimum is the
	 * maximum of the operands' minima
	 */
	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
	dst_reg->u32_max_value = var32_off.value | var32_off.mask;

	/* Safe to set s32 bounds by casting u32 result into s32 when u32
	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
	 */
	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
		dst_reg->s32_min_value = dst_reg->u32_min_value;
		dst_reg->s32_max_value = dst_reg->u32_max_value;
	} else {
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	}
}
14476 
/* Refine dst_reg's 64-bit bounds after a BPF_OR with src_reg.
 * The caller has already folded src into dst_reg->var_off via tnum_or().
 */
static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
			      struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);
	u64 umin_val = src_reg->umin_value;

	/* Both operands constant: result is exactly the (already ORed)
	 * var_off value.
	 */
	if (src_known && dst_known) {
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get our maximum from the var_off, and our minimum is the
	 * maximum of the operands' minima
	 */
	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;

	/* Safe to set s64 bounds by casting u64 result into s64 when u64
	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
	 */
	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	} else {
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	}
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}
14508 
/* Refine dst_reg's 32-bit (subreg) bounds after a BPF_XOR with src_reg.
 * The caller has already folded src into dst_reg->var_off via tnum_xor().
 */
static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_subreg_is_const(src_reg->var_off);
	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
	struct tnum var32_off = tnum_subreg(dst_reg->var_off);

	/* Both subregs constant: result is exactly the (already XORed)
	 * subreg var_off value.
	 */
	if (src_known && dst_known) {
		__mark_reg32_known(dst_reg, var32_off.value);
		return;
	}

	/* We get both minimum and maximum from the var32_off. */
	dst_reg->u32_min_value = var32_off.value;
	dst_reg->u32_max_value = var32_off.value | var32_off.mask;

	/* Safe to set s32 bounds by casting u32 result into s32 when u32
	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
	 */
	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
		dst_reg->s32_min_value = dst_reg->u32_min_value;
		dst_reg->s32_max_value = dst_reg->u32_max_value;
	} else {
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	}
}
14536 
/* Refine dst_reg's 64-bit bounds after a BPF_XOR with src_reg.
 * The caller has already folded src into dst_reg->var_off via tnum_xor().
 */
static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);

	if (src_known && dst_known) {
		/* dst_reg->var_off.value has been updated earlier */
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get both minimum and maximum from the var_off. */
	dst_reg->umin_value = dst_reg->var_off.value;
	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;

	/* Safe to set s64 bounds by casting u64 result into s64 when u64
	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
	 */
	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	} else {
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	}

	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}
14566 
/* Compute dst_reg's u32/s32 bounds for a left shift by an amount known to
 * lie in [umin_val, umax_val]. Callers guarantee umax_val < 32 for the
 * alu32 path (see is_safe_to_compute_dst_reg_range()), but this helper is
 * also called from the alu64 path where umax_val may be up to 63.
 */
static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
				   u64 umin_val, u64 umax_val)
{
	/* We lose all sign bit information (except what we can pick
	 * up from var_off)
	 */
	dst_reg->s32_min_value = S32_MIN;
	dst_reg->s32_max_value = S32_MAX;
	/* If we might shift our top bit out, then we know nothing */
	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
		dst_reg->u32_min_value = 0;
		dst_reg->u32_max_value = U32_MAX;
	} else {
		/* Top bit provably stays in range: shifting the bounds is
		 * monotonic, so shift min by the smallest and max by the
		 * largest possible amount.
		 */
		dst_reg->u32_min_value <<= umin_val;
		dst_reg->u32_max_value <<= umax_val;
	}
}
14584 
/* Track bounds for a 32-bit BPF_LSH: update subreg bounds and var_off,
 * then invalidate the 64-bit bounds so they are rebuilt from the tnum.
 */
static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	u32 umax_val = src_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	/* u32 alu operation will zext upper bits */
	struct tnum subreg = tnum_subreg(dst_reg->var_off);

	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
	/* Shift the tnum by the minimum (i.e. only provably-shifted) amount */
	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
	/* Not required but being careful mark reg64 bounds as unknown so
	 * that we are forced to pick them up from tnum and zext later and
	 * if some path skips this step we are still safe.
	 */
	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}
14602 
/* Compute dst_reg's u64/s64 bounds for a left shift by an amount known to
 * lie in [umin_val, umax_val]. Callers guarantee umax_val < 64
 * (see is_safe_to_compute_dst_reg_range()).
 */
static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
				   u64 umin_val, u64 umax_val)
{
	/* Special case <<32 because it is a common compiler pattern to sign
	 * extend subreg by doing <<32 s>>32. smin/smax assignments are correct
	 * because s32 bounds don't flip sign when shifting to the left by
	 * 32bits.
	 */
	if (umin_val == 32 && umax_val == 32) {
		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
	} else {
		/* Any other shift may move bits through the sign bit; give up
		 * on signed bounds and let them be re-derived from var_off.
		 */
		dst_reg->smax_value = S64_MAX;
		dst_reg->smin_value = S64_MIN;
	}

	/* If we might shift our top bit out, then we know nothing */
	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
		dst_reg->umin_value = 0;
		dst_reg->umax_value = U64_MAX;
	} else {
		/* Shifting bounds is monotonic when no bits are lost */
		dst_reg->umin_value <<= umin_val;
		dst_reg->umax_value <<= umax_val;
	}
}
14628 
/* Track bounds for a 64-bit BPF_LSH across both the 64-bit and 32-bit
 * (subreg) bound spaces, then refine everything from the shifted tnum.
 */
static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	u64 umax_val = src_reg->umax_value;
	u64 umin_val = src_reg->umin_value;

	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);

	/* Shift the tnum by the minimum (i.e. only provably-shifted) amount */
	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}
14643 
/* Track bounds for a 32-bit BPF_RSH (logical right shift). */
static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	struct tnum subreg = tnum_subreg(dst_reg->var_off);
	u32 umax_val = src_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;

	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
	 * be negative, then either:
	 * 1) src_reg might be zero, so the sign bit of the result is
	 *    unknown, so we lose our signed bounds
	 * 2) it's known negative, thus the unsigned bounds capture the
	 *    signed bounds
	 * 3) the signed bounds cross zero, so they tell us nothing
	 *    about the result
	 * If the value in dst_reg is known nonnegative, then again the
	 * unsigned bounds capture the signed bounds.
	 * Thus, in all cases it suffices to blow away our signed bounds
	 * and rely on inferring new ones from the unsigned bounds and
	 * var_off of the result.
	 */
	dst_reg->s32_min_value = S32_MIN;
	dst_reg->s32_max_value = S32_MAX;

	/* Right shift is anti-monotonic: a larger shift gives a smaller
	 * result, so min shifts by umax and max shifts by umin.
	 */
	dst_reg->var_off = tnum_rshift(subreg, umin_val);
	dst_reg->u32_min_value >>= umax_val;
	dst_reg->u32_max_value >>= umin_val;

	/* Invalidate 64-bit bounds; they get rebuilt from the tnum later */
	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}
14675 
/* Track bounds for a 64-bit BPF_RSH (logical right shift). */
static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	u64 umax_val = src_reg->umax_value;
	u64 umin_val = src_reg->umin_value;

	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
	 * be negative, then either:
	 * 1) src_reg might be zero, so the sign bit of the result is
	 *    unknown, so we lose our signed bounds
	 * 2) it's known negative, thus the unsigned bounds capture the
	 *    signed bounds
	 * 3) the signed bounds cross zero, so they tell us nothing
	 *    about the result
	 * If the value in dst_reg is known nonnegative, then again the
	 * unsigned bounds capture the signed bounds.
	 * Thus, in all cases it suffices to blow away our signed bounds
	 * and rely on inferring new ones from the unsigned bounds and
	 * var_off of the result.
	 */
	dst_reg->smin_value = S64_MIN;
	dst_reg->smax_value = S64_MAX;
	/* Right shift is anti-monotonic: min shifts by umax, max by umin */
	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
	dst_reg->umin_value >>= umax_val;
	dst_reg->umax_value >>= umin_val;

	/* Its not easy to operate on alu32 bounds here because it depends
	 * on bits being shifted in. Take easy way out and mark unbounded
	 * so we can recalculate later from tnum.
	 */
	__mark_reg32_unbounded(dst_reg);
	__update_reg_bounds(dst_reg);
}
14709 
/* Track bounds for a 32-bit BPF_ARSH (arithmetic right shift).
 * Only reached when the shift amount is a known constant
 * (see is_safe_to_compute_dst_reg_range()).
 */
static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
				  struct bpf_reg_state *src_reg)
{
	u64 umin_val = src_reg->u32_min_value;

	/* Upon reaching here, src_known is true and
	 * umax_val is equal to umin_val.
	 */
	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);

	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);

	/* blow away the dst_reg umin_value/umax_value and rely on
	 * dst_reg var_off to refine the result.
	 */
	dst_reg->u32_min_value = 0;
	dst_reg->u32_max_value = U32_MAX;

	/* Invalidate 64-bit bounds; they get rebuilt from the tnum later */
	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}
14732 
/* Track bounds for a 64-bit BPF_ARSH (arithmetic right shift).
 * Only reached when the shift amount is a known constant
 * (see is_safe_to_compute_dst_reg_range()).
 */
static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	u64 umin_val = src_reg->umin_value;

	/* Upon reaching here, src_known is true and umax_val is equal
	 * to umin_val.
	 */
	dst_reg->smin_value >>= umin_val;
	dst_reg->smax_value >>= umin_val;

	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);

	/* blow away the dst_reg umin_value/umax_value and rely on
	 * dst_reg var_off to refine the result.
	 */
	dst_reg->umin_value = 0;
	dst_reg->umax_value = U64_MAX;

	/* Its not easy to operate on alu32 bounds here because it depends
	 * on bits being shifted in from upper 32-bits. Take easy way out
	 * and mark unbounded so we can recalculate later from tnum.
	 */
	__mark_reg32_unbounded(dst_reg);
	__update_reg_bounds(dst_reg);
}
14759 
14760 static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
14761 {
14762 	/*
14763 	 * Byte swap operation - update var_off using tnum_bswap.
14764 	 * Three cases:
14765 	 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
14766 	 *    unconditional swap
14767 	 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
14768 	 *    swap on big-endian, truncation or no-op on little-endian
14769 	 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
14770 	 *    swap on little-endian, truncation or no-op on big-endian
14771 	 */
14772 
14773 	bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
14774 	bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
14775 	bool is_big_endian;
14776 #ifdef CONFIG_CPU_BIG_ENDIAN
14777 	is_big_endian = true;
14778 #else
14779 	is_big_endian = false;
14780 #endif
14781 	/* Apply bswap if alu64 or switch between big-endian and little-endian machines */
14782 	bool need_bswap = alu64 || (to_le == is_big_endian);
14783 
14784 	/*
14785 	 * If the register is mutated, manually reset its scalar ID to break
14786 	 * any existing ties and avoid incorrect bounds propagation.
14787 	 */
14788 	if (need_bswap || insn->imm == 16 || insn->imm == 32)
14789 		clear_scalar_id(dst_reg);
14790 
14791 	if (need_bswap) {
14792 		if (insn->imm == 16)
14793 			dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
14794 		else if (insn->imm == 32)
14795 			dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
14796 		else if (insn->imm == 64)
14797 			dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
14798 		/*
14799 		 * Byteswap scrambles the range, so we must reset bounds.
14800 		 * Bounds will be re-derived from the new tnum later.
14801 		 */
14802 		__mark_reg_unbounded(dst_reg);
14803 	}
14804 	/* For bswap16/32, truncate dst register to match the swapped size */
14805 	if (insn->imm == 16 || insn->imm == 32)
14806 		coerce_reg_to_size(dst_reg, insn->imm / 8);
14807 }
14808 
14809 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
14810 					     const struct bpf_reg_state *src_reg)
14811 {
14812 	bool src_is_const = false;
14813 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
14814 
14815 	if (insn_bitness == 32) {
14816 		if (tnum_subreg_is_const(src_reg->var_off)
14817 		    && src_reg->s32_min_value == src_reg->s32_max_value
14818 		    && src_reg->u32_min_value == src_reg->u32_max_value)
14819 			src_is_const = true;
14820 	} else {
14821 		if (tnum_is_const(src_reg->var_off)
14822 		    && src_reg->smin_value == src_reg->smax_value
14823 		    && src_reg->umin_value == src_reg->umax_value)
14824 			src_is_const = true;
14825 	}
14826 
14827 	switch (BPF_OP(insn->code)) {
14828 	case BPF_ADD:
14829 	case BPF_SUB:
14830 	case BPF_NEG:
14831 	case BPF_AND:
14832 	case BPF_XOR:
14833 	case BPF_OR:
14834 	case BPF_MUL:
14835 	case BPF_END:
14836 		return true;
14837 
14838 	/*
14839 	 * Division and modulo operators range is only safe to compute when the
14840 	 * divisor is a constant.
14841 	 */
14842 	case BPF_DIV:
14843 	case BPF_MOD:
14844 		return src_is_const;
14845 
14846 	/* Shift operators range is only computable if shift dimension operand
14847 	 * is a constant. Shifts greater than 31 or 63 are undefined. This
14848 	 * includes shifts by a negative number.
14849 	 */
14850 	case BPF_LSH:
14851 	case BPF_RSH:
14852 	case BPF_ARSH:
14853 		return (src_is_const && src_reg->umax_value < insn_bitness);
14854 	default:
14855 		return false;
14856 	}
14857 }
14858 
/* If dst_reg's (64-bit or 32-bit) signed bounds are exactly [-1, 0], fork
 * verification into two branches: the pushed state continues with the
 * register known to be 0, while the current state continues with it known
 * to be -1. Returns 0 on success (including the no-fork case) or a
 * negative error from push_stack().
 */
static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn,
			      struct bpf_reg_state *dst_reg)
{
	struct bpf_verifier_state *branch;
	struct bpf_reg_state *regs;
	bool alu32;

	/* Only the exact two-value range [-1, 0] is worth forking on */
	if (dst_reg->smin_value == -1 && dst_reg->smax_value == 0)
		alu32 = false;
	else if (dst_reg->s32_min_value == -1 && dst_reg->s32_max_value == 0)
		alu32 = true;
	else
		return 0;

	branch = push_stack(env, env->insn_idx, env->insn_idx, false);
	if (IS_ERR(branch))
		return PTR_ERR(branch);

	/* Pushed branch sees 0; the state we keep verifying sees -1 */
	regs = branch->frame[branch->curframe]->regs;
	if (alu32) {
		__mark_reg32_known(&regs[insn->dst_reg], 0);
		__mark_reg32_known(dst_reg, -1ull);
	} else {
		__mark_reg_known(&regs[insn->dst_reg], 0);
		__mark_reg_known(dst_reg, -1ull);
	}
	return 0;
}
14887 
/* WARNING: This function does calculations on 64-bit values, but the actual
 * execution may occur on 32-bit values. Therefore, things like bitshifts
 * need extra checks in the 32-bit case.
 */
static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
				      struct bpf_insn *insn,
				      struct bpf_reg_state *dst_reg,
				      struct bpf_reg_state src_reg)
{
	u8 opcode = BPF_OP(insn->code);
	s16 off = insn->off;
	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
	int ret;

	/* If the result range can't be tracked soundly, conservatively
	 * mark the destination as an unknown scalar.
	 */
	if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
		__mark_reg_unknown(env, dst_reg);
		return 0;
	}

	if (sanitize_needed(opcode)) {
		ret = sanitize_val_alu(env, insn);
		if (ret < 0)
			return sanitize_err(env, insn, ret, NULL, NULL);
	}

	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
	 * There are two classes of instructions: The first class we track both
	 * alu32 and alu64 sign/unsigned bounds independently this provides the
	 * greatest amount of precision when alu operations are mixed with jmp32
	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
	 * and BPF_OR. This is possible because these ops have fairly easy to
	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
	 * See alu32 verifier tests for examples. The second class of
	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
	 * with regards to tracking sign/unsigned bounds because the bits may
	 * cross subreg boundaries in the alu64 case. When this happens we mark
	 * the reg unbounded in the subreg bound space and use the resulting
	 * tnum to calculate an approximation of the sign/unsigned bounds.
	 */
	switch (opcode) {
	case BPF_ADD:
		scalar32_min_max_add(dst_reg, &src_reg);
		scalar_min_max_add(dst_reg, &src_reg);
		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_SUB:
		scalar32_min_max_sub(dst_reg, &src_reg);
		scalar_min_max_sub(dst_reg, &src_reg);
		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_NEG:
		/* Model -x as 0 - x: stash the original dst in fake_reg[0],
		 * set dst to the known constant 0, then subtract.
		 */
		env->fake_reg[0] = *dst_reg;
		__mark_reg_known(dst_reg, 0);
		scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
		scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
		dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
		break;
	case BPF_MUL:
		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_mul(dst_reg, &src_reg);
		scalar_min_max_mul(dst_reg, &src_reg);
		break;
	case BPF_DIV:
		/* BPF div specification: x / 0 = 0 */
		if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0)) {
			___mark_reg_known(dst_reg, 0);
			break;
		}
		/* off == 1 selects signed division (BPF_SDIV encoding) */
		if (alu32)
			if (off == 1)
				scalar32_min_max_sdiv(dst_reg, &src_reg);
			else
				scalar32_min_max_udiv(dst_reg, &src_reg);
		else
			if (off == 1)
				scalar_min_max_sdiv(dst_reg, &src_reg);
			else
				scalar_min_max_udiv(dst_reg, &src_reg);
		break;
	case BPF_MOD:
		/* BPF mod specification: x % 0 = x */
		if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0))
			break;
		/* off == 1 selects signed modulo (BPF_SMOD encoding) */
		if (alu32)
			if (off == 1)
				scalar32_min_max_smod(dst_reg, &src_reg);
			else
				scalar32_min_max_umod(dst_reg, &src_reg);
		else
			if (off == 1)
				scalar_min_max_smod(dst_reg, &src_reg);
			else
				scalar_min_max_umod(dst_reg, &src_reg);
		break;
	case BPF_AND:
		/* Forking on a [-1, 0] dst refines masking patterns */
		if (tnum_is_const(src_reg.var_off)) {
			ret = maybe_fork_scalars(env, insn, dst_reg);
			if (ret)
				return ret;
		}
		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_and(dst_reg, &src_reg);
		scalar_min_max_and(dst_reg, &src_reg);
		break;
	case BPF_OR:
		/* Forking on a [-1, 0] dst refines masking patterns */
		if (tnum_is_const(src_reg.var_off)) {
			ret = maybe_fork_scalars(env, insn, dst_reg);
			if (ret)
				return ret;
		}
		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_or(dst_reg, &src_reg);
		scalar_min_max_or(dst_reg, &src_reg);
		break;
	case BPF_XOR:
		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_xor(dst_reg, &src_reg);
		scalar_min_max_xor(dst_reg, &src_reg);
		break;
	case BPF_LSH:
		if (alu32)
			scalar32_min_max_lsh(dst_reg, &src_reg);
		else
			scalar_min_max_lsh(dst_reg, &src_reg);
		break;
	case BPF_RSH:
		if (alu32)
			scalar32_min_max_rsh(dst_reg, &src_reg);
		else
			scalar_min_max_rsh(dst_reg, &src_reg);
		break;
	case BPF_ARSH:
		if (alu32)
			scalar32_min_max_arsh(dst_reg, &src_reg);
		else
			scalar_min_max_arsh(dst_reg, &src_reg);
		break;
	case BPF_END:
		scalar_byte_swap(dst_reg, insn);
		break;
	default:
		break;
	}

	/*
	 * ALU32 ops are zero extended into 64bit register.
	 *
	 * BPF_END is already handled inside the helper (truncation),
	 * so skip zext here to avoid unexpected zero extension.
	 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
	 * This is a 64bit byte swap operation with alu32==true,
	 * but we should not zero extend the result.
	 */
	if (alu32 && opcode != BPF_END)
		zext_32_to_64(dst_reg);
	reg_bounds_sync(dst_reg);
	return 0;
}
15046 
/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
 * and var_off.
 */
static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
	u8 opcode = BPF_OP(insn->code);
	int err;

	dst_reg = &regs[insn->dst_reg];
	if (BPF_SRC(insn->code) == BPF_X)
		src_reg = &regs[insn->src_reg];
	else
		src_reg = NULL;

	/* Case where at least one operand is an arena. */
	if (dst_reg->type == PTR_TO_ARENA || (src_reg && src_reg->type == PTR_TO_ARENA)) {
		struct bpf_insn_aux_data *aux = cur_aux(env);

		/* Make the destination carry the arena pointer state */
		if (dst_reg->type != PTR_TO_ARENA)
			*dst_reg = *src_reg;

		dst_reg->subreg_def = env->insn_idx + 1;

		if (BPF_CLASS(insn->code) == BPF_ALU64)
			/*
			 * 32-bit operations zero upper bits automatically.
			 * 64-bit operations need to be converted to 32.
			 */
			aux->needs_zext = true;

		/* Any arithmetic operations are allowed on arena pointers */
		return 0;
	}

	if (dst_reg->type != SCALAR_VALUE)
		ptr_reg = dst_reg;

	if (BPF_SRC(insn->code) == BPF_X) {
		if (src_reg->type != SCALAR_VALUE) {
			if (dst_reg->type != SCALAR_VALUE) {
				/* Combining two pointers by any ALU op yields
				 * an arbitrary scalar. Disallow all math except
				 * pointer subtraction
				 */
				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
					mark_reg_unknown(env, regs, insn->dst_reg);
					return 0;
				}
				verbose(env, "R%d pointer %s pointer prohibited\n",
					insn->dst_reg,
					bpf_alu_string[opcode >> 4]);
				return -EACCES;
			} else {
				/* scalar += pointer
				 * This is legal, but we have to reverse our
				 * src/dest handling in computing the range
				 */
				err = mark_chain_precision(env, insn->dst_reg);
				if (err)
					return err;
				return adjust_ptr_min_max_vals(env, insn,
							       src_reg, dst_reg);
			}
		} else if (ptr_reg) {
			/* pointer += scalar */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
			return adjust_ptr_min_max_vals(env, insn,
						       dst_reg, src_reg);
		} else if (dst_reg->precise) {
			/* if dst_reg is precise, src_reg should be precise as well */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
		}
	} else {
		/* Pretend the src is a reg with a known value, since we only
		 * need to be able to read from this state.
		 */
		off_reg.type = SCALAR_VALUE;
		__mark_reg_known(&off_reg, insn->imm);
		src_reg = &off_reg;
		if (ptr_reg) /* pointer += K */
			return adjust_ptr_min_max_vals(env, insn,
						       ptr_reg, src_reg);
	}

	/* Got here implies adding two SCALAR_VALUEs */
	if (WARN_ON_ONCE(ptr_reg)) {
		print_verifier_state(env, vstate, vstate->curframe, true);
		verbose(env, "verifier internal error: unexpected ptr_reg\n");
		return -EFAULT;
	}
	if (WARN_ON(!src_reg)) {
		print_verifier_state(env, vstate, vstate->curframe, true);
		verbose(env, "verifier internal error: no src_reg\n");
		return -EFAULT;
	}
	/*
	 * For alu32 linked register tracking, we need to check dst_reg's
	 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(),
	 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX.
	 */
	u64 dst_umax = dst_reg->umax_value;

	err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
	if (err)
		return err;
	/*
	 * Compilers can generate the code
	 * r1 = r2
	 * r1 += 0x1
	 * if r2 < 1000 goto ...
	 * use r1 in memory access
	 * So remember constant delta between r2 and r1 and update r1 after
	 * 'if' condition.
	 */
	if (env->bpf_capable &&
	    (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
	    dst_reg->id && is_reg_const(src_reg, alu32) &&
	    !(BPF_SRC(insn->code) == BPF_X && insn->src_reg == insn->dst_reg)) {
		u64 val = reg_const_value(src_reg, alu32);
		s32 off;

		/* Delta must fit in s32; otherwise drop the link */
		if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX))
			goto clear_id;

		/* For alu32, dst must have been in u32 range before the op */
		if (alu32 && (dst_umax > U32_MAX))
			goto clear_id;

		off = (s32)val;

		if (BPF_OP(insn->code) == BPF_SUB) {
			/* Negating S32_MIN would overflow */
			if (off == S32_MIN)
				goto clear_id;
			off = -off;
		}

		if (dst_reg->id & BPF_ADD_CONST) {
			/*
			 * If the register already went through rX += val
			 * we cannot accumulate another val into rx->off.
			 */
clear_id:
			clear_scalar_id(dst_reg);
		} else {
			if (alu32)
				dst_reg->id |= BPF_ADD_CONST32;
			else
				dst_reg->id |= BPF_ADD_CONST64;
			dst_reg->delta = off;
		}
	} else {
		/*
		 * Make sure ID is cleared otherwise dst_reg min/max could be
		 * incorrectly propagated into other registers by sync_linked_regs()
		 */
		clear_scalar_id(dst_reg);
	}
	return 0;
}
15216 
15217 /* check validity of 32-bit and 64-bit arithmetic operations */
15218 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
15219 {
15220 	struct bpf_reg_state *regs = cur_regs(env);
15221 	u8 opcode = BPF_OP(insn->code);
15222 	int err;
15223 
15224 	if (opcode == BPF_END || opcode == BPF_NEG) {
15225 		/* check src operand */
15226 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15227 		if (err)
15228 			return err;
15229 
15230 		if (is_pointer_value(env, insn->dst_reg)) {
15231 			verbose(env, "R%d pointer arithmetic prohibited\n",
15232 				insn->dst_reg);
15233 			return -EACCES;
15234 		}
15235 
15236 		/* check dest operand */
15237 		if (regs[insn->dst_reg].type == SCALAR_VALUE) {
15238 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15239 			err = err ?: adjust_scalar_min_max_vals(env, insn,
15240 							 &regs[insn->dst_reg],
15241 							 regs[insn->dst_reg]);
15242 		} else {
15243 			err = check_reg_arg(env, insn->dst_reg, DST_OP);
15244 		}
15245 		if (err)
15246 			return err;
15247 
15248 	} else if (opcode == BPF_MOV) {
15249 
15250 		if (BPF_SRC(insn->code) == BPF_X) {
15251 			if (insn->off == BPF_ADDR_SPACE_CAST) {
15252 				if (!env->prog->aux->arena) {
15253 					verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
15254 					return -EINVAL;
15255 				}
15256 			}
15257 
15258 			/* check src operand */
15259 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15260 			if (err)
15261 				return err;
15262 		}
15263 
15264 		/* check dest operand, mark as required later */
15265 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15266 		if (err)
15267 			return err;
15268 
15269 		if (BPF_SRC(insn->code) == BPF_X) {
15270 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
15271 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
15272 
15273 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
15274 				if (insn->imm) {
15275 					/* off == BPF_ADDR_SPACE_CAST */
15276 					mark_reg_unknown(env, regs, insn->dst_reg);
15277 					if (insn->imm == 1) { /* cast from as(1) to as(0) */
15278 						dst_reg->type = PTR_TO_ARENA;
15279 						/* PTR_TO_ARENA is 32-bit */
15280 						dst_reg->subreg_def = env->insn_idx + 1;
15281 					}
15282 				} else if (insn->off == 0) {
15283 					/* case: R1 = R2
15284 					 * copy register state to dest reg
15285 					 */
15286 					assign_scalar_id_before_mov(env, src_reg);
15287 					copy_register_state(dst_reg, src_reg);
15288 					dst_reg->subreg_def = DEF_NOT_SUBREG;
15289 				} else {
15290 					/* case: R1 = (s8, s16 s32)R2 */
15291 					if (is_pointer_value(env, insn->src_reg)) {
15292 						verbose(env,
15293 							"R%d sign-extension part of pointer\n",
15294 							insn->src_reg);
15295 						return -EACCES;
15296 					} else if (src_reg->type == SCALAR_VALUE) {
15297 						bool no_sext;
15298 
15299 						no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
15300 						if (no_sext)
15301 							assign_scalar_id_before_mov(env, src_reg);
15302 						copy_register_state(dst_reg, src_reg);
15303 						if (!no_sext)
15304 							clear_scalar_id(dst_reg);
15305 						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
15306 						dst_reg->subreg_def = DEF_NOT_SUBREG;
15307 					} else {
15308 						mark_reg_unknown(env, regs, insn->dst_reg);
15309 					}
15310 				}
15311 			} else {
15312 				/* R1 = (u32) R2 */
15313 				if (is_pointer_value(env, insn->src_reg)) {
15314 					verbose(env,
15315 						"R%d partial copy of pointer\n",
15316 						insn->src_reg);
15317 					return -EACCES;
15318 				} else if (src_reg->type == SCALAR_VALUE) {
15319 					if (insn->off == 0) {
15320 						bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
15321 
15322 						if (is_src_reg_u32)
15323 							assign_scalar_id_before_mov(env, src_reg);
15324 						copy_register_state(dst_reg, src_reg);
15325 						/* Make sure ID is cleared if src_reg is not in u32
15326 						 * range otherwise dst_reg min/max could be incorrectly
15327 						 * propagated into src_reg by sync_linked_regs()
15328 						 */
15329 						if (!is_src_reg_u32)
15330 							clear_scalar_id(dst_reg);
15331 						dst_reg->subreg_def = env->insn_idx + 1;
15332 					} else {
15333 						/* case: W1 = (s8, s16)W2 */
15334 						bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
15335 
15336 						if (no_sext)
15337 							assign_scalar_id_before_mov(env, src_reg);
15338 						copy_register_state(dst_reg, src_reg);
15339 						if (!no_sext)
15340 							clear_scalar_id(dst_reg);
15341 						dst_reg->subreg_def = env->insn_idx + 1;
15342 						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
15343 					}
15344 				} else {
15345 					mark_reg_unknown(env, regs,
15346 							 insn->dst_reg);
15347 				}
15348 				zext_32_to_64(dst_reg);
15349 				reg_bounds_sync(dst_reg);
15350 			}
15351 		} else {
15352 			/* case: R = imm
15353 			 * remember the value we stored into this reg
15354 			 */
15355 			/* clear any state __mark_reg_known doesn't set */
15356 			mark_reg_unknown(env, regs, insn->dst_reg);
15357 			regs[insn->dst_reg].type = SCALAR_VALUE;
15358 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
15359 				__mark_reg_known(regs + insn->dst_reg,
15360 						 insn->imm);
15361 			} else {
15362 				__mark_reg_known(regs + insn->dst_reg,
15363 						 (u32)insn->imm);
15364 			}
15365 		}
15366 
15367 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
15368 
15369 		if (BPF_SRC(insn->code) == BPF_X) {
15370 			/* check src1 operand */
15371 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15372 			if (err)
15373 				return err;
15374 		}
15375 
15376 		/* check src2 operand */
15377 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15378 		if (err)
15379 			return err;
15380 
15381 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
15382 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
15383 			verbose(env, "div by zero\n");
15384 			return -EINVAL;
15385 		}
15386 
15387 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
15388 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
15389 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
15390 
15391 			if (insn->imm < 0 || insn->imm >= size) {
15392 				verbose(env, "invalid shift %d\n", insn->imm);
15393 				return -EINVAL;
15394 			}
15395 		}
15396 
15397 		/* check dest operand */
15398 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15399 		err = err ?: adjust_reg_min_max_vals(env, insn);
15400 		if (err)
15401 			return err;
15402 	}
15403 
15404 	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
15405 }
15406 
15407 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
15408 				   struct bpf_reg_state *dst_reg,
15409 				   enum bpf_reg_type type,
15410 				   bool range_right_open)
15411 {
15412 	struct bpf_func_state *state;
15413 	struct bpf_reg_state *reg;
15414 	int new_range;
15415 
15416 	if (dst_reg->umax_value == 0 && range_right_open)
15417 		/* This doesn't give us any range */
15418 		return;
15419 
15420 	if (dst_reg->umax_value > MAX_PACKET_OFF)
15421 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
15422 		 * than pkt_end, but that's because it's also less than pkt.
15423 		 */
15424 		return;
15425 
15426 	new_range = dst_reg->umax_value;
15427 	if (range_right_open)
15428 		new_range++;
15429 
15430 	/* Examples for register markings:
15431 	 *
15432 	 * pkt_data in dst register:
15433 	 *
15434 	 *   r2 = r3;
15435 	 *   r2 += 8;
15436 	 *   if (r2 > pkt_end) goto <handle exception>
15437 	 *   <access okay>
15438 	 *
15439 	 *   r2 = r3;
15440 	 *   r2 += 8;
15441 	 *   if (r2 < pkt_end) goto <access okay>
15442 	 *   <handle exception>
15443 	 *
15444 	 *   Where:
15445 	 *     r2 == dst_reg, pkt_end == src_reg
15446 	 *     r2=pkt(id=n,off=8,r=0)
15447 	 *     r3=pkt(id=n,off=0,r=0)
15448 	 *
15449 	 * pkt_data in src register:
15450 	 *
15451 	 *   r2 = r3;
15452 	 *   r2 += 8;
15453 	 *   if (pkt_end >= r2) goto <access okay>
15454 	 *   <handle exception>
15455 	 *
15456 	 *   r2 = r3;
15457 	 *   r2 += 8;
15458 	 *   if (pkt_end <= r2) goto <handle exception>
15459 	 *   <access okay>
15460 	 *
15461 	 *   Where:
15462 	 *     pkt_end == dst_reg, r2 == src_reg
15463 	 *     r2=pkt(id=n,off=8,r=0)
15464 	 *     r3=pkt(id=n,off=0,r=0)
15465 	 *
15466 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
15467 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
15468 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
15469 	 * the check.
15470 	 */
15471 
15472 	/* If our ids match, then we must have the same max_value.  And we
15473 	 * don't care about the other reg's fixed offset, since if it's too big
15474 	 * the range won't allow anything.
15475 	 * dst_reg->umax_value is known < MAX_PACKET_OFF, therefore it fits in a u16.
15476 	 */
15477 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15478 		if (reg->type == type && reg->id == dst_reg->id)
15479 			/* keep the maximum range already checked */
15480 			reg->range = max(reg->range, new_range);
15481 	}));
15482 }
15483 
15484 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15485 				u8 opcode, bool is_jmp32);
15486 static u8 rev_opcode(u8 opcode);
15487 
15488 /*
15489  * Learn more information about live branches by simulating refinement on both branches.
15490  * regs_refine_cond_op() is sound, so producing ill-formed register bounds for the branch means
15491  * that branch is dead.
15492  */
15493 static int simulate_both_branches_taken(struct bpf_verifier_env *env, u8 opcode, bool is_jmp32)
15494 {
15495 	/* Fallthrough (FALSE) branch */
15496 	regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32);
15497 	reg_bounds_sync(&env->false_reg1);
15498 	reg_bounds_sync(&env->false_reg2);
15499 	/*
15500 	 * If there is a range bounds violation in *any* of the abstract values in either
15501 	 * reg_states in the FALSE branch (i.e. reg1, reg2), the FALSE branch must be dead. Only
15502 	 * TRUE branch will be taken.
15503 	 */
15504 	if (range_bounds_violation(&env->false_reg1) || range_bounds_violation(&env->false_reg2))
15505 		return 1;
15506 
15507 	/* Jump (TRUE) branch */
15508 	regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32);
15509 	reg_bounds_sync(&env->true_reg1);
15510 	reg_bounds_sync(&env->true_reg2);
15511 	/*
15512 	 * If there is a range bounds violation in *any* of the abstract values in either
15513 	 * reg_states in the TRUE branch (i.e. true_reg1, true_reg2), the TRUE branch must be dead.
15514 	 * Only FALSE branch will be taken.
15515 	 */
15516 	if (range_bounds_violation(&env->true_reg1) || range_bounds_violation(&env->true_reg2))
15517 		return 0;
15518 
15519 	/* Both branches are possible, we can't determine which one will be taken. */
15520 	return -1;
15521 }
15522 
15523 /*
15524  * <reg1> <op> <reg2>, currently assuming reg2 is a constant
15525  */
15526 static int is_scalar_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15527 				  struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15528 {
15529 	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
15530 	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
15531 	u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
15532 	u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
15533 	s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
15534 	s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
15535 	u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
15536 	u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
15537 	s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
15538 	s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;
15539 
15540 	if (reg1 == reg2) {
15541 		switch (opcode) {
15542 		case BPF_JGE:
15543 		case BPF_JLE:
15544 		case BPF_JSGE:
15545 		case BPF_JSLE:
15546 		case BPF_JEQ:
15547 			return 1;
15548 		case BPF_JGT:
15549 		case BPF_JLT:
15550 		case BPF_JSGT:
15551 		case BPF_JSLT:
15552 		case BPF_JNE:
15553 			return 0;
15554 		case BPF_JSET:
15555 			if (tnum_is_const(t1))
15556 				return t1.value != 0;
15557 			else
15558 				return (smin1 <= 0 && smax1 >= 0) ? -1 : 1;
15559 		default:
15560 			return -1;
15561 		}
15562 	}
15563 
15564 	switch (opcode) {
15565 	case BPF_JEQ:
15566 		/* constants, umin/umax and smin/smax checks would be
15567 		 * redundant in this case because they all should match
15568 		 */
15569 		if (tnum_is_const(t1) && tnum_is_const(t2))
15570 			return t1.value == t2.value;
15571 		if (!tnum_overlap(t1, t2))
15572 			return 0;
15573 		/* non-overlapping ranges */
15574 		if (umin1 > umax2 || umax1 < umin2)
15575 			return 0;
15576 		if (smin1 > smax2 || smax1 < smin2)
15577 			return 0;
15578 		if (!is_jmp32) {
15579 			/* if 64-bit ranges are inconclusive, see if we can
15580 			 * utilize 32-bit subrange knowledge to eliminate
15581 			 * branches that can't be taken a priori
15582 			 */
15583 			if (reg1->u32_min_value > reg2->u32_max_value ||
15584 			    reg1->u32_max_value < reg2->u32_min_value)
15585 				return 0;
15586 			if (reg1->s32_min_value > reg2->s32_max_value ||
15587 			    reg1->s32_max_value < reg2->s32_min_value)
15588 				return 0;
15589 		}
15590 		break;
15591 	case BPF_JNE:
15592 		/* constants, umin/umax and smin/smax checks would be
15593 		 * redundant in this case because they all should match
15594 		 */
15595 		if (tnum_is_const(t1) && tnum_is_const(t2))
15596 			return t1.value != t2.value;
15597 		if (!tnum_overlap(t1, t2))
15598 			return 1;
15599 		/* non-overlapping ranges */
15600 		if (umin1 > umax2 || umax1 < umin2)
15601 			return 1;
15602 		if (smin1 > smax2 || smax1 < smin2)
15603 			return 1;
15604 		if (!is_jmp32) {
15605 			/* if 64-bit ranges are inconclusive, see if we can
15606 			 * utilize 32-bit subrange knowledge to eliminate
15607 			 * branches that can't be taken a priori
15608 			 */
15609 			if (reg1->u32_min_value > reg2->u32_max_value ||
15610 			    reg1->u32_max_value < reg2->u32_min_value)
15611 				return 1;
15612 			if (reg1->s32_min_value > reg2->s32_max_value ||
15613 			    reg1->s32_max_value < reg2->s32_min_value)
15614 				return 1;
15615 		}
15616 		break;
15617 	case BPF_JSET:
15618 		if (!is_reg_const(reg2, is_jmp32)) {
15619 			swap(reg1, reg2);
15620 			swap(t1, t2);
15621 		}
15622 		if (!is_reg_const(reg2, is_jmp32))
15623 			return -1;
15624 		if ((~t1.mask & t1.value) & t2.value)
15625 			return 1;
15626 		if (!((t1.mask | t1.value) & t2.value))
15627 			return 0;
15628 		break;
15629 	case BPF_JGT:
15630 		if (umin1 > umax2)
15631 			return 1;
15632 		else if (umax1 <= umin2)
15633 			return 0;
15634 		break;
15635 	case BPF_JSGT:
15636 		if (smin1 > smax2)
15637 			return 1;
15638 		else if (smax1 <= smin2)
15639 			return 0;
15640 		break;
15641 	case BPF_JLT:
15642 		if (umax1 < umin2)
15643 			return 1;
15644 		else if (umin1 >= umax2)
15645 			return 0;
15646 		break;
15647 	case BPF_JSLT:
15648 		if (smax1 < smin2)
15649 			return 1;
15650 		else if (smin1 >= smax2)
15651 			return 0;
15652 		break;
15653 	case BPF_JGE:
15654 		if (umin1 >= umax2)
15655 			return 1;
15656 		else if (umax1 < umin2)
15657 			return 0;
15658 		break;
15659 	case BPF_JSGE:
15660 		if (smin1 >= smax2)
15661 			return 1;
15662 		else if (smax1 < smin2)
15663 			return 0;
15664 		break;
15665 	case BPF_JLE:
15666 		if (umax1 <= umin2)
15667 			return 1;
15668 		else if (umin1 > umax2)
15669 			return 0;
15670 		break;
15671 	case BPF_JSLE:
15672 		if (smax1 <= smin2)
15673 			return 1;
15674 		else if (smin1 > smax2)
15675 			return 0;
15676 		break;
15677 	}
15678 
15679 	return simulate_both_branches_taken(env, opcode, is_jmp32);
15680 }
15681 
15682 static int flip_opcode(u32 opcode)
15683 {
15684 	/* How can we transform "a <op> b" into "b <op> a"? */
15685 	static const u8 opcode_flip[16] = {
15686 		/* these stay the same */
15687 		[BPF_JEQ  >> 4] = BPF_JEQ,
15688 		[BPF_JNE  >> 4] = BPF_JNE,
15689 		[BPF_JSET >> 4] = BPF_JSET,
15690 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
15691 		[BPF_JGE  >> 4] = BPF_JLE,
15692 		[BPF_JGT  >> 4] = BPF_JLT,
15693 		[BPF_JLE  >> 4] = BPF_JGE,
15694 		[BPF_JLT  >> 4] = BPF_JGT,
15695 		[BPF_JSGE >> 4] = BPF_JSLE,
15696 		[BPF_JSGT >> 4] = BPF_JSLT,
15697 		[BPF_JSLE >> 4] = BPF_JSGE,
15698 		[BPF_JSLT >> 4] = BPF_JSGT
15699 	};
15700 	return opcode_flip[opcode >> 4];
15701 }
15702 
15703 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
15704 				   struct bpf_reg_state *src_reg,
15705 				   u8 opcode)
15706 {
15707 	struct bpf_reg_state *pkt;
15708 
15709 	if (src_reg->type == PTR_TO_PACKET_END) {
15710 		pkt = dst_reg;
15711 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
15712 		pkt = src_reg;
15713 		opcode = flip_opcode(opcode);
15714 	} else {
15715 		return -1;
15716 	}
15717 
15718 	if (pkt->range >= 0)
15719 		return -1;
15720 
15721 	switch (opcode) {
15722 	case BPF_JLE:
15723 		/* pkt <= pkt_end */
15724 		fallthrough;
15725 	case BPF_JGT:
15726 		/* pkt > pkt_end */
15727 		if (pkt->range == BEYOND_PKT_END)
15728 			/* pkt has at last one extra byte beyond pkt_end */
15729 			return opcode == BPF_JGT;
15730 		break;
15731 	case BPF_JLT:
15732 		/* pkt < pkt_end */
15733 		fallthrough;
15734 	case BPF_JGE:
15735 		/* pkt >= pkt_end */
15736 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
15737 			return opcode == BPF_JGE;
15738 		break;
15739 	}
15740 	return -1;
15741 }
15742 
15743 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
15744  * and return:
15745  *  1 - branch will be taken and "goto target" will be executed
15746  *  0 - branch will not be taken and fall-through to next insn
15747  * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
15748  *      range [0,10]
15749  */
15750 static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15751 			   struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15752 {
15753 	if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
15754 		return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
15755 
15756 	if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
15757 		u64 val;
15758 
15759 		/* arrange that reg2 is a scalar, and reg1 is a pointer */
15760 		if (!is_reg_const(reg2, is_jmp32)) {
15761 			opcode = flip_opcode(opcode);
15762 			swap(reg1, reg2);
15763 		}
15764 		/* and ensure that reg2 is a constant */
15765 		if (!is_reg_const(reg2, is_jmp32))
15766 			return -1;
15767 
15768 		if (!reg_not_null(reg1))
15769 			return -1;
15770 
15771 		/* If pointer is valid tests against zero will fail so we can
15772 		 * use this to direct branch taken.
15773 		 */
15774 		val = reg_const_value(reg2, is_jmp32);
15775 		if (val != 0)
15776 			return -1;
15777 
15778 		switch (opcode) {
15779 		case BPF_JEQ:
15780 			return 0;
15781 		case BPF_JNE:
15782 			return 1;
15783 		default:
15784 			return -1;
15785 		}
15786 	}
15787 
15788 	/* now deal with two scalars, but not necessarily constants */
15789 	return is_scalar_branch_taken(env, reg1, reg2, opcode, is_jmp32);
15790 }
15791 
15792 /* Opcode that corresponds to a *false* branch condition.
15793  * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
15794  */
15795 static u8 rev_opcode(u8 opcode)
15796 {
15797 	switch (opcode) {
15798 	case BPF_JEQ:		return BPF_JNE;
15799 	case BPF_JNE:		return BPF_JEQ;
15800 	/* JSET doesn't have it's reverse opcode in BPF, so add
15801 	 * BPF_X flag to denote the reverse of that operation
15802 	 */
15803 	case BPF_JSET:		return BPF_JSET | BPF_X;
15804 	case BPF_JSET | BPF_X:	return BPF_JSET;
15805 	case BPF_JGE:		return BPF_JLT;
15806 	case BPF_JGT:		return BPF_JLE;
15807 	case BPF_JLE:		return BPF_JGT;
15808 	case BPF_JLT:		return BPF_JGE;
15809 	case BPF_JSGE:		return BPF_JSLT;
15810 	case BPF_JSGT:		return BPF_JSLE;
15811 	case BPF_JSLE:		return BPF_JSGT;
15812 	case BPF_JSLT:		return BPF_JSGE;
15813 	default:		return 0;
15814 	}
15815 }
15816 
/* Refine range knowledge for a <reg1> <op> <reg2> conditional operation,
 * assuming the condition holds. Both registers are narrowed in place;
 * callers re-derive fully consistent bounds via reg_bounds_sync()
 * afterwards.
 */
static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
				u8 opcode, bool is_jmp32)
{
	struct tnum t;
	u64 val;

	/* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below */
	switch (opcode) {
	case BPF_JGE:
	case BPF_JGT:
	case BPF_JSGE:
	case BPF_JSGT:
		opcode = flip_opcode(opcode);
		swap(reg1, reg2);
		break;
	default:
		break;
	}

	switch (opcode) {
	case BPF_JEQ:
		/* Equality: both sides collapse to the intersection of
		 * their ranges and of their known bits (tnums).
		 */
		if (is_jmp32) {
			reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
			reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
			reg2->u32_min_value = reg1->u32_min_value;
			reg2->u32_max_value = reg1->u32_max_value;
			reg2->s32_min_value = reg1->s32_min_value;
			reg2->s32_max_value = reg1->s32_max_value;

			t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
			reg2->var_off = tnum_with_subreg(reg2->var_off, t);
		} else {
			reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
			reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
			reg2->umin_value = reg1->umin_value;
			reg2->umax_value = reg1->umax_value;
			reg2->smin_value = reg1->smin_value;
			reg2->smax_value = reg1->smax_value;

			reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
			reg2->var_off = reg1->var_off;
		}
		break;
	case BPF_JNE:
		/* arrange for reg2 to be the constant side, if any */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;

		/* try to recompute the bound of reg1 if reg2 is a const and
		 * is exactly the edge of reg1.
		 */
		val = reg_const_value(reg2, is_jmp32);
		if (is_jmp32) {
			/* u32_min_value is not equal to 0xffffffff at this point,
			 * because otherwise u32_max_value is 0xffffffff as well,
			 * in such a case both reg1 and reg2 would be constants,
			 * jump would be predicted and regs_refine_cond_op()
			 * wouldn't be called.
			 *
			 * Same reasoning works for all {u,s}{min,max}{32,64} cases
			 * below.
			 */
			if (reg1->u32_min_value == (u32)val)
				reg1->u32_min_value++;
			if (reg1->u32_max_value == (u32)val)
				reg1->u32_max_value--;
			if (reg1->s32_min_value == (s32)val)
				reg1->s32_min_value++;
			if (reg1->s32_max_value == (s32)val)
				reg1->s32_max_value--;
		} else {
			if (reg1->umin_value == (u64)val)
				reg1->umin_value++;
			if (reg1->umax_value == (u64)val)
				reg1->umax_value--;
			if (reg1->smin_value == (s64)val)
				reg1->smin_value++;
			if (reg1->smax_value == (s64)val)
				reg1->smax_value--;
		}
		break;
	case BPF_JSET:
		/* arrange for reg2 to be the constant side, if any */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;
		val = reg_const_value(reg2, is_jmp32);
		/* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
		 * requires single bit to learn something useful. E.g., if we
		 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
		 * are actually set? We can learn something definite only if
		 * it's a single-bit value to begin with.
		 *
		 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
		 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
		 * bit 1 is set, which we can readily use in adjustments.
		 */
		if (!is_power_of_2(val))
			break;
		/* the single tested bit is known to be set in reg1 */
		if (is_jmp32) {
			t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
		} else {
			reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
		}
		break;
	case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
		/* arrange for reg2 to be the constant side, if any */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;
		val = reg_const_value(reg2, is_jmp32);
		/* Forget the ranges before narrowing tnums, to avoid invariant
		 * violations if we're on a dead branch.
		 */
		__mark_reg_unbounded(reg1);
		/* all bits in the mask are known to be clear in reg1 */
		if (is_jmp32) {
			t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
		} else {
			reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
		}
		break;
	case BPF_JLE:
		/* reg1 <= reg2: cap reg1's max by reg2's max, raise reg2's
		 * min to reg1's min.
		 */
		if (is_jmp32) {
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
			reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
		} else {
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
			reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
		}
		break;
	case BPF_JLT:
		/* reg1 < reg2: same as JLE but the bounds are strict by one */
		if (is_jmp32) {
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1);
			reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value);
		} else {
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1);
			reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value);
		}
		break;
	case BPF_JSLE:
		/* signed counterpart of BPF_JLE */
		if (is_jmp32) {
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
			reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
		} else {
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
			reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
		}
		break;
	case BPF_JSLT:
		/* signed counterpart of BPF_JLT */
		if (is_jmp32) {
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1);
			reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value);
		} else {
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1);
			reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value);
		}
		break;
	default:
		/* nothing can be learned for the remaining opcodes */
		return;
	}
}
15987 
15988 /* Check for invariant violations on the registers for both branches of a condition */
15989 static int regs_bounds_sanity_check_branches(struct bpf_verifier_env *env)
15990 {
15991 	int err;
15992 
15993 	err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1");
15994 	err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2");
15995 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1");
15996 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2");
15997 	return err;
15998 }
15999 
/* If @reg carries the id of the pointer that was just NULL-checked,
 * rewrite it to match the branch outcome: a known-zero scalar on the
 * "== NULL" branch (@is_null), a definitely-not-null pointer otherwise.
 */
static void mark_ptr_or_null_reg(struct bpf_func_state *state,
				 struct bpf_reg_state *reg, u32 id,
				 bool is_null)
{
	/* A zero id would match every untagged register; warn and skip in
	 * that case, except for RCU-protected pointers where it is allowed.
	 */
	if (type_may_be_null(reg->type) && reg->id == id &&
	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
		/* Old offset should have been known-zero, because we don't
		 * allow pointer arithmetic on pointers that might be NULL.
		 * If we see this happening, don't convert the register.
		 *
		 * But in some cases, some helpers that return local kptrs
		 * advance offset for the returned pointer. In those cases,
		 * it is fine to expect to see reg->var_off.
		 */
		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
		    WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0)))
			return;
		if (is_null) {
			/* We don't need id and ref_obj_id from this point
			 * onwards anymore, thus we should better reset it,
			 * so that state pruning has chances to take effect.
			 */
			__mark_reg_known_zero(reg);
			reg->type = SCALAR_VALUE;

			return;
		}

		mark_ptr_not_null_reg(reg);

		if (!reg_may_point_to_spin_lock(reg)) {
			/* For not-NULL ptr, reg->ref_obj_id will be reset
			 * in release_reference().
			 *
			 * reg->id is still used by spin_lock ptr. Other
			 * than spin_lock ptr type, reg->id can be reset.
			 */
			reg->id = 0;
		}
	}
}
16041 
16042 /* The logic is similar to find_good_pkt_pointers(), both could eventually
16043  * be folded together at some point.
16044  */
16045 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
16046 				  bool is_null)
16047 {
16048 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
16049 	struct bpf_reg_state *regs = state->regs, *reg;
16050 	u32 ref_obj_id = regs[regno].ref_obj_id;
16051 	u32 id = regs[regno].id;
16052 
16053 	if (ref_obj_id && ref_obj_id == id && is_null)
16054 		/* regs[regno] is in the " == NULL" branch.
16055 		 * No one could have freed the reference state before
16056 		 * doing the NULL check.
16057 		 */
16058 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
16059 
16060 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
16061 		mark_ptr_or_null_reg(state, reg, id, is_null);
16062 	}));
16063 }
16064 
/* Recognize a conditional jump comparing a packet pointer against pkt_end
 * (or pkt_meta against pkt_data) and record the proven accessible range in
 * whichever branch the comparison makes safe, via find_good_pkt_pointers()
 * and mark_pkt_end().
 *
 * Returns true iff the jump was handled as a packet-pointer comparison.
 */
static bool try_match_pkt_pointers(const struct bpf_insn *insn,
				   struct bpf_reg_state *dst_reg,
				   struct bpf_reg_state *src_reg,
				   struct bpf_verifier_state *this_branch,
				   struct bpf_verifier_state *other_branch)
{
	if (BPF_SRC(insn->code) != BPF_X)
		return false;

	/* Pointers are always 64-bit. */
	if (BPF_CLASS(insn->code) == BPF_JMP32)
		return false;

	/* Each case below handles both operand orders: packet pointer on the
	 * dst side vs. the src side.  The boolean arguments select whether
	 * the proven range is right-open and which side of pkt_end the
	 * pointer is marked on.
	 */
	switch (BPF_OP(insn->code)) {
	case BPF_JGT:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
			find_good_pkt_pointers(this_branch, dst_reg,
					       dst_reg->type, false);
			mark_pkt_end(other_branch, insn->dst_reg, true);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
			find_good_pkt_pointers(other_branch, src_reg,
					       src_reg->type, true);
			mark_pkt_end(this_branch, insn->src_reg, false);
		} else {
			return false;
		}
		break;
	case BPF_JLT:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
			find_good_pkt_pointers(other_branch, dst_reg,
					       dst_reg->type, true);
			mark_pkt_end(this_branch, insn->dst_reg, false);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
			find_good_pkt_pointers(this_branch, src_reg,
					       src_reg->type, false);
			mark_pkt_end(other_branch, insn->src_reg, true);
		} else {
			return false;
		}
		break;
	case BPF_JGE:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
			find_good_pkt_pointers(this_branch, dst_reg,
					       dst_reg->type, true);
			mark_pkt_end(other_branch, insn->dst_reg, false);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
			find_good_pkt_pointers(other_branch, src_reg,
					       src_reg->type, false);
			mark_pkt_end(this_branch, insn->src_reg, true);
		} else {
			return false;
		}
		break;
	case BPF_JLE:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
			find_good_pkt_pointers(other_branch, dst_reg,
					       dst_reg->type, false);
			mark_pkt_end(this_branch, insn->dst_reg, true);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
			find_good_pkt_pointers(this_branch, src_reg,
					       src_reg->type, true);
			mark_pkt_end(other_branch, insn->src_reg, false);
		} else {
			return false;
		}
		break;
	default:
		return false;
	}

	return true;
}
16169 
16170 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
16171 				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
16172 {
16173 	struct linked_reg *e;
16174 
16175 	if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
16176 		return;
16177 
16178 	e = linked_regs_push(reg_set);
16179 	if (e) {
16180 		e->frameno = frameno;
16181 		e->is_reg = is_reg;
16182 		e->regno = spi_or_reg;
16183 	} else {
16184 		clear_scalar_id(reg);
16185 	}
16186 }
16187 
16188 /* For all R being scalar registers or spilled scalar registers
16189  * in verifier state, save R in linked_regs if R->id == id.
16190  * If there are too many Rs sharing same id, reset id for leftover Rs.
16191  */
16192 static void collect_linked_regs(struct bpf_verifier_env *env,
16193 				struct bpf_verifier_state *vstate,
16194 				u32 id,
16195 				struct linked_regs *linked_regs)
16196 {
16197 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
16198 	struct bpf_func_state *func;
16199 	struct bpf_reg_state *reg;
16200 	u16 live_regs;
16201 	int i, j;
16202 
16203 	id = id & ~BPF_ADD_CONST;
16204 	for (i = vstate->curframe; i >= 0; i--) {
16205 		live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before;
16206 		func = vstate->frame[i];
16207 		for (j = 0; j < BPF_REG_FP; j++) {
16208 			if (!(live_regs & BIT(j)))
16209 				continue;
16210 			reg = &func->regs[j];
16211 			__collect_linked_regs(linked_regs, reg, id, i, j, true);
16212 		}
16213 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
16214 			if (!bpf_is_spilled_reg(&func->stack[j]))
16215 				continue;
16216 			reg = &func->stack[j].spilled_ptr;
16217 			__collect_linked_regs(linked_regs, reg, id, i, j, false);
16218 		}
16219 	}
16220 }
16221 
16222 /* For all R in linked_regs, copy known_reg range into R
16223  * if R->id == known_reg->id.
16224  */
static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
			     struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
{
	struct bpf_reg_state fake_reg;
	struct bpf_reg_state *reg;
	struct linked_reg *e;
	int i;

	for (i = 0; i < linked_regs->cnt; ++i) {
		e = &linked_regs->entries[i];
		/* Entry refers either to a register or to a stack spill slot. */
		reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
				: &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
		if (reg->type != SCALAR_VALUE || reg == known_reg)
			continue;
		/* ids must match once the BPF_ADD_CONST flag is masked off */
		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
			continue;
		/*
		 * Skip mixed 32/64-bit links: the delta relationship doesn't
		 * hold across different ALU widths.
		 */
		if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
			continue;
		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
		    reg->delta == known_reg->delta) {
			/* Same value (no constant offset between the two, or
			 * identical offsets): copy the range verbatim.
			 */
			s32 saved_subreg_def = reg->subreg_def;

			copy_register_state(reg, known_reg);
			reg->subreg_def = saved_subreg_def;
		} else {
			/* reg differs from known_reg by a constant:
			 * reg == known_reg + (reg->delta - known_reg->delta)
			 */
			s32 saved_subreg_def = reg->subreg_def;
			s32 saved_off = reg->delta;
			u32 saved_id = reg->id;

			fake_reg.type = SCALAR_VALUE;
			__mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta);

			/* reg = known_reg; reg += delta */
			copy_register_state(reg, known_reg);
			/*
			 * Must preserve off, id and subreg_def flag,
			 * otherwise another sync_linked_regs() will be incorrect.
			 */
			reg->delta = saved_off;
			reg->id = saved_id;
			reg->subreg_def = saved_subreg_def;

			scalar32_min_max_add(reg, &fake_reg);
			scalar_min_max_add(reg, &fake_reg);
			reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
			if ((reg->id | known_reg->id) & BPF_ADD_CONST32)
				zext_32_to_64(reg);
			reg_bounds_sync(reg);
		}
		/* Record the touched location for verifier log output. */
		if (e->is_reg)
			mark_reg_scratched(env, e->regno);
		else
			mark_stack_slot_scratched(env, e->spi);
	}
}
16284 
/* Verify a conditional jump (BPF_JMP/BPF_JMP32 with a condition, or the
 * BPF_JCOND/may_goto pseudo instruction).  Prunes statically decided
 * branches via is_branch_taken(), otherwise pushes the not-taken branch
 * onto the verification stack and refines register ranges, linked-scalar
 * ranges, nullness and packet pointers in both branches.
 * Returns 0 on success or a negative errno on rejection.
 */
static int check_cond_jmp_op(struct bpf_verifier_env *env,
			     struct bpf_insn *insn, int *insn_idx)
{
	struct bpf_verifier_state *this_branch = env->cur_state;
	struct bpf_verifier_state *other_branch;
	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
	struct bpf_reg_state *eq_branch_regs;
	struct linked_regs linked_regs = {};
	u8 opcode = BPF_OP(insn->code);
	int insn_flags = 0;
	bool is_jmp32;
	int pred = -1;
	int err;

	/* Only conditional jumps are expected to reach here. */
	if (opcode == BPF_JA || opcode > BPF_JCOND) {
		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
		return -EINVAL;
	}

	if (opcode == BPF_JCOND) {
		struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
		int idx = *insn_idx;

		prev_st = find_prev_entry(env, cur_st->parent, idx);

		/* branch out 'fallthrough' insn as a new state to explore */
		queued_st = push_stack(env, idx + 1, idx, false);
		if (IS_ERR(queued_st))
			return PTR_ERR(queued_st);

		queued_st->may_goto_depth++;
		if (prev_st)
			widen_imprecise_scalars(env, prev_st, queued_st);
		/* current path takes the goto */
		*insn_idx += insn->off;
		return 0;
	}

	/* check src2 operand */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	dst_reg = &regs[insn->dst_reg];
	if (BPF_SRC(insn->code) == BPF_X) {
		/* check src1 operand */
		err = check_reg_arg(env, insn->src_reg, SRC_OP);
		if (err)
			return err;

		src_reg = &regs[insn->src_reg];
		/* comparing two pointers is only allowed for packet pointers */
		if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
		    is_pointer_value(env, insn->src_reg)) {
			verbose(env, "R%d pointer comparison prohibited\n",
				insn->src_reg);
			return -EACCES;
		}

		if (src_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_SRC_REG_STACK;
		if (dst_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_DST_REG_STACK;
	} else {
		/* BPF_K: materialize the immediate as a fake known scalar */
		src_reg = &env->fake_reg[0];
		memset(src_reg, 0, sizeof(*src_reg));
		src_reg->type = SCALAR_VALUE;
		__mark_reg_known(src_reg, insn->imm);

		if (dst_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_DST_REG_STACK;
	}

	if (insn_flags) {
		err = bpf_push_jmp_history(env, this_branch, insn_flags, 0);
		if (err)
			return err;
	}

	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
	/* Snapshot both operands; the false_*/true_* copies are refined per
	 * branch before being written back below.
	 */
	copy_register_state(&env->false_reg1, dst_reg);
	copy_register_state(&env->false_reg2, src_reg);
	copy_register_state(&env->true_reg1, dst_reg);
	copy_register_state(&env->true_reg2, src_reg);
	pred = is_branch_taken(env, dst_reg, src_reg, opcode, is_jmp32);
	if (pred >= 0) {
		/* If we get here with a dst_reg pointer type it is because
		 * above is_branch_taken() special cased the 0 comparison.
		 */
		if (!__is_pointer_value(false, dst_reg))
			err = mark_chain_precision(env, insn->dst_reg);
		if (BPF_SRC(insn->code) == BPF_X && !err &&
		    !__is_pointer_value(false, src_reg))
			err = mark_chain_precision(env, insn->src_reg);
		if (err)
			return err;
	}

	if (pred == 1) {
		/* Only follow the goto, ignore fall-through. If needed, push
		 * the fall-through branch for simulation under speculative
		 * execution.
		 */
		if (!env->bypass_spec_v1) {
			err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx);
			if (err < 0)
				return err;
		}
		if (env->log.level & BPF_LOG_LEVEL)
			print_insn_state(env, this_branch, this_branch->curframe);
		*insn_idx += insn->off;
		return 0;
	} else if (pred == 0) {
		/* Only follow the fall-through branch, since that's where the
		 * program will go. If needed, push the goto branch for
		 * simulation under speculative execution.
		 */
		if (!env->bypass_spec_v1) {
			err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1,
							*insn_idx);
			if (err < 0)
				return err;
		}
		if (env->log.level & BPF_LOG_LEVEL)
			print_insn_state(env, this_branch, this_branch->curframe);
		return 0;
	}

	/* Push scalar registers sharing same ID to jump history,
	 * do this before creating 'other_branch', so that both
	 * 'this_branch' and 'other_branch' share this history
	 * if parent state is created.
	 */
	if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
		collect_linked_regs(env, this_branch, src_reg->id, &linked_regs);
	if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
		collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
	if (linked_regs.cnt > 1) {
		err = bpf_push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
		if (err)
			return err;
	}

	/* branch outcome unknown: explore both paths */
	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
	if (IS_ERR(other_branch))
		return PTR_ERR(other_branch);
	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;

	err = regs_bounds_sanity_check_branches(env);
	if (err)
		return err;

	/* install per-branch refined copies computed by is_branch_taken() */
	copy_register_state(dst_reg, &env->false_reg1);
	copy_register_state(src_reg, &env->false_reg2);
	copy_register_state(&other_branch_regs[insn->dst_reg], &env->true_reg1);
	if (BPF_SRC(insn->code) == BPF_X)
		copy_register_state(&other_branch_regs[insn->src_reg], &env->true_reg2);

	/* propagate refined ranges to all registers linked by id */
	if (BPF_SRC(insn->code) == BPF_X &&
	    src_reg->type == SCALAR_VALUE && src_reg->id &&
	    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
		sync_linked_regs(env, this_branch, src_reg, &linked_regs);
		sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg],
				 &linked_regs);
	}
	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
		sync_linked_regs(env, this_branch, dst_reg, &linked_regs);
		sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg],
				 &linked_regs);
	}

	/* if one pointer register is compared to another pointer
	 * register check if PTR_MAYBE_NULL could be lifted.
	 * E.g. register A - maybe null
	 *      register B - not null
	 * for JNE A, B, ... - A is not null in the false branch;
	 * for JEQ A, B, ... - A is not null in the true branch.
	 *
	 * Since PTR_TO_BTF_ID points to a kernel struct that does
	 * not need to be null checked by the BPF program, i.e.,
	 * could be null even without PTR_MAYBE_NULL marking, so
	 * only propagate nullness when neither reg is that type.
	 */
	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
		eq_branch_regs = NULL;
		switch (opcode) {
		case BPF_JEQ:
			eq_branch_regs = other_branch_regs;
			break;
		case BPF_JNE:
			eq_branch_regs = regs;
			break;
		default:
			/* do nothing */
			break;
		}
		if (eq_branch_regs) {
			if (type_may_be_null(src_reg->type))
				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
			else
				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
		}
	}

	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
	 * Also does the same detection for a register whose the value is
	 * known to be 0.
	 * NOTE: these optimizations below are related with pointer comparison
	 *       which will never be JMP32.
	 */
	if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
	    type_may_be_null(dst_reg->type) &&
	    ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) ||
	     (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) {
		/* Mark all identical registers in each branch as either
		 * safe or unknown depending R == 0 or R != 0 conditional.
		 */
		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
				      opcode == BPF_JNE);
		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
				      opcode == BPF_JEQ);
	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
					   this_branch, other_branch) &&
		   is_pointer_value(env, insn->dst_reg)) {
		verbose(env, "R%d pointer comparison prohibited\n",
			insn->dst_reg);
		return -EACCES;
	}
	if (env->log.level & BPF_LOG_LEVEL)
		print_insn_state(env, this_branch, this_branch->curframe);
	return 0;
}
16522 
16523 /* verify BPF_LD_IMM64 instruction */
static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_insn_aux_data *aux = cur_aux(env);
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *dst_reg;
	struct bpf_map *map;
	int err;

	if (BPF_SIZE(insn->code) != BPF_DW) {
		verbose(env, "invalid BPF_LD_IMM insn\n");
		return -EINVAL;
	}

	err = check_reg_arg(env, insn->dst_reg, DST_OP);
	if (err)
		return err;

	dst_reg = &regs[insn->dst_reg];
	if (insn->src_reg == 0) {
		/* plain 64-bit immediate: low 32 bits in this insn,
		 * high 32 bits in the next one
		 */
		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;

		dst_reg->type = SCALAR_VALUE;
		__mark_reg_known(&regs[insn->dst_reg], imm);
		return 0;
	}

	/* All special src_reg cases are listed below. From this point onwards
	 * we either succeed and assign a corresponding dst_reg->type after
	 * zeroing the offset, or fail and reject the program.
	 */
	mark_reg_known_zero(env, regs, insn->dst_reg);

	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
		/* address of a kernel variable, resolved earlier into aux */
		dst_reg->type = aux->btf_var.reg_type;
		switch (base_type(dst_reg->type)) {
		case PTR_TO_MEM:
			dst_reg->mem_size = aux->btf_var.mem_size;
			break;
		case PTR_TO_BTF_ID:
			dst_reg->btf = aux->btf_var.btf;
			dst_reg->btf_id = aux->btf_var.btf_id;
			break;
		default:
			verifier_bug(env, "pseudo btf id: unexpected dst reg type");
			return -EFAULT;
		}
		return 0;
	}

	if (insn->src_reg == BPF_PSEUDO_FUNC) {
		/* address of a local (static) subprog, used for callbacks */
		struct bpf_prog_aux *aux = env->prog->aux;
		u32 subprogno = bpf_find_subprog(env,
						 env->insn_idx + insn->imm + 1);

		if (!aux->func_info) {
			verbose(env, "missing btf func_info\n");
			return -EINVAL;
		}
		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
			verbose(env, "callback function not static\n");
			return -EINVAL;
		}

		dst_reg->type = PTR_TO_FUNC;
		dst_reg->subprogno = subprogno;
		return 0;
	}

	/* remaining cases load a map address or a map value address */
	map = env->used_maps[aux->map_index];

	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
		if (map->map_type == BPF_MAP_TYPE_ARENA) {
			/* arena addresses are opaque to the verifier */
			__mark_reg_unknown(env, dst_reg);
			dst_reg->map_ptr = map;
			return 0;
		}
		__mark_reg_known(dst_reg, aux->map_off);
		dst_reg->type = PTR_TO_MAP_VALUE;
		dst_reg->map_ptr = map;
		WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
			     map->max_entries != 1);
		/* We want reg->id to be same (0) as map_value is not distinct */
	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
		dst_reg->type = CONST_PTR_TO_MAP;
		dst_reg->map_ptr = map;
	} else {
		verifier_bug(env, "unexpected src reg value for ldimm64");
		return -EFAULT;
	}

	return 0;
}
16618 
16619 static bool may_access_skb(enum bpf_prog_type type)
16620 {
16621 	switch (type) {
16622 	case BPF_PROG_TYPE_SOCKET_FILTER:
16623 	case BPF_PROG_TYPE_SCHED_CLS:
16624 	case BPF_PROG_TYPE_SCHED_ACT:
16625 		return true;
16626 	default:
16627 		return false;
16628 	}
16629 }
16630 
16631 /* verify safety of LD_ABS|LD_IND instructions:
16632  * - they can only appear in the programs where ctx == skb
16633  * - since they are wrappers of function calls, they scratch R1-R5 registers,
16634  *   preserve R6-R9, and store return value into R0
16635  *
16636  * Implicit input:
16637  *   ctx == skb == R6 == CTX
16638  *
16639  * Explicit input:
16640  *   SRC == any register
16641  *   IMM == 32-bit immediate
16642  *
16643  * Output:
16644  *   R0 - 8/16/32-bit skb data converted to cpu endianness
16645  */
static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env);
	static const int ctx_reg = BPF_REG_6;
	u8 mode = BPF_MODE(insn->code);
	int i, err;

	if (!may_access_skb(resolve_prog_type(env->prog))) {
		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
		return -EINVAL;
	}

	if (!env->ops->gen_ld_abs) {
		verifier_bug(env, "gen_ld_abs is null");
		return -EFAULT;
	}

	/* check whether implicit source operand (register R6) is readable */
	err = check_reg_arg(env, ctx_reg, SRC_OP);
	if (err)
		return err;

	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
	 * gen_ld_abs() may terminate the program at runtime, leading to
	 * reference leak.
	 */
	err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
	if (err)
		return err;

	if (regs[ctx_reg].type != PTR_TO_CTX) {
		verbose(env,
			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
		return -EINVAL;
	}

	if (mode == BPF_IND) {
		/* check explicit source operand */
		err = check_reg_arg(env, insn->src_reg, SRC_OP);
		if (err)
			return err;
	}

	/* R6 must carry the context at zero offset */
	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
	if (err < 0)
		return err;

	/* reset caller saved regs to unreadable */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* mark destination R0 register as readable, since it contains
	 * the value fetched from the packet.
	 * Already marked as written above.
	 */
	mark_reg_unknown(env, regs, BPF_REG_0);
	/* ld_abs load up to 32-bit skb data. */
	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
	/*
	 * See bpf_gen_ld_abs() which emits a hidden BPF_EXIT with r0=0
	 * which must be explored by the verifier when in a subprog.
	 */
	if (env->cur_state->curframe) {
		struct bpf_verifier_state *branch;

		mark_reg_scratched(env, BPF_REG_0);
		/* queue the normal continuation, then simulate the hidden
		 * early exit (r0 = 0) on the current path
		 */
		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
		if (IS_ERR(branch))
			return PTR_ERR(branch);
		mark_reg_known_zero(env, regs, BPF_REG_0);
		err = prepare_func_exit(env, &env->insn_idx);
		if (err)
			return err;
		/* compensate for the ++ in do_check()'s main loop */
		env->insn_idx--;
	}
	return 0;
}
16725 
16726 
/* Compute the allowed R0 range at program exit for the current program
 * and attach type.  Returns false when any return value is allowed (no
 * enforcement), true when *range holds the permitted interval.
 */
static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);

	/* Default return value range. */
	*range = retval_range(0, 1);

	switch (prog_type) {
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
		switch (env->prog->expected_attach_type) {
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
		case BPF_CGROUP_UNIX_RECVMSG:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_UNIX_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
		case BPF_CGROUP_UNIX_GETSOCKNAME:
			*range = retval_range(1, 1);
			break;
		case BPF_CGROUP_INET4_BIND:
		case BPF_CGROUP_INET6_BIND:
			*range = retval_range(0, 3);
			break;
		default:
			break;
		}
		break;
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)
			*range = retval_range(0, 3);
		break;
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		/* keep the default 0..1 range */
		break;
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
		if (!env->prog->aux->attach_btf_id)
			return false;
		*range = retval_range(0, 0);
		break;
	case BPF_PROG_TYPE_TRACING:
		switch (env->prog->expected_attach_type) {
		case BPF_TRACE_FENTRY:
		case BPF_TRACE_FEXIT:
		case BPF_TRACE_FSESSION:
			*range = retval_range(0, 0);
			break;
		case BPF_TRACE_RAW_TP:
		case BPF_MODIFY_RETURN:
			return false;
		case BPF_TRACE_ITER:
		default:
			break;
		}
		break;
	case BPF_PROG_TYPE_KPROBE:
		switch (env->prog->expected_attach_type) {
		case BPF_TRACE_KPROBE_SESSION:
		case BPF_TRACE_UPROBE_SESSION:
			/* session programs use the default 0..1 range */
			break;
		default:
			return false;
		}
		break;
	case BPF_PROG_TYPE_SK_LOOKUP:
		*range = retval_range(SK_DROP, SK_PASS);
		break;

	case BPF_PROG_TYPE_LSM:
		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
			/* no range found, any return value is allowed */
			if (!get_func_retval_range(env->prog, range))
				return false;
			/* no restricted range, any return value is allowed */
			if (range->minval == S32_MIN && range->maxval == S32_MAX)
				return false;
			range->return_32bit = true;
		} else if (!env->prog->aux->attach_func_proto->type) {
			/* Make sure programs that attach to void
			 * hooks don't try to modify return value.
			 */
			*range = retval_range(1, 1);
		}
		break;

	case BPF_PROG_TYPE_NETFILTER:
		*range = retval_range(NF_DROP, NF_ACCEPT);
		break;
	case BPF_PROG_TYPE_STRUCT_OPS:
		*range = retval_range(0, 0);
		break;
	case BPF_PROG_TYPE_EXT:
		/* freplace program can return anything as its return value
		 * depends on the to-be-replaced kernel func or bpf program.
		 */
	default:
		return false;
	}

	/* Continue calculating. */

	return true;
}
16834 
16835 static bool program_returns_void(struct bpf_verifier_env *env)
16836 {
16837 	const struct bpf_prog *prog = env->prog;
16838 	enum bpf_prog_type prog_type = prog->type;
16839 
16840 	switch (prog_type) {
16841 	case BPF_PROG_TYPE_LSM:
16842 		/* See return_retval_range, for BPF_LSM_CGROUP can be 0 or 0-1 depending on hook. */
16843 		if (prog->expected_attach_type != BPF_LSM_CGROUP &&
16844 		    !prog->aux->attach_func_proto->type)
16845 			return true;
16846 		break;
16847 	case BPF_PROG_TYPE_STRUCT_OPS:
16848 		if (!prog->aux->attach_func_proto->type)
16849 			return true;
16850 		break;
16851 	case BPF_PROG_TYPE_EXT:
16852 		/*
16853 		 * If the actual program is an extension, let it
16854 		 * return void - attaching will succeed only if the
16855 		 * program being replaced also returns void, and since
16856 		 * it has passed verification its actual type doesn't matter.
16857 		 */
16858 		if (subprog_returns_void(env, 0))
16859 			return true;
16860 		break;
16861 	default:
16862 		break;
16863 	}
16864 	return false;
16865 }
16866 
/* Validate the value in @regno (named @reg_name in diagnostics) at
 * program exit or async-callback return against the range the attach
 * point allows.  Returns 0 if acceptable, negative errno otherwise.
 */
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
{
	const char *exit_ctx = "At program exit";
	struct tnum enforce_attach_type_range = tnum_unknown;
	const struct bpf_prog *prog = env->prog;
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_retval_range range = retval_range(0, 1);
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	struct bpf_func_state *frame = env->cur_state->frame[0];
	const struct btf_type *reg_type, *ret_type = NULL;
	int err;

	/* LSM and struct_ops func-ptr's return type could be "void" */
	if (!frame->in_async_callback_fn && program_returns_void(env))
		return 0;

	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
		/* Allow a struct_ops program to return a referenced kptr if it
		 * matches the operator's return type and is in its unmodified
		 * form. A scalar zero (i.e., a null pointer) is also allowed.
		 */
		reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
		ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
						prog->aux->attach_func_proto->type,
						NULL);
		if (ret_type && ret_type == reg_type && reg->ref_obj_id)
			return __check_ptr_off_reg(env, reg, regno, false);
	}

	/* eBPF calling convention is such that R0 is used
	 * to return the value from eBPF program.
	 * Make sure that it's readable at this time
	 * of bpf_exit, which means that program wrote
	 * something into it earlier
	 */
	err = check_reg_arg(env, regno, SRC_OP);
	if (err)
		return err;

	if (is_pointer_value(env, regno)) {
		verbose(env, "R%d leaks addr as return value\n", regno);
		return -EACCES;
	}

	if (frame->in_async_callback_fn) {
		/* callbacks carry their own expected range */
		exit_ctx = "At async callback return";
		range = frame->callback_ret_range;
		goto enforce_retval;
	}

	/* struct_ops with a pointer-returning op but a non-kptr R0 was
	 * handled above; scalar-returning ops are unrestricted here
	 */
	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type)
		return 0;

	if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS))
		enforce_attach_type_range = tnum_range(2, 3);

	if (!return_retval_range(env, &range))
		return 0;

enforce_retval:
	if (reg->type != SCALAR_VALUE) {
		verbose(env, "%s the register R%d is not a known value (%s)\n",
			exit_ctx, regno, reg_type_str(env, reg->type));
		return -EINVAL;
	}

	err = mark_chain_precision(env, regno);
	if (err)
		return err;

	if (!retval_range_within(range, reg)) {
		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
		    prog_type == BPF_PROG_TYPE_LSM &&
		    !prog->aux->attach_func_proto->type)
			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
		return -EINVAL;
	}

	/* remember that attach-type-specific values were actually used */
	if (!tnum_is_unknown(enforce_attach_type_range) &&
	    tnum_in(enforce_attach_type_range, reg->var_off))
		env->prog->enforce_expected_attach_type = 1;
	return 0;
}
16951 
/* Validate R0 at exit from a global subprogram: for non-void subprogs
 * R0 must be a readable scalar and must not leak a pointer.
 * Returns 0 on success, negative errno otherwise.
 */
static int check_global_subprog_return_code(struct bpf_verifier_env *env)
{
	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
	struct bpf_func_state *cur_frame = cur_func(env);
	int err;

	/* void subprogs have nothing to check */
	if (subprog_returns_void(env, cur_frame->subprogno))
		return 0;

	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
	if (err)
		return err;

	if (is_pointer_value(env, BPF_REG_0)) {
		verbose(env, "R%d leaks addr as return value\n", BPF_REG_0);
		return -EACCES;
	}

	if (reg->type != SCALAR_VALUE) {
		verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
			reg_type_str(env, reg->type));
		return -EINVAL;
	}

	return 0;
}
16978 
16979 /* Bitmask with 1s for all caller saved registers */
16980 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
16981 
16982 /* True if do_misc_fixups() replaces calls to helper number 'imm',
16983  * replacement patch is presumed to follow bpf_fastcall contract
16984  * (see mark_fastcall_pattern_for_call() below).
16985  */
bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
{
	switch (imm) {
#ifdef CONFIG_X86_64
	case BPF_FUNC_get_smp_processor_id:
#ifdef CONFIG_SMP
	/* these two fall through to the same return below */
	case BPF_FUNC_get_current_task_btf:
	case BPF_FUNC_get_current_task:
#endif
		/* inlined only when the JIT can emit per-CPU accesses */
		return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
#endif
	default:
		return false;
	}
}
17001 
17002 /* If @call is a kfunc or helper call, fills @cs and returns true,
17003  * otherwise returns false.
17004  */
17005 bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
17006 			  struct bpf_call_summary *cs)
17007 {
17008 	struct bpf_kfunc_call_arg_meta meta;
17009 	const struct bpf_func_proto *fn;
17010 	int i;
17011 
17012 	if (bpf_helper_call(call)) {
17013 
17014 		if (bpf_get_helper_proto(env, call->imm, &fn) < 0)
17015 			/* error would be reported later */
17016 			return false;
17017 		cs->fastcall = fn->allow_fastcall &&
17018 			       (bpf_verifier_inlines_helper_call(env, call->imm) ||
17019 				bpf_jit_inlines_helper_call(call->imm));
17020 		cs->is_void = fn->ret_type == RET_VOID;
17021 		cs->num_params = 0;
17022 		for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
17023 			if (fn->arg_type[i] == ARG_DONTCARE)
17024 				break;
17025 			cs->num_params++;
17026 		}
17027 		return true;
17028 	}
17029 
17030 	if (bpf_pseudo_kfunc_call(call)) {
17031 		int err;
17032 
17033 		err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
17034 		if (err < 0)
17035 			/* error would be reported later */
17036 			return false;
17037 		cs->num_params = btf_type_vlen(meta.func_proto);
17038 		cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
17039 		cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
17040 		return true;
17041 	}
17042 
17043 	return false;
17044 }
17045 
17046 /* LLVM define a bpf_fastcall function attribute.
17047  * This attribute means that function scratches only some of
17048  * the caller saved registers defined by ABI.
17049  * For BPF the set of such registers could be defined as follows:
17050  * - R0 is scratched only if function is non-void;
17051  * - R1-R5 are scratched only if corresponding parameter type is defined
17052  *   in the function prototype.
17053  *
17054  * The contract between kernel and clang allows to simultaneously use
17055  * such functions and maintain backwards compatibility with old
17056  * kernels that don't understand bpf_fastcall calls:
17057  *
17058  * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
17059  *   registers are not scratched by the call;
17060  *
17061  * - as a post-processing step, clang visits each bpf_fastcall call and adds
17062  *   spill/fill for every live r0-r5;
17063  *
17064  * - stack offsets used for the spill/fill are allocated as lowest
17065  *   stack offsets in whole function and are not used for any other
17066  *   purposes;
17067  *
17068  * - when kernel loads a program, it looks for such patterns
17069  *   (bpf_fastcall function surrounded by spills/fills) and checks if
17070  *   spill/fill stack offsets are used exclusively in fastcall patterns;
17071  *
17072  * - if so, and if verifier or current JIT inlines the call to the
17073  *   bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
17074  *   spill/fill pairs;
17075  *
17076  * - when old kernel loads a program, presence of spill/fill pairs
17077  *   keeps BPF program valid, albeit slightly less efficient.
17078  *
17079  * For example:
17080  *
17081  *   r1 = 1;
17082  *   r2 = 2;
17083  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
17084  *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
17085  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
17086  *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
17087  *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
17088  *   r0 = r1;                            exit;
17089  *   r0 += r2;
17090  *   exit;
17091  *
17092  * The purpose of mark_fastcall_pattern_for_call is to:
17093  * - look for such patterns;
17094  * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
 * - set env->insn_aux_data[*].fastcall_spills_num for call instruction;
17096  * - update env->subprog_info[*]->fastcall_stack_off to find an offset
17097  *   at which bpf_fastcall spill/fill stack slots start;
17098  * - update env->subprog_info[*]->keep_fastcall_stack.
17099  *
17100  * The .fastcall_pattern and .fastcall_stack_off are used by
17101  * check_fastcall_stack_contract() to check if every stack access to
17102  * fastcall spill/fill stack slot originates from spill/fill
17103  * instructions, members of fastcall patterns.
17104  *
17105  * If such condition holds true for a subprogram, fastcall patterns could
17106  * be rewritten by remove_fastcall_spills_fills().
17107  * Otherwise bpf_fastcall patterns are not changed in the subprogram
17108  * (code, presumably, generated by an older clang version).
17109  *
17110  * For example, it is *not* safe to remove spill/fill below:
17111  *
17112  *   r1 = 1;
17113  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
17114  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
17115  *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
17116  *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
17117  *   r0 += r1;                           exit;
17118  *   exit;
17119  */
/* Look for a bpf_fastcall spill/fill pattern centered on the call at
 * @insn_idx (see the large comment above for the contract). Marks matched
 * spill/fill instructions and updates @subprog bookkeeping. @lowest_off is
 * the lowest stack store offset seen in the subprogram, i.e. where the
 * fastcall spill slots are expected to start.
 */
static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
					   struct bpf_subprog_info *subprog,
					   int insn_idx, s16 lowest_off)
{
	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
	struct bpf_insn *call = &env->prog->insnsi[insn_idx];
	u32 clobbered_regs_mask;
	struct bpf_call_summary cs;
	u32 expected_regs_mask;
	s16 off;
	int i;

	if (!bpf_get_call_summary(env, call, &cs))
		return;

	/* A bitmask specifying which caller saved registers are clobbered
	 * by a call to a helper/kfunc *as if* this helper/kfunc follows
	 * bpf_fastcall contract:
	 * - includes R0 if function is non-void;
	 * - includes R1-R5 if corresponding parameter is described
	 *   in the function prototype.
	 */
	clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
	/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
	expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;

	/* match pairs of form:
	 *
	 * *(u64 *)(r10 - Y) = rX   (where Y % 8 == 0)
	 * ...
	 * call %[to_be_inlined]
	 * ...
	 * rX = *(u64 *)(r10 - Y)
	 */
	for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
		/* spill at insn_idx - i and fill at insn_idx + i must both exist */
		if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
			break;
		stx = &insns[insn_idx - i];
		ldx = &insns[insn_idx + i];
		/* must be a stack spill/fill pair */
		if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
		    stx->dst_reg != BPF_REG_10 ||
		    ldx->src_reg != BPF_REG_10)
			break;
		/* must be a spill/fill for the same reg */
		if (stx->src_reg != ldx->dst_reg)
			break;
		/* must be one of the previously unseen registers */
		if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
			break;
		/* must be a spill/fill for the same expected offset,
		 * no need to check offset alignment, BPF_DW stack access
		 * is always 8-byte aligned.
		 */
		if (stx->off != off || ldx->off != off)
			break;
		expected_regs_mask &= ~BIT(stx->src_reg);
		env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
		env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
	}
	/* i == 1 means the very first spill/fill pair did not match */
	if (i == 1)
		return;

	/* Conditionally set 'fastcall_spills_num' to allow forward
	 * compatibility when more helper functions are marked as
	 * bpf_fastcall at compile time than current kernel supports, e.g:
	 *
	 *   1: *(u64 *)(r10 - 8) = r1
	 *   2: call A                  ;; assume A is bpf_fastcall for current kernel
	 *   3: r1 = *(u64 *)(r10 - 8)
	 *   4: *(u64 *)(r10 - 8) = r1
	 *   5: call B                  ;; assume B is not bpf_fastcall for current kernel
	 *   6: r1 = *(u64 *)(r10 - 8)
	 *
	 * There is no need to block bpf_fastcall rewrite for such program.
	 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
	 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
	 * does not remove spill/fill pair {4,6}.
	 */
	if (cs.fastcall)
		env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
	else
		subprog->keep_fastcall_stack = 1;
	subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
}
17206 
17207 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
17208 {
17209 	struct bpf_subprog_info *subprog = env->subprog_info;
17210 	struct bpf_insn *insn;
17211 	s16 lowest_off;
17212 	int s, i;
17213 
17214 	for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
17215 		/* find lowest stack spill offset used in this subprog */
17216 		lowest_off = 0;
17217 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
17218 			insn = env->prog->insnsi + i;
17219 			if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
17220 			    insn->dst_reg != BPF_REG_10)
17221 				continue;
17222 			lowest_off = min(lowest_off, insn->off);
17223 		}
17224 		/* use this offset to find fastcall patterns */
17225 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
17226 			insn = env->prog->insnsi + i;
17227 			if (insn->code != (BPF_JMP | BPF_CALL))
17228 				continue;
17229 			mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
17230 		}
17231 	}
17232 	return 0;
17233 }
17234 
17235 static void adjust_btf_func(struct bpf_verifier_env *env)
17236 {
17237 	struct bpf_prog_aux *aux = env->prog->aux;
17238 	int i;
17239 
17240 	if (!aux->func_info)
17241 		return;
17242 
17243 	/* func_info is not available for hidden subprogs */
17244 	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
17245 		aux->func_info[i].insn_off = env->subprog_info[i].start;
17246 }
17247 
17248 /* Find id in idset and increment its count, or add new entry */
17249 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
17250 {
17251 	u32 i;
17252 
17253 	for (i = 0; i < idset->num_ids; i++) {
17254 		if (idset->entries[i].id == id) {
17255 			idset->entries[i].cnt++;
17256 			return;
17257 		}
17258 	}
17259 	/* New id */
17260 	if (idset->num_ids < BPF_ID_MAP_SIZE) {
17261 		idset->entries[idset->num_ids].id = id;
17262 		idset->entries[idset->num_ids].cnt = 1;
17263 		idset->num_ids++;
17264 	}
17265 }
17266 
17267 /* Find id in idset and return its count, or 0 if not found */
17268 static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
17269 {
17270 	u32 i;
17271 
17272 	for (i = 0; i < idset->num_ids; i++) {
17273 		if (idset->entries[i].id == id)
17274 			return idset->entries[i].cnt;
17275 	}
17276 	return 0;
17277 }
17278 
17279 /*
17280  * Clear singular scalar ids in a state.
17281  * A register with a non-zero id is called singular if no other register shares
17282  * the same base id. Such registers can be treated as independent (id=0).
17283  */
/*
 * Clear singular scalar ids in a state.
 * A register with a non-zero id is called singular if no other register shares
 * the same base id. Such registers can be treated as independent (id=0).
 */
void bpf_clear_singular_ids(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *st)
{
	struct bpf_idset *idset = &env->idset_scratch;
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;

	idset->num_ids = 0;

	/* first pass: count how many scalar registers share each base id
	 * (BPF_ADD_CONST flag is masked out to get the base id)
	 */
	bpf_for_each_reg_in_vstate(st, func, reg, ({
		if (reg->type != SCALAR_VALUE)
			continue;
		if (!reg->id)
			continue;
		idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST);
	}));

	/* second pass: drop the id from registers whose base id occurs
	 * exactly once — no other register can be linked to them
	 */
	bpf_for_each_reg_in_vstate(st, func, reg, ({
		if (reg->type != SCALAR_VALUE)
			continue;
		if (!reg->id)
			continue;
		if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1)
			clear_scalar_id(reg);
	}));
}
17310 
17311 /* Return true if it's OK to have the same insn return a different type. */
17312 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
17313 {
17314 	switch (base_type(type)) {
17315 	case PTR_TO_CTX:
17316 	case PTR_TO_SOCKET:
17317 	case PTR_TO_SOCK_COMMON:
17318 	case PTR_TO_TCP_SOCK:
17319 	case PTR_TO_XDP_SOCK:
17320 	case PTR_TO_BTF_ID:
17321 	case PTR_TO_ARENA:
17322 		return false;
17323 	default:
17324 		return true;
17325 	}
17326 }
17327 
17328 /* If an instruction was previously used with particular pointer types, then we
17329  * need to be careful to avoid cases such as the below, where it may be ok
17330  * for one branch accessing the pointer, but not ok for the other branch:
17331  *
17332  * R1 = sock_ptr
17333  * goto X;
17334  * ...
17335  * R1 = some_other_valid_ptr;
17336  * goto X;
17337  * ...
17338  * R2 = *(u32 *)(R1 + 0);
17339  */
17340 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
17341 {
17342 	return src != prev && (!reg_type_mismatch_ok(src) ||
17343 			       !reg_type_mismatch_ok(prev));
17344 }
17345 
17346 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
17347 {
17348 	switch (base_type(type)) {
17349 	case PTR_TO_MEM:
17350 	case PTR_TO_BTF_ID:
17351 		return true;
17352 	default:
17353 		return false;
17354 	}
17355 }
17356 
/* True when the base type is PTR_TO_MEM, ignoring type flags. */
static bool is_ptr_to_mem(enum bpf_reg_type type)
{
	return base_type(type) == PTR_TO_MEM;
}
17361 
/* Record the pointer type used by the memory-access insn at env->insn_idx
 * in its insn_aux_data, or verify it against the type recorded by a
 * previously verified path through the same insn.
 *
 * Returns 0 on success, -EINVAL when the same insn is reached with
 * incompatible pointer types (see reg_type_mismatch()). When
 * @allow_trust_mismatch is set, PTR_TO_MEM/PTR_TO_BTF_ID variants are
 * merged into a common, most-restrictive type instead of being rejected.
 */
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
			     bool allow_trust_mismatch)
{
	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
	enum bpf_reg_type merged_type;

	if (*prev_type == NOT_INIT) {
		/* Saw a valid insn
		 * dst_reg = *(u32 *)(src_reg + off)
		 * save type to validate intersecting paths
		 */
		*prev_type = type;
	} else if (reg_type_mismatch(type, *prev_type)) {
		/* Abuser program is trying to use the same insn
		 * dst_reg = *(u32*) (src_reg + off)
		 * with different pointer types:
		 * src_reg == ctx in one branch and
		 * src_reg == stack|map in some other branch.
		 * Reject it.
		 */
		if (allow_trust_mismatch &&
		    is_ptr_to_mem_or_btf_id(type) &&
		    is_ptr_to_mem_or_btf_id(*prev_type)) {
			/*
			 * Have to support a use case when one path through
			 * the program yields TRUSTED pointer while another
			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
			 * Same behavior of MEM_RDONLY flag.
			 */
			if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
				merged_type = PTR_TO_MEM;
			else
				merged_type = PTR_TO_BTF_ID;
			if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
				merged_type |= PTR_UNTRUSTED;
			if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
				merged_type |= MEM_RDONLY;
			*prev_type = merged_type;
		} else {
			verbose(env, "same insn cannot be used with different pointers\n");
			return -EINVAL;
		}
	}

	return 0;
}
17409 
/* Positive, non-error return codes used by do_check_insn() and its helpers
 * to steer the main verification loop in do_check().
 */
enum {
	PROCESS_BPF_EXIT = 1,	/* current path is done, pop next state */
	INSN_IDX_UPDATED = 2,	/* env->insn_idx was already advanced */
};
17414 
/* Handle a BPF_EXIT instruction (or a bpf_throw-style exceptional exit when
 * @exception_exit is true): check for resource leaks, unwind a nested
 * function frame if needed, and validate the return code otherwise.
 *
 * Returns PROCESS_BPF_EXIT when the current path is finished,
 * INSN_IDX_UPDATED when execution continues in the caller frame, or a
 * negative error.
 */
static int process_bpf_exit_full(struct bpf_verifier_env *env,
				 bool *do_print_state,
				 bool exception_exit)
{
	struct bpf_func_state *cur_frame = cur_func(env);

	/* We must do check_reference_leak here before
	 * prepare_func_exit to handle the case when
	 * state->curframe > 0, it may be a callback function,
	 * for which reference_state must match caller reference
	 * state when it exits.
	 */
	int err = check_resource_leak(env, exception_exit,
				      exception_exit || !env->cur_state->curframe,
				      exception_exit ? "bpf_throw" :
				      "BPF_EXIT instruction in main prog");
	if (err)
		return err;

	/* The side effect of the prepare_func_exit which is
	 * being skipped is that it frees bpf_func_state.
	 * Typically, process_bpf_exit will only be hit with
	 * outermost exit. copy_verifier_state in pop_stack will
	 * handle freeing of any extra bpf_func_state left over
	 * from not processing all nested function exits. We
	 * also skip return code checks as they are not needed
	 * for exceptional exits.
	 */
	if (exception_exit)
		return PROCESS_BPF_EXIT;

	if (env->cur_state->curframe) {
		/* exit from nested function */
		err = prepare_func_exit(env, &env->insn_idx);
		if (err)
			return err;
		*do_print_state = true;
		return INSN_IDX_UPDATED;
	}

	/*
	 * Return from a regular global subprogram differs from return
	 * from the main program or async/exception callback.
	 * Main program exit implies return code restrictions
	 * that depend on program type.
	 * Exit from exception callback is equivalent to main program exit.
	 * Exit from async callback implies return code restrictions
	 * that depend on async scheduling mechanism.
	 */
	if (cur_frame->subprogno &&
	    !cur_frame->in_async_callback_fn &&
	    !cur_frame->in_exception_callback_fn)
		err = check_global_subprog_return_code(env);
	else
		err = check_return_code(env, BPF_REG_0, "R0");
	if (err)
		return err;
	return PROCESS_BPF_EXIT;
}
17474 
17475 static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
17476 				       int regno,
17477 				       struct bpf_map *map,
17478 				       u32 *pmin_index, u32 *pmax_index)
17479 {
17480 	struct bpf_reg_state *reg = reg_state(env, regno);
17481 	u64 min_index = reg->umin_value;
17482 	u64 max_index = reg->umax_value;
17483 	const u32 size = 8;
17484 
17485 	if (min_index > (u64) U32_MAX * size) {
17486 		verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg->umin_value);
17487 		return -ERANGE;
17488 	}
17489 	if (max_index > (u64) U32_MAX * size) {
17490 		verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg->umax_value);
17491 		return -ERANGE;
17492 	}
17493 
17494 	min_index /= size;
17495 	max_index /= size;
17496 
17497 	if (max_index >= map->max_entries) {
17498 		verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n",
17499 			     regno, min_index, max_index, map->max_entries);
17500 		return -EINVAL;
17501 	}
17502 
17503 	*pmin_index = min_index;
17504 	*pmax_index = max_index;
17505 	return 0;
17506 }
17507 
17508 /* gotox *dst_reg */
/* Verify an indirect jump (gotox) through a PTR_TO_INSN register backed by
 * a BPF_MAP_TYPE_INSN_ARRAY map. Every possible target in the register's
 * index range is pushed as a separate verification branch; the last one is
 * followed directly by updating env->insn_idx.
 *
 * Returns INSN_IDX_UPDATED on success or a negative error.
 */
static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_verifier_state *other_branch;
	struct bpf_reg_state *dst_reg;
	struct bpf_map *map;
	u32 min_index, max_index;
	int err = 0;
	int n;
	int i;

	dst_reg = reg_state(env, insn->dst_reg);
	if (dst_reg->type != PTR_TO_INSN) {
		verbose(env, "R%d has type %s, expected PTR_TO_INSN\n",
			     insn->dst_reg, reg_type_str(env, dst_reg->type));
		return -EINVAL;
	}

	map = dst_reg->map_ptr;
	if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg))
		return -EFAULT;

	if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env,
			    "R%d has incorrect map type %d", insn->dst_reg, map->map_type))
		return -EFAULT;

	err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index);
	if (err)
		return err;

	/* Ensure that the buffer is large enough */
	if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
		env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf,
						        max_index - min_index + 1);
		if (!env->gotox_tmp_buf)
			return -ENOMEM;
	}

	/* collect the unique jump targets within [min_index, max_index] */
	n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
	if (n < 0)
		return n;
	if (n == 0) {
		verbose(env, "register R%d doesn't point to any offset in map id=%d\n",
			     insn->dst_reg, map->id);
		return -EINVAL;
	}

	/* push all targets but the last as pending branches */
	for (i = 0; i < n - 1; i++) {
		mark_indirect_target(env, env->gotox_tmp_buf->items[i]);
		other_branch = push_stack(env, env->gotox_tmp_buf->items[i],
					  env->insn_idx, env->cur_state->speculative);
		if (IS_ERR(other_branch))
			return PTR_ERR(other_branch);
	}
	/* continue verification at the last target on the current state */
	env->insn_idx = env->gotox_tmp_buf->items[n-1];
	mark_indirect_target(env, env->insn_idx);
	return INSN_IDX_UPDATED;
}
17566 
/* Verify a single instruction at env->insn_idx, dispatching on its class.
 *
 * Returns 0 when env->insn_idx should simply advance to the next insn,
 * INSN_IDX_UPDATED when the handler already set env->insn_idx,
 * PROCESS_BPF_EXIT when the current path is finished, or a negative error.
 */
static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
{
	int err;
	struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
	u8 class = BPF_CLASS(insn->code);

	switch (class) {
	case BPF_ALU:
	case BPF_ALU64:
		return check_alu_op(env, insn);

	case BPF_LDX:
		return check_load_mem(env, insn, false,
				      BPF_MODE(insn->code) == BPF_MEMSX,
				      true, "ldx");

	case BPF_STX:
		if (BPF_MODE(insn->code) == BPF_ATOMIC)
			return check_atomic(env, insn);
		return check_store_reg(env, insn, false);

	case BPF_ST: {
		enum bpf_reg_type dst_reg_type;

		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		/* remember the pointer type before check_mem_access() so the
		 * same insn can be validated against intersecting paths
		 */
		dst_reg_type = cur_regs(env)[insn->dst_reg].type;

		err = check_mem_access(env, env->insn_idx, insn->dst_reg,
				       insn->off, BPF_SIZE(insn->code),
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;

		return save_aux_ptr_type(env, dst_reg_type, false);
	}
	case BPF_JMP:
	case BPF_JMP32: {
		u8 opcode = BPF_OP(insn->code);

		env->jmps_processed++;
		if (opcode == BPF_CALL) {
			/* while holding a lock, only unlock/kptr_xchg helpers
			 * and explicitly allowed kfuncs may be called
			 */
			if (env->cur_state->active_locks) {
				if ((insn->src_reg == BPF_REG_0 &&
				     insn->imm != BPF_FUNC_spin_unlock &&
				     insn->imm != BPF_FUNC_kptr_xchg) ||
				    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
				     (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
					verbose(env,
						"function calls are not allowed while holding a lock\n");
					return -EINVAL;
				}
			}
			mark_reg_scratched(env, BPF_REG_0);
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return check_func_call(env, insn, &env->insn_idx);
			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
				return check_kfunc_call(env, insn, &env->insn_idx);
			return check_helper_call(env, insn, &env->insn_idx);
		} else if (opcode == BPF_JA) {
			/* BPF_SRC == BPF_X means gotox (indirect jump) */
			if (BPF_SRC(insn->code) == BPF_X)
				return check_indirect_jump(env, insn);

			if (class == BPF_JMP)
				env->insn_idx += insn->off + 1;
			else
				env->insn_idx += insn->imm + 1;
			return INSN_IDX_UPDATED;
		} else if (opcode == BPF_EXIT) {
			return process_bpf_exit_full(env, do_print_state, false);
		}
		return check_cond_jmp_op(env, insn, &env->insn_idx);
	}
	case BPF_LD: {
		u8 mode = BPF_MODE(insn->code);

		if (mode == BPF_ABS || mode == BPF_IND)
			return check_ld_abs(env, insn);

		if (mode == BPF_IMM) {
			err = check_ld_imm(env, insn);
			if (err)
				return err;

			/* ldimm64 occupies two insn slots, skip the second */
			env->insn_idx++;
			sanitize_mark_insn_seen(env);
		}
		return 0;
	}
	}
	/* all class values are handled above. silence compiler warning */
	return -EFAULT;
}
17662 
/* Main verification loop: walk instructions from env->insn_idx, pruning
 * against previously seen states, dispatching each insn to do_check_insn()
 * and popping pending branch states until the stack is exhausted.
 *
 * Returns 0 when all paths verified successfully, negative error otherwise.
 */
static int do_check(struct bpf_verifier_env *env)
{
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_insn *insns = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	bool do_print_state = false;
	int prev_insn_idx = -1;

	for (;;) {
		struct bpf_insn *insn;
		struct bpf_insn_aux_data *insn_aux;
		int err;

		/* reset current history entry on each new instruction */
		env->cur_hist_ent = NULL;

		env->prev_insn_idx = prev_insn_idx;
		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		insn_aux = &env->insn_aux_data[env->insn_idx];

		/* global cap on the total number of verified instructions */
		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				env->insn_processed);
			return -E2BIG;
		}

		state->last_insn_idx = env->prev_insn_idx;
		state->insn_idx = env->insn_idx;

		/* try to prune this path against an equivalent visited state */
		if (bpf_is_prune_point(env, env->insn_idx)) {
			err = bpf_is_state_visited(env, env->insn_idx);
			if (err < 0)
				return err;
			if (err == 1) {
				/* found equivalent state, can prune the search */
				if (env->log.level & BPF_LOG_LEVEL) {
					if (do_print_state)
						verbose(env, "\nfrom %d to %d%s: safe\n",
							env->prev_insn_idx, env->insn_idx,
							env->cur_state->speculative ?
							" (speculative execution)" : "");
					else
						verbose(env, "%d: safe\n", env->insn_idx);
				}
				goto process_bpf_exit;
			}
		}

		if (bpf_is_jmp_point(env, env->insn_idx)) {
			err = bpf_push_jmp_history(env, state, 0, 0);
			if (err)
				return err;
		}

		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
			verbose(env, "\nfrom %d to %d%s:",
				env->prev_insn_idx, env->insn_idx,
				env->cur_state->speculative ?
				" (speculative execution)" : "");
			print_verifier_state(env, state, state->curframe, true);
			do_print_state = false;
		}

		if (env->log.level & BPF_LOG_LEVEL) {
			if (verifier_state_scratched(env))
				print_insn_state(env, state, state->curframe);

			verbose_linfo(env, env->insn_idx, "; ");
			env->prev_log_pos = env->log.end_pos;
			verbose(env, "%d: ", env->insn_idx);
			bpf_verbose_insn(env, insn);
			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
			env->prev_log_pos = env->log.end_pos;
		}

		if (bpf_prog_is_offloaded(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
			if (err)
				return err;
		}

		sanitize_mark_insn_seen(env);
		prev_insn_idx = env->insn_idx;

		/* Sanity check: precomputed constants must match verifier state */
		if (!state->speculative && insn_aux->const_reg_mask) {
			struct bpf_reg_state *regs = cur_regs(env);
			u16 mask = insn_aux->const_reg_mask;

			for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) {
				u32 cval = insn_aux->const_reg_vals[r];

				if (!(mask & BIT(r)))
					continue;
				if (regs[r].type != SCALAR_VALUE)
					continue;
				if (!tnum_is_const(regs[r].var_off))
					continue;
				if (verifier_bug_if((u32)regs[r].var_off.value != cval,
						    env, "const R%d: %u != %llu",
						    r, cval, regs[r].var_off.value))
					return -EFAULT;
			}
		}

		/* Reduce verification complexity by stopping speculative path
		 * verification when a nospec is encountered.
		 */
		if (state->speculative && insn_aux->nospec)
			goto process_bpf_exit;

		err = do_check_insn(env, &do_print_state);
		if (error_recoverable_with_nospec(err) && state->speculative) {
			/* Prevent this speculative path from ever reaching the
			 * insn that would have been unsafe to execute.
			 */
			insn_aux->nospec = true;
			/* If it was an ADD/SUB insn, potentially remove any
			 * markings for alu sanitization.
			 */
			insn_aux->alu_state = 0;
			goto process_bpf_exit;
		} else if (err < 0) {
			return err;
		} else if (err == PROCESS_BPF_EXIT) {
			goto process_bpf_exit;
		} else if (err == INSN_IDX_UPDATED) {
			/* handler already advanced env->insn_idx */
		} else if (err == 0) {
			env->insn_idx++;
		}

		if (state->speculative && insn_aux->nospec_result) {
			/* If we are on a path that performed a jump-op, this
			 * may skip a nospec patched-in after the jump. This can
			 * currently never happen because nospec_result is only
			 * used for the write-ops
			 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
			 * calls. These must never skip the following insn
			 * (i.e., bpf_insn_successors()'s opcode_info.can_jump
			 * is false). Still, add a warning to document this in
			 * case nospec_result is used elsewhere in the future.
			 *
			 * All non-branch instructions have a single
			 * fall-through edge. For these, nospec_result should
			 * already work.
			 */
			if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
					     BPF_CLASS(insn->code) == BPF_JMP32) &&
					    BPF_OP(insn->code) != BPF_CALL, env,
					    "speculation barrier after jump instruction may not have the desired effect"))
				return -EFAULT;
process_bpf_exit:
			/* current path is done: account branches and pop the
			 * next pending state, -ENOENT means the stack is empty
			 */
			mark_verifier_state_scratched(env);
			err = bpf_update_branch_counts(env, env->cur_state);
			if (err)
				return err;
			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
					pop_log);
			if (err < 0) {
				if (err != -ENOENT)
					return err;
				break;
			} else {
				do_print_state = true;
				continue;
			}
		}
	}

	return 0;
}
17849 
17850 static int find_btf_percpu_datasec(struct btf *btf)
17851 {
17852 	const struct btf_type *t;
17853 	const char *tname;
17854 	int i, n;
17855 
17856 	/*
17857 	 * Both vmlinux and module each have their own ".data..percpu"
17858 	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17859 	 * types to look at only module's own BTF types.
17860 	 */
17861 	n = btf_nr_types(btf);
17862 	for (i = btf_named_start_id(btf, true); i < n; i++) {
17863 		t = btf_type_by_id(btf, i);
17864 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17865 			continue;
17866 
17867 		tname = btf_name_by_offset(btf, t->name_off);
17868 		if (!strcmp(tname, ".data..percpu"))
17869 			return i;
17870 	}
17871 
17872 	return -ENOENT;
17873 }
17874 
17875 /*
17876  * Add btf to the env->used_btfs array. If needed, refcount the
17877  * corresponding kernel module. To simplify caller's logic
17878  * in case of error or if btf was added before the function
17879  * decreases the btf refcount.
17880  */
17881 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
17882 {
17883 	struct btf_mod_pair *btf_mod;
17884 	int ret = 0;
17885 	int i;
17886 
17887 	/* check whether we recorded this BTF (and maybe module) already */
17888 	for (i = 0; i < env->used_btf_cnt; i++)
17889 		if (env->used_btfs[i].btf == btf)
17890 			goto ret_put;
17891 
17892 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
17893 		verbose(env, "The total number of btfs per program has reached the limit of %u\n",
17894 			MAX_USED_BTFS);
17895 		ret = -E2BIG;
17896 		goto ret_put;
17897 	}
17898 
17899 	btf_mod = &env->used_btfs[env->used_btf_cnt];
17900 	btf_mod->btf = btf;
17901 	btf_mod->module = NULL;
17902 
17903 	/* if we reference variables from kernel module, bump its refcount */
17904 	if (btf_is_module(btf)) {
17905 		btf_mod->module = btf_try_get_module(btf);
17906 		if (!btf_mod->module) {
17907 			ret = -ENXIO;
17908 			goto ret_put;
17909 		}
17910 	}
17911 
17912 	env->used_btf_cnt++;
17913 	return 0;
17914 
17915 ret_put:
17916 	/* Either error or this BTF was already added */
17917 	btf_put(btf);
17918 	return ret;
17919 }
17920 
17921 /* replace pseudo btf_id with kernel symbol address */
/* Resolve the BTF id carried in a ldimm64 pseudo insn to the address of the
 * named kernel symbol (via kallsyms) and patch it into insn[0]/insn[1].
 * Also records the resulting register type (per-cpu PTR_TO_BTF_ID,
 * read-only PTR_TO_MEM, or plain PTR_TO_BTF_ID) in @aux->btf_var.
 *
 * Returns 0 on success, -ENOENT/-EINVAL on lookup or type errors. Does not
 * take or release a reference on @btf; the caller owns it.
 */
static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
				 struct bpf_insn *insn,
				 struct bpf_insn_aux_data *aux,
				 struct btf *btf)
{
	const struct btf_var_secinfo *vsi;
	const struct btf_type *datasec;
	const struct btf_type *t;
	const char *sym_name;
	bool percpu = false;
	u32 type, id = insn->imm;
	s32 datasec_id;
	u64 addr;
	int i;

	t = btf_type_by_id(btf, id);
	if (!t) {
		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
		return -ENOENT;
	}

	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
		return -EINVAL;
	}

	sym_name = btf_name_by_offset(btf, t->name_off);
	addr = kallsyms_lookup_name(sym_name);
	if (!addr) {
		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
			sym_name);
		return -ENOENT;
	}
	/* patch the 64-bit address into the two-slot ldimm64 insn */
	insn[0].imm = (u32)addr;
	insn[1].imm = addr >> 32;

	if (btf_type_is_func(t)) {
		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
		aux->btf_var.mem_size = 0;
		return 0;
	}

	/* a var listed in ".data..percpu" DATASEC is a per-cpu symbol */
	datasec_id = find_btf_percpu_datasec(btf);
	if (datasec_id > 0) {
		datasec = btf_type_by_id(btf, datasec_id);
		for_each_vsi(i, datasec, vsi) {
			if (vsi->type == id) {
				percpu = true;
				break;
			}
		}
	}

	type = t->type;
	t = btf_type_skip_modifiers(btf, type, NULL);
	if (percpu) {
		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
		aux->btf_var.btf = btf;
		aux->btf_var.btf_id = type;
	} else if (!btf_type_is_struct(t)) {
		const struct btf_type *ret;
		const char *tname;
		u32 tsize;

		/* resolve the type size of ksym. */
		ret = btf_resolve_size(btf, t, &tsize);
		if (IS_ERR(ret)) {
			tname = btf_name_by_offset(btf, t->name_off);
			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
				tname, PTR_ERR(ret));
			return -EINVAL;
		}
		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
		aux->btf_var.mem_size = tsize;
	} else {
		aux->btf_var.reg_type = PTR_TO_BTF_ID;
		aux->btf_var.btf = btf;
		aux->btf_var.btf_id = type;
	}

	return 0;
}
18004 
/* Resolve a BPF_PSEUDO_BTF_ID ld_imm64 insn into an actual kernel symbol
 * address.  insn[1].imm optionally carries a module BTF object FD; an FD
 * of 0 selects vmlinux BTF.  On success the held BTF reference is handed
 * over to env->used_btfs; on failure it is dropped here.
 */
static int check_pseudo_btf_id(struct bpf_verifier_env *env,
			       struct bpf_insn *insn,
			       struct bpf_insn_aux_data *aux)
{
	struct btf *btf;
	int btf_fd;
	int err;

	/* Second half of the ld_imm64 pair carries the BTF object FD. */
	btf_fd = insn[1].imm;
	if (btf_fd) {
		btf = btf_get_by_fd(btf_fd);
		if (IS_ERR(btf)) {
			verbose(env, "invalid module BTF object FD specified.\n");
			return -EINVAL;
		}
	} else {
		if (!btf_vmlinux) {
			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
			return -EINVAL;
		}
		/* Take a reference so both branches exit holding a btf ref. */
		btf_get(btf_vmlinux);
		btf = btf_vmlinux;
	}

	err = __check_pseudo_btf_id(env, insn, aux, btf);
	if (err) {
		/* Drop the reference taken above; nothing else owns it yet. */
		btf_put(btf);
		return err;
	}

	/* Transfers ownership of the btf reference to env->used_btfs. */
	return __add_used_btf(env, btf);
}
18037 
18038 static bool is_tracing_prog_type(enum bpf_prog_type type)
18039 {
18040 	switch (type) {
18041 	case BPF_PROG_TYPE_KPROBE:
18042 	case BPF_PROG_TYPE_TRACEPOINT:
18043 	case BPF_PROG_TYPE_PERF_EVENT:
18044 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
18045 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
18046 		return true;
18047 	default:
18048 		return false;
18049 	}
18050 }
18051 
18052 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
18053 {
18054 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
18055 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
18056 }
18057 
/* Validate that @map may be used by @prog.  Some map features (spin locks,
 * rbtree/list heads, arenas, cgroup storage, exclusive-program hashes) are
 * incompatible with particular program types or require extra capabilities
 * or JIT support.  Returns 0 on success or a negative errno.
 */
static int check_map_prog_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map,
					struct bpf_prog *prog)

{
	enum bpf_prog_type prog_type = resolve_prog_type(prog);

	/* Exclusive maps are bound to the single program whose SHA256
	 * digest was registered at map creation time.
	 */
	if (map->excl_prog_sha &&
	    memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
		verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
		return -EACCES;
	}

	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
		if (is_tracing_prog_type(prog_type)) {
			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
			return -EINVAL;
		}
	}

	/* Spin locks are unsafe in contexts tracing progs may run in. */
	if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
			return -EINVAL;
		}

		if (is_tracing_prog_type(prog_type)) {
			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
			return -EINVAL;
		}
	}

	/* Offloaded progs and maps must target the same device. */
	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
	    !bpf_offload_prog_map_match(prog, map)) {
		verbose(env, "offload device mismatch between prog and map\n");
		return -EINVAL;
	}

	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
		return -EINVAL;
	}

	/* Sleepable programs may only use map types known to be safe
	 * against sleeping (RCU tasks-trace protected or preallocated).
	 * NOTE(review): the error message below lists fewer types than the
	 * switch actually allows.
	 */
	if (prog->sleepable)
		switch (map->map_type) {
		case BPF_MAP_TYPE_HASH:
		case BPF_MAP_TYPE_LRU_HASH:
		case BPF_MAP_TYPE_ARRAY:
		case BPF_MAP_TYPE_PERCPU_HASH:
		case BPF_MAP_TYPE_PERCPU_ARRAY:
		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
		case BPF_MAP_TYPE_HASH_OF_MAPS:
		case BPF_MAP_TYPE_RINGBUF:
		case BPF_MAP_TYPE_USER_RINGBUF:
		case BPF_MAP_TYPE_INODE_STORAGE:
		case BPF_MAP_TYPE_SK_STORAGE:
		case BPF_MAP_TYPE_TASK_STORAGE:
		case BPF_MAP_TYPE_CGRP_STORAGE:
		case BPF_MAP_TYPE_QUEUE:
		case BPF_MAP_TYPE_STACK:
		case BPF_MAP_TYPE_ARENA:
		case BPF_MAP_TYPE_INSN_ARRAY:
		case BPF_MAP_TYPE_PROG_ARRAY:
			break;
		default:
			verbose(env,
				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
			return -EINVAL;
		}

	/* At most one cgroup storage map of each type per program. */
	if (bpf_map_is_cgroup_storage(map) &&
	    bpf_cgroup_storage_assign(env->prog->aux, map)) {
		verbose(env, "only one cgroup storage of each type is allowed\n");
		return -EBUSY;
	}

	if (map->map_type == BPF_MAP_TYPE_ARENA) {
		if (env->prog->aux->arena) {
			verbose(env, "Only one arena per program\n");
			return -EBUSY;
		}
		if (!env->allow_ptr_leaks || !env->bpf_capable) {
			verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
			return -EPERM;
		}
		if (!env->prog->jit_requested) {
			verbose(env, "JIT is required to use arena\n");
			return -EOPNOTSUPP;
		}
		if (!bpf_jit_supports_arena()) {
			verbose(env, "JIT doesn't support arena\n");
			return -EOPNOTSUPP;
		}
		env->prog->aux->arena = (void *)map;
		if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
			verbose(env, "arena's user address must be set via map_extra or mmap()\n");
			return -EINVAL;
		}
	}

	return 0;
}
18162 
/* Record @map in env->used_maps (deduplicated) and take a reference on it.
 * Returns the map's index in used_maps, or a negative errno.
 */
static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
{
	int i, err;

	/* check whether we recorded this map already */
	for (i = 0; i < env->used_map_cnt; i++)
		if (env->used_maps[i] == map)
			return i;

	if (env->used_map_cnt >= MAX_USED_MAPS) {
		verbose(env, "The total number of maps per program has reached the limit of %u\n",
			MAX_USED_MAPS);
		return -E2BIG;
	}

	err = check_map_prog_compatibility(env, map, env->prog);
	if (err)
		return err;

	/* Sleepable programs additionally pin the map via sleepable_refcnt. */
	if (env->prog->sleepable)
		atomic64_inc(&map->sleepable_refcnt);

	/* hold the map. If the program is rejected by verifier,
	 * the map will be released by release_maps() or it
	 * will be used by the valid program until it's unloaded
	 * and all maps are released in bpf_free_used_maps()
	 */
	bpf_map_inc(map);

	env->used_maps[env->used_map_cnt++] = map;

	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
		err = bpf_insn_array_init(map, env->prog);
		if (err) {
			/* map is already in used_maps; the reference is
			 * dropped later by release_maps().
			 */
			verbose(env, "Failed to properly initialize insn array\n");
			return err;
		}
		env->insn_array_maps[env->insn_array_map_cnt++] = map;
	}

	return env->used_map_cnt - 1;
}
18205 
/* Add map behind fd to used maps list, if it's not already there, and return
 * its index.
 * Returns <0 on error, or >= 0 index, on success.
 */
static int add_used_map(struct bpf_verifier_env *env, int fd)
{
	struct bpf_map *map;
	/* scoped fd: automatically fdput() when f goes out of scope */
	CLASS(fd, f)(fd);

	map = __bpf_map_get(f);
	if (IS_ERR(map)) {
		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
		return PTR_ERR(map);
	}

	/* __add_used_map() takes its own reference on the map. */
	return __add_used_map(env, map);
}
18223 
/* Validate the encoding of a BPF_ALU/BPF_ALU64 instruction: reserved
 * src_reg/off/imm fields must be zero except where a specific opcode
 * assigns them meaning (sign-extending moves, addr_space_cast, signed
 * division/modulo).  Returns 0 if well-formed, -EINVAL otherwise.
 */
static int check_alu_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	u8 class = BPF_CLASS(insn->code);
	u8 opcode = BPF_OP(insn->code);

	switch (opcode) {
	case BPF_NEG:
		/* NEG takes no source operand; all operand fields reserved. */
		if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 ||
		    insn->off != 0 || insn->imm != 0) {
			verbose(env, "BPF_NEG uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_END:
		/* imm selects the swap width (16/32/64); ALU64 variant is
		 * the unconditional bswap and must use BPF_TO_LE encoding.
		 */
		if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
		    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
		    (class == BPF_ALU64 && BPF_SRC(insn->code) != BPF_TO_LE)) {
			verbose(env, "BPF_END uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_MOV:
		if (BPF_SRC(insn->code) == BPF_X) {
			if (class == BPF_ALU) {
				/* 32-bit mov: off 8/16 selects movsx width. */
				if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
				    insn->imm) {
					verbose(env, "BPF_MOV uses reserved fields\n");
					return -EINVAL;
				}
			} else if (insn->off == BPF_ADDR_SPACE_CAST) {
				/* imm encodes dst_as << 16 | src_as. */
				if (insn->imm != 1 && insn->imm != 1u << 16) {
					verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
					return -EINVAL;
				}
			} else if ((insn->off != 0 && insn->off != 8 &&
				    insn->off != 16 && insn->off != 32) || insn->imm) {
				verbose(env, "BPF_MOV uses reserved fields\n");
				return -EINVAL;
			}
		} else if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
			verbose(env, "BPF_MOV uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_ADD:
	case BPF_SUB:
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
	case BPF_MUL:
	case BPF_DIV:
	case BPF_MOD:
		/* off == 1 selects signed division/modulo; any other nonzero
		 * off, or off == 1 on other opcodes, is reserved.
		 */
		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
				verbose(env, "BPF_ALU uses reserved fields\n");
				return -EINVAL;
			}
		} else if (insn->src_reg != BPF_REG_0 ||
			   (insn->off != 0 && insn->off != 1) ||
			   (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
			verbose(env, "BPF_ALU uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	default:
		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
		return -EINVAL;
	}
}
18297 
/* Validate the encoding of a BPF_JMP/BPF_JMP32 instruction: reserved
 * fields must be zero except where the pseudo src_reg values (pseudo
 * call/kfunc, may_goto) give them meaning.  Returns 0 or -EINVAL.
 */
static int check_jmp_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	u8 class = BPF_CLASS(insn->code);
	u8 opcode = BPF_OP(insn->code);

	switch (opcode) {
	case BPF_CALL:
		/* src_reg distinguishes helper call (0), bpf-to-bpf call
		 * and kfunc call; only kfunc calls may use off.
		 */
		if (BPF_SRC(insn->code) != BPF_K ||
		    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && insn->off != 0) ||
		    (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL &&
		     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
		    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
			verbose(env, "BPF_CALL uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_JA:
		if (BPF_SRC(insn->code) == BPF_X) {
			/* indirect jump: target comes from dst_reg only */
			if (insn->src_reg != BPF_REG_0 || insn->imm != 0 || insn->off != 0) {
				verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
				return -EINVAL;
			}
		} else if (insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
			   (class == BPF_JMP && insn->imm != 0) ||
			   (class == BPF_JMP32 && insn->off != 0)) {
			/* JMP uses off as jump target, JMP32 uses imm */
			verbose(env, "BPF_JA uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_EXIT:
		if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 ||
		    insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
		    class == BPF_JMP32) {
			verbose(env, "BPF_EXIT uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_JCOND:
		/* only the may_goto form of JCOND is defined */
		if (insn->code != (BPF_JMP | BPF_JCOND) || insn->src_reg != BPF_MAY_GOTO ||
		    insn->dst_reg || insn->imm) {
			verbose(env, "invalid may_goto imm %d\n", insn->imm);
			return -EINVAL;
		}
		return 0;
	default:
		/* conditional jumps: exactly one of imm/src_reg is used
		 * depending on BPF_K vs BPF_X source encoding
		 */
		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0) {
				verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
				return -EINVAL;
			}
		} else if (insn->src_reg != BPF_REG_0) {
			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	}
}
18355 
/* Dispatch per-class encoding validation for a single instruction.
 * Ensures reserved fields are zero for the given class/mode combination.
 * Returns 0 if well-formed, -EINVAL otherwise.
 */
static int check_insn_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	switch (BPF_CLASS(insn->code)) {
	case BPF_ALU:
	case BPF_ALU64:
		return check_alu_fields(env, insn);
	case BPF_LDX:
		/* loads: plain BPF_MEM or sign-extending BPF_MEMSX only */
		if ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
		    insn->imm != 0) {
			verbose(env, "BPF_LDX uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_STX:
		/* atomics use imm as the atomic operation selector */
		if (BPF_MODE(insn->code) == BPF_ATOMIC)
			return 0;
		if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
			verbose(env, "BPF_STX uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_ST:
		if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
			verbose(env, "BPF_ST uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_JMP:
	case BPF_JMP32:
		return check_jmp_fields(env, insn);
	case BPF_LD: {
		u8 mode = BPF_MODE(insn->code);

		/* legacy packet-access loads, or the two-slot BPF_IMM form */
		if (mode == BPF_ABS || mode == BPF_IND) {
			if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
			    BPF_SIZE(insn->code) == BPF_DW ||
			    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
				verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
				return -EINVAL;
			}
		} else if (mode != BPF_IMM) {
			verbose(env, "invalid BPF_LD mode\n");
			return -EINVAL;
		}
		return 0;
	}
	default:
		verbose(env, "unknown insn class %d\n", BPF_CLASS(insn->code));
		return -EINVAL;
	}
}
18407 
18408 /*
18409  * Check that insns are sane and rewrite pseudo imm in ld_imm64 instructions:
18410  *
18411  * 1. if it accesses map FD, replace it with actual map pointer.
18412  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
18413  *
18414  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
18415  */
18416 static int check_and_resolve_insns(struct bpf_verifier_env *env)
18417 {
18418 	struct bpf_insn *insn = env->prog->insnsi;
18419 	int insn_cnt = env->prog->len;
18420 	int i, err;
18421 
18422 	err = bpf_prog_calc_tag(env->prog);
18423 	if (err)
18424 		return err;
18425 
18426 	for (i = 0; i < insn_cnt; i++, insn++) {
18427 		if (insn->dst_reg >= MAX_BPF_REG) {
18428 			verbose(env, "R%d is invalid\n", insn->dst_reg);
18429 			return -EINVAL;
18430 		}
18431 		if (insn->src_reg >= MAX_BPF_REG) {
18432 			verbose(env, "R%d is invalid\n", insn->src_reg);
18433 			return -EINVAL;
18434 		}
18435 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
18436 			struct bpf_insn_aux_data *aux;
18437 			struct bpf_map *map;
18438 			int map_idx;
18439 			u64 addr;
18440 			u32 fd;
18441 
18442 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
18443 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
18444 			    insn[1].off != 0) {
18445 				verbose(env, "invalid bpf_ld_imm64 insn\n");
18446 				return -EINVAL;
18447 			}
18448 
18449 			if (insn[0].off != 0) {
18450 				verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
18451 				return -EINVAL;
18452 			}
18453 
18454 			if (insn[0].src_reg == 0)
18455 				/* valid generic load 64-bit imm */
18456 				goto next_insn;
18457 
18458 			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
18459 				aux = &env->insn_aux_data[i];
18460 				err = check_pseudo_btf_id(env, insn, aux);
18461 				if (err)
18462 					return err;
18463 				goto next_insn;
18464 			}
18465 
18466 			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
18467 				aux = &env->insn_aux_data[i];
18468 				aux->ptr_type = PTR_TO_FUNC;
18469 				goto next_insn;
18470 			}
18471 
18472 			/* In final convert_pseudo_ld_imm64() step, this is
18473 			 * converted into regular 64-bit imm load insn.
18474 			 */
18475 			switch (insn[0].src_reg) {
18476 			case BPF_PSEUDO_MAP_VALUE:
18477 			case BPF_PSEUDO_MAP_IDX_VALUE:
18478 				break;
18479 			case BPF_PSEUDO_MAP_FD:
18480 			case BPF_PSEUDO_MAP_IDX:
18481 				if (insn[1].imm == 0)
18482 					break;
18483 				fallthrough;
18484 			default:
18485 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
18486 				return -EINVAL;
18487 			}
18488 
18489 			switch (insn[0].src_reg) {
18490 			case BPF_PSEUDO_MAP_IDX_VALUE:
18491 			case BPF_PSEUDO_MAP_IDX:
18492 				if (bpfptr_is_null(env->fd_array)) {
18493 					verbose(env, "fd_idx without fd_array is invalid\n");
18494 					return -EPROTO;
18495 				}
18496 				if (copy_from_bpfptr_offset(&fd, env->fd_array,
18497 							    insn[0].imm * sizeof(fd),
18498 							    sizeof(fd)))
18499 					return -EFAULT;
18500 				break;
18501 			default:
18502 				fd = insn[0].imm;
18503 				break;
18504 			}
18505 
18506 			map_idx = add_used_map(env, fd);
18507 			if (map_idx < 0)
18508 				return map_idx;
18509 			map = env->used_maps[map_idx];
18510 
18511 			aux = &env->insn_aux_data[i];
18512 			aux->map_index = map_idx;
18513 
18514 			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
18515 			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
18516 				addr = (unsigned long)map;
18517 			} else {
18518 				u32 off = insn[1].imm;
18519 
18520 				if (!map->ops->map_direct_value_addr) {
18521 					verbose(env, "no direct value access support for this map type\n");
18522 					return -EINVAL;
18523 				}
18524 
18525 				err = map->ops->map_direct_value_addr(map, &addr, off);
18526 				if (err) {
18527 					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
18528 						map->value_size, off);
18529 					return err;
18530 				}
18531 
18532 				aux->map_off = off;
18533 				addr += off;
18534 			}
18535 
18536 			insn[0].imm = (u32)addr;
18537 			insn[1].imm = addr >> 32;
18538 
18539 next_insn:
18540 			insn++;
18541 			i++;
18542 			continue;
18543 		}
18544 
18545 		/* Basic sanity check before we invest more work here. */
18546 		if (!bpf_opcode_in_insntable(insn->code)) {
18547 			verbose(env, "unknown opcode %02x\n", insn->code);
18548 			return -EINVAL;
18549 		}
18550 
18551 		err = check_insn_fields(env, insn);
18552 		if (err)
18553 			return err;
18554 	}
18555 
18556 	/* now all pseudo BPF_LD_IMM64 instructions load valid
18557 	 * 'struct bpf_map *' into a register instead of user map_fd.
18558 	 * These pointers will be used later by verifier to validate map access.
18559 	 */
18560 	return 0;
18561 }
18562 
/* drop refcnt of maps used by the rejected program; pairs with the
 * bpf_map_inc() taken in __add_used_map()
 */
static void release_maps(struct bpf_verifier_env *env)
{
	__bpf_free_used_maps(env->prog->aux, env->used_maps,
			     env->used_map_cnt);
}
18569 
/* drop refcnt of BTF objects used by the rejected program; pairs with the
 * references handed to __add_used_btf()
 */
static void release_btfs(struct bpf_verifier_env *env)
{
	__bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
}
18575 
18576 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
18577 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18578 {
18579 	struct bpf_insn *insn = env->prog->insnsi;
18580 	int insn_cnt = env->prog->len;
18581 	int i;
18582 
18583 	for (i = 0; i < insn_cnt; i++, insn++) {
18584 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
18585 			continue;
18586 		if (insn->src_reg == BPF_PSEUDO_FUNC)
18587 			continue;
18588 		insn->src_reg = 0;
18589 	}
18590 }
18591 
18592 static void release_insn_arrays(struct bpf_verifier_env *env)
18593 {
18594 	int i;
18595 
18596 	for (i = 0; i < env->insn_array_map_cnt; i++)
18597 		bpf_insn_array_release(env->insn_array_maps[i]);
18598 }
18599 
18600 
18601 
18602 /* The verifier does more data flow analysis than llvm and will not
18603  * explore branches that are dead at run time. Malicious programs can
18604  * have dead code too. Therefore replace all dead at-run-time code
18605  * with 'ja -1'.
18606  *
18607  * Just nops are not optimal, e.g. if they would sit at the end of the
18608  * program and through another bug we would manage to jump there, then
18609  * we'd execute beyond program memory otherwise. Returning exception
18610  * code also wouldn't work since we can have subprogs where the dead
18611  * code could be located.
18612  */
18613 static void sanitize_dead_code(struct bpf_verifier_env *env)
18614 {
18615 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18616 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
18617 	struct bpf_insn *insn = env->prog->insnsi;
18618 	const int insn_cnt = env->prog->len;
18619 	int i;
18620 
18621 	for (i = 0; i < insn_cnt; i++) {
18622 		if (aux_data[i].seen)
18623 			continue;
18624 		memcpy(insn + i, &trap, sizeof(trap));
18625 		aux_data[i].zext_dst = false;
18626 	}
18627 }
18628 
18629 
18630 
/* Free all verifier state held by @env: the in-flight current state, the
 * pending branch stack, the free list, per-SCC visit bookkeeping and the
 * explored-states hash table.  List heads are re-initialized so the env
 * can be reused for another verification pass.
 */
static void free_states(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state_list *sl;
	struct list_head *head, *pos, *tmp;
	struct bpf_scc_info *info;
	int i, j;

	bpf_free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* drain the branch stack without processing the popped states */
	while (!pop_stack(env, NULL, NULL, false));

	list_for_each_safe(pos, tmp, &env->free_list) {
		sl = container_of(pos, struct bpf_verifier_state_list, node);
		bpf_free_verifier_state(&sl->state, false);
		kfree(sl);
	}
	INIT_LIST_HEAD(&env->free_list);

	for (i = 0; i < env->scc_cnt; ++i) {
		info = env->scc_info[i];
		if (!info)
			continue;
		for (j = 0; j < info->num_visits; j++)
			bpf_free_backedges(&info->visits[j]);
		kvfree(info);
		env->scc_info[i] = NULL;
	}

	/* explored_states may not have been allocated yet on early errors */
	if (!env->explored_states)
		return;

	for (i = 0; i < state_htab_size(env); i++) {
		head = &env->explored_states[i];

		list_for_each_safe(pos, tmp, head) {
			sl = container_of(pos, struct bpf_verifier_state_list, node);
			bpf_free_verifier_state(&sl->state, false);
			kfree(sl);
		}
		INIT_LIST_HEAD(&env->explored_states[i]);
	}
}
18673 
/* Set up the initial verifier state for @subprog (0 == main program) and
 * run do_check() on it.  For global subprogs and EXT programs the argument
 * registers are initialized from the subprog's BTF-derived prototype; for
 * the main program R1 is the context pointer.  All per-pass state is freed
 * before returning.
 */
static int do_check_common(struct bpf_verifier_env *env, int subprog)
{
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_subprog_info *sub = subprog_info(env, subprog);
	struct bpf_prog_aux *aux = env->prog->aux;
	struct bpf_verifier_state *state;
	struct bpf_reg_state *regs;
	int ret, i;

	env->prev_linfo = NULL;
	env->pass_cnt++;

	state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT);
	if (!state)
		return -ENOMEM;
	state->curframe = 0;
	state->speculative = false;
	state->branches = 1;
	state->in_sleepable = env->prog->sleepable;
	state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT);
	if (!state->frame[0]) {
		kfree(state);
		return -ENOMEM;
	}
	env->cur_state = state;
	init_func_state(env, state->frame[0],
			BPF_MAIN_FUNC /* callsite */,
			0 /* frameno */,
			subprog);
	state->first_insn_idx = env->subprog_info[subprog].start;
	state->last_insn_idx = -1;

	regs = state->frame[state->curframe]->regs;
	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
		const char *sub_name = subprog_name(env, subprog);
		struct bpf_subprog_arg_info *arg;
		struct bpf_reg_state *reg;

		if (env->log.level & BPF_LOG_LEVEL)
			verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
		ret = btf_prepare_func_args(env, subprog);
		if (ret)
			goto out;

		if (subprog_is_exc_cb(env, subprog)) {
			state->frame[0]->in_exception_callback_fn = true;

			/*
			 * Global functions are scalar or void, make sure
			 * we return a scalar.
			 */
			if (subprog_returns_void(env, subprog)) {
				verbose(env, "exception cb cannot return void\n");
				ret = -EINVAL;
				goto out;
			}

			/* Also ensure the callback only has a single scalar argument. */
			if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
				verbose(env, "exception cb only supports single integer argument\n");
				ret = -EINVAL;
				goto out;
			}
		}
		/* Seed R1..Rn from the subprog's declared argument types. */
		for (i = BPF_REG_1; i <= sub->arg_cnt; i++) {
			arg = &sub->args[i - BPF_REG_1];
			reg = &regs[i];

			if (arg->arg_type == ARG_PTR_TO_CTX) {
				reg->type = PTR_TO_CTX;
				mark_reg_known_zero(env, regs, i);
			} else if (arg->arg_type == ARG_ANYTHING) {
				reg->type = SCALAR_VALUE;
				mark_reg_unknown(env, regs, i);
			} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
				/* assume unspecial LOCAL dynptr type */
				__mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen);
			} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
				reg->type = PTR_TO_MEM;
				/* carry over the nullability/trust modifiers */
				reg->type |= arg->arg_type &
					     (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY);
				mark_reg_known_zero(env, regs, i);
				reg->mem_size = arg->mem_size;
				if (arg->arg_type & PTR_MAYBE_NULL)
					reg->id = ++env->id_gen;
			} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
				reg->type = PTR_TO_BTF_ID;
				if (arg->arg_type & PTR_MAYBE_NULL)
					reg->type |= PTR_MAYBE_NULL;
				if (arg->arg_type & PTR_UNTRUSTED)
					reg->type |= PTR_UNTRUSTED;
				if (arg->arg_type & PTR_TRUSTED)
					reg->type |= PTR_TRUSTED;
				mark_reg_known_zero(env, regs, i);
				reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
				reg->btf_id = arg->btf_id;
				reg->id = ++env->id_gen;
			} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
				/* caller can pass either PTR_TO_ARENA or SCALAR */
				mark_reg_unknown(env, regs, i);
			} else {
				verifier_bug(env, "unhandled arg#%d type %d",
					     i - BPF_REG_1, arg->arg_type);
				ret = -EFAULT;
				goto out;
			}
		}
	} else {
		/* if main BPF program has associated BTF info, validate that
		 * it's matching expected signature, and otherwise mark BTF
		 * info for main program as unreliable
		 */
		if (env->prog->aux->func_info_aux) {
			ret = btf_prepare_func_args(env, 0);
			if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX)
				env->prog->aux->func_info_aux[0].unreliable = true;
		}

		/* 1st arg to a function */
		regs[BPF_REG_1].type = PTR_TO_CTX;
		mark_reg_known_zero(env, regs, BPF_REG_1);
	}

	/* Acquire references for struct_ops program arguments tagged with "__ref" */
	if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
		for (i = 0; i < aux->ctx_arg_info_size; i++)
			aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ?
							  acquire_reference(env, 0) : 0;
	}

	ret = do_check(env);
out:
	/* On success without LEVEL2 logging, discard the verbose log. */
	if (!ret && pop_log)
		bpf_vlog_reset(&env->log, 0);
	free_states(env);
	return ret;
}
18811 
/* Lazily verify all global functions based on their BTF, if they are called
 * from main BPF program or any of subprograms transitively.
 * BPF global subprogs called from dead code are not validated.
 * All callable global functions must pass verification.
 * Otherwise the whole program is rejected.
 * Consider:
 * int bar(int);
 * int foo(int f)
 * {
 *    return bar(f);
 * }
 * int bar(int b)
 * {
 *    ...
 * }
 * foo() will be verified first for R1=any_scalar_value. During verification it
 * will be assumed that bar() already verified successfully and call to bar()
 * from foo() will be checked for type match only. Later bar() will be verified
 * independently to check that it's safe for R1=any_scalar_value.
 */
static int do_check_subprogs(struct bpf_verifier_env *env)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	struct bpf_func_info_aux *sub_aux;
	int i, ret, new_cnt;

	/* without func_info there are no global subprogs to verify */
	if (!aux->func_info)
		return 0;

	/* exception callback is presumed to be always called */
	if (env->exception_callback_subprog)
		subprog_aux(env, env->exception_callback_subprog)->called = true;

again:
	new_cnt = 0;
	for (i = 1; i < env->subprog_cnt; i++) {
		if (!bpf_subprog_is_global(env, i))
			continue;

		/* skip subprogs never called or already verified */
		sub_aux = subprog_aux(env, i);
		if (!sub_aux->called || sub_aux->verified)
			continue;

		env->insn_idx = env->subprog_info[i].start;
		WARN_ON_ONCE(env->insn_idx == 0);
		ret = do_check_common(env, i);
		if (ret) {
			return ret;
		} else if (env->log.level & BPF_LOG_LEVEL) {
			verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
				i, subprog_name(env, i));
		}

		/* We verified new global subprog, it might have called some
		 * more global subprogs that we haven't verified yet, so we
		 * need to do another pass over subprogs to verify those.
		 */
		sub_aux->verified = true;
		new_cnt++;
	}

	/* We can't loop forever as we verify at least one global subprog on
	 * each pass.
	 */
	if (new_cnt)
		goto again;

	return 0;
}
18881 
18882 static int do_check_main(struct bpf_verifier_env *env)
18883 {
18884 	int ret;
18885 
18886 	env->insn_idx = 0;
18887 	ret = do_check_common(env, 0);
18888 	if (!ret)
18889 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18890 	return ret;
18891 }
18892 
18893 
18894 static void print_verification_stats(struct bpf_verifier_env *env)
18895 {
18896 	int i;
18897 
18898 	if (env->log.level & BPF_LOG_STATS) {
18899 		verbose(env, "verification time %lld usec\n",
18900 			div_u64(env->verification_time, 1000));
18901 		verbose(env, "stack depth ");
18902 		for (i = 0; i < env->subprog_cnt; i++) {
18903 			u32 depth = env->subprog_info[i].stack_depth;
18904 
18905 			verbose(env, "%d", depth);
18906 			if (i + 1 < env->subprog_cnt)
18907 				verbose(env, "+");
18908 		}
18909 		verbose(env, "\n");
18910 	}
18911 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
18912 		"total_states %d peak_states %d mark_read %d\n",
18913 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
18914 		env->max_states_per_insn, env->total_states,
18915 		env->peak_states, env->longest_mark_read_walk);
18916 }
18917 
18918 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
18919 			       const struct bpf_ctx_arg_aux *info, u32 cnt)
18920 {
18921 	prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
18922 	prog->aux->ctx_arg_info_size = cnt;
18923 
18924 	return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
18925 }
18926 
/*
 * Validate the attach target of a BPF_PROG_TYPE_STRUCT_OPS program.
 *
 * Resolves prog->aux->attach_btf_id to a supported struct_ops type, checks
 * that expected_attach_type names a valid function-pointer member of it,
 * and records the resolved member (offset, func proto, name) on the prog
 * and the struct_ops verifier_ops on @env.
 *
 * Returns 0 on success or a negative errno on rejection.
 */
static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
{
	const struct btf_type *t, *func_proto;
	const struct bpf_struct_ops_desc *st_ops_desc;
	const struct bpf_struct_ops *st_ops;
	const struct btf_member *member;
	struct bpf_prog *prog = env->prog;
	bool has_refcounted_arg = false;
	u32 btf_id, member_idx, member_off;
	struct btf *btf;
	const char *mname;
	int i, err;

	if (!prog->gpl_compatible) {
		verbose(env, "struct ops programs must have a GPL compatible license\n");
		return -EINVAL;
	}

	if (!prog->aux->attach_btf_id)
		return -ENOTSUPP;

	btf = prog->aux->attach_btf;
	if (btf_is_module(btf)) {
		/* Make sure st_ops is valid through the lifetime of env */
		env->attach_btf_mod = btf_try_get_module(btf);
		if (!env->attach_btf_mod) {
			verbose(env, "struct_ops module %s is not found\n",
				btf_get_name(btf));
			return -ENOTSUPP;
		}
	}

	btf_id = prog->aux->attach_btf_id;
	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
	if (!st_ops_desc) {
		verbose(env, "attach_btf_id %u is not a supported struct\n",
			btf_id);
		return -ENOTSUPP;
	}
	st_ops = st_ops_desc->st_ops;

	/* For struct_ops progs, expected_attach_type carries the index of
	 * the struct member this program implements.
	 */
	t = st_ops_desc->type;
	member_idx = prog->expected_attach_type;
	if (member_idx >= btf_type_vlen(t)) {
		verbose(env, "attach to invalid member idx %u of struct %s\n",
			member_idx, st_ops->name);
		return -EINVAL;
	}

	/* The chosen member must resolve to a function pointer. */
	member = &btf_type_member(t)[member_idx];
	mname = btf_name_by_offset(btf, member->name_off);
	func_proto = btf_type_resolve_func_ptr(btf, member->type,
					       NULL);
	if (!func_proto) {
		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
			mname, member_idx, st_ops->name);
		return -EINVAL;
	}

	/* Byte offset of the member within the struct; identifies the slot. */
	member_off = __btf_member_bit_offset(t, member) / 8;
	err = bpf_struct_ops_supported(st_ops, member_off);
	if (err) {
		verbose(env, "attach to unsupported member %s of struct %s\n",
			mname, st_ops->name);
		return err;
	}

	/* Let the struct_ops implementation veto this member if it wants. */
	if (st_ops->check_member) {
		err = st_ops->check_member(t, member, prog);

		if (err) {
			verbose(env, "attach to unsupported member %s of struct %s\n",
				mname, st_ops->name);
			return err;
		}
	}

	if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
		verbose(env, "Private stack not supported by jit\n");
		return -EACCES;
	}

	/* Scan the member's argument descriptors for any __ref argument. */
	for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
		if (st_ops_desc->arg_info[member_idx].info[i].refcounted) {
			has_refcounted_arg = true;
			break;
		}
	}

	/* Tail call is not allowed for programs with refcounted arguments since we
	 * cannot guarantee that valid refcounted kptrs will be passed to the callee.
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
			verbose(env, "program with __ref argument cannot tail call\n");
			return -EINVAL;
		}
	}

	/* Record the resolved attach point for later verification/attach. */
	prog->aux->st_ops = st_ops;
	prog->aux->attach_st_ops_member_off = member_off;

	prog->aux->attach_func_proto = func_proto;
	prog->aux->attach_func_name = mname;
	env->ops = st_ops->verifier_ops;

	return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
					  st_ops_desc->arg_info[member_idx].cnt);
}
19036 #define SECURITY_PREFIX "security_"
19037 
19038 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
19039 
19040 /* list of non-sleepable functions that are otherwise on
19041  * ALLOW_ERROR_INJECTION list
19042  */
19043 BTF_SET_START(btf_non_sleepable_error_inject)
19044 /* Three functions below can be called from sleepable and non-sleepable context.
19045  * Assume non-sleepable from bpf safety point of view.
19046  */
19047 BTF_ID(func, __filemap_add_folio)
19048 #ifdef CONFIG_FAIL_PAGE_ALLOC
19049 BTF_ID(func, should_fail_alloc_page)
19050 #endif
19051 #ifdef CONFIG_FAILSLAB
19052 BTF_ID(func, should_failslab)
19053 #endif
19054 BTF_SET_END(btf_non_sleepable_error_inject)
19055 
19056 static int check_non_sleepable_error_inject(u32 btf_id)
19057 {
19058 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
19059 }
19060 
19061 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
19062 {
19063 	/* fentry/fexit/fmod_ret progs can be sleepable if they are
19064 	 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
19065 	 */
19066 	if (!check_non_sleepable_error_inject(btf_id) &&
19067 	    within_error_injection_list(addr))
19068 		return 0;
19069 
19070 	return -EINVAL;
19071 }
19072 
19073 static int check_attach_modify_return(unsigned long addr, const char *func_name)
19074 {
19075 	if (within_error_injection_list(addr) ||
19076 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
19077 		return 0;
19078 
19079 	return -EINVAL;
19080 }
19081 
19082 #else
19083 
/* Unfortunately, the arch-specific syscall wrapper prefixes are hard-coded in
 * arch syscall code, so we need to hard-code them here, too.  Ftrace has
 * arch_syscall_match_sym_name() but that just compares two concrete function
 * names.
 */
static bool has_arch_syscall_prefix(const char *func_name)
{
	const char *sys_prefix = NULL;

#if defined(__x86_64__)
	sys_prefix = "__x64_";
#elif defined(__i386__)
	sys_prefix = "__ia32_";
#elif defined(__s390x__)
	sys_prefix = "__s390x_";
#elif defined(__aarch64__)
	sys_prefix = "__arm64_";
#elif defined(__riscv)
	sys_prefix = "__riscv_";
#elif defined(__powerpc__) || defined(__powerpc64__)
	sys_prefix = "sys_";
#elif defined(__loongarch__)
	sys_prefix = "sys_";
#endif
	/* Unknown arch: no recognizable syscall prefix. */
	return sys_prefix && !strncmp(func_name, sys_prefix, strlen(sys_prefix));
}
19108 
19109 /* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
19110 
19111 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
19112 {
19113 	if (has_arch_syscall_prefix(func_name))
19114 		return 0;
19115 
19116 	return -EINVAL;
19117 }
19118 
19119 static int check_attach_modify_return(unsigned long addr, const char *func_name)
19120 {
19121 	if (has_arch_syscall_prefix(func_name) ||
19122 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
19123 		return 0;
19124 
19125 	return -EINVAL;
19126 }
19127 
19128 #endif /* CONFIG_FUNCTION_ERROR_INJECTION */
19129 
/*
 * Resolve and validate the attach target for tracing/LSM/extension programs.
 *
 * @log:      verifier log for diagnostics
 * @prog:     program being loaded
 * @tgt_prog: target BPF program when attaching to another prog, NULL for
 *            kernel targets
 * @btf_id:   BTF id of the target (typedef for raw_tp, func otherwise;
 *            for prog targets, a func_info type_id of a subprog)
 * @tgt_info: out param; on success holds the target address, name, resolved
 *            func_proto type, and (for module-backed kernel targets) a module
 *            reference the caller is responsible for releasing
 *
 * Returns 0 on success, negative errno on rejection.
 */
int bpf_check_attach_target(struct bpf_verifier_log *log,
			    const struct bpf_prog *prog,
			    const struct bpf_prog *tgt_prog,
			    u32 btf_id,
			    struct bpf_attach_target_info *tgt_info)
{
	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
	char trace_symbol[KSYM_SYMBOL_LEN];
	const char prefix[] = "btf_trace_";
	struct bpf_raw_event_map *btp;
	int ret = 0, subprog = -1, i;
	const struct btf_type *t;
	bool conservative = true;
	const char *tname, *fname;
	struct btf *btf;
	long addr = 0;
	struct module *mod = NULL;

	if (!btf_id) {
		bpf_log(log, "Tracing programs must provide btf_id\n");
		return -EINVAL;
	}
	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
	if (!btf) {
		bpf_log(log,
			"Tracing program can only be attached to another program annotated with BTF\n");
		return -EINVAL;
	}
	t = btf_type_by_id(btf, btf_id);
	if (!t) {
		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
		return -EINVAL;
	}
	tname = btf_name_by_offset(btf, t->name_off);
	if (!tname) {
		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
		return -EINVAL;
	}
	/* Attaching to another BPF program: btf_id must name one of its
	 * subprogs, and a number of nesting/extension rules apply.
	 */
	if (tgt_prog) {
		struct bpf_prog_aux *aux = tgt_prog->aux;
		bool tgt_changes_pkt_data;
		bool tgt_might_sleep;

		if (bpf_prog_is_dev_bound(prog->aux) &&
		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
			bpf_log(log, "Target program bound device mismatch");
			return -EINVAL;
		}

		for (i = 0; i < aux->func_info_cnt; i++)
			if (aux->func_info[i].type_id == btf_id) {
				subprog = i;
				break;
			}
		if (subprog == -1) {
			bpf_log(log, "Subprog %s doesn't exist\n", tname);
			return -EINVAL;
		}
		if (aux->func && aux->func[subprog]->aux->exception_cb) {
			bpf_log(log,
				"%s programs cannot attach to exception callback\n",
				prog_extension ? "Extension" : "Tracing");
			return -EINVAL;
		}
		/* Static functions carry unreliable BTF func info. */
		conservative = aux->func_info_aux[subprog].unreliable;
		if (prog_extension) {
			if (conservative) {
				bpf_log(log,
					"Cannot replace static functions\n");
				return -EINVAL;
			}
			if (!prog->jit_requested) {
				bpf_log(log,
					"Extension programs should be JITed\n");
				return -EINVAL;
			}
			/* An freplace prog must not be "stronger" than its
			 * target: no pkt-data writes and no sleeping unless
			 * the replaced (sub)prog does the same.
			 */
			tgt_changes_pkt_data = aux->func
					       ? aux->func[subprog]->aux->changes_pkt_data
					       : aux->changes_pkt_data;
			if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
				bpf_log(log,
					"Extension program changes packet data, while original does not\n");
				return -EINVAL;
			}

			tgt_might_sleep = aux->func
					  ? aux->func[subprog]->aux->might_sleep
					  : aux->might_sleep;
			if (prog->aux->might_sleep && !tgt_might_sleep) {
				bpf_log(log,
					"Extension program may sleep, while original does not\n");
				return -EINVAL;
			}
		}
		if (!tgt_prog->jited) {
			bpf_log(log, "Can attach to only JITed progs\n");
			return -EINVAL;
		}
		if (prog_tracing) {
			if (aux->attach_tracing_prog) {
				/*
				 * Target program is an fentry/fexit which is already attached
				 * to another tracing program. More levels of nesting
				 * attachment are not allowed.
				 */
				bpf_log(log, "Cannot nest tracing program attach more than once\n");
				return -EINVAL;
			}
		} else if (tgt_prog->type == prog->type) {
			/*
			 * To avoid potential call chain cycles, prevent attaching of a
			 * program extension to another extension. It's ok to attach
			 * fentry/fexit to extension program.
			 */
			bpf_log(log, "Cannot recursively attach\n");
			return -EINVAL;
		}
		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
		    prog_extension &&
		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) {
			/* Program extensions can extend all program types
			 * except fentry/fexit. The reason is the following.
			 * The fentry/fexit programs are used for performance
			 * analysis, stats and can be attached to any program
			 * type. When extension program is replacing XDP function
			 * it is necessary to allow performance analysis of all
			 * functions. Both original XDP program and its program
			 * extension. Hence attaching fentry/fexit to
			 * BPF_PROG_TYPE_EXT is allowed. If extending of
			 * fentry/fexit was allowed it would be possible to create
			 * long call chain fentry->extension->fentry->extension
			 * beyond reasonable stack size. Hence extending fentry
			 * is not allowed.
			 */
			bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
			return -EINVAL;
		}
	} else {
		if (prog_extension) {
			bpf_log(log, "Cannot replace kernel functions\n");
			return -EINVAL;
		}
	}

	/* Per-attach-type validation of the target's BTF type. */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_RAW_TP:
		if (tgt_prog) {
			bpf_log(log,
				"Only FENTRY/FEXIT/FSESSION progs are attachable to another BPF prog\n");
			return -EINVAL;
		}
		/* Raw tracepoints attach to the "btf_trace_<name>" typedef. */
		if (!btf_type_is_typedef(t)) {
			bpf_log(log, "attach_btf_id %u is not a typedef\n",
				btf_id);
			return -EINVAL;
		}
		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
				btf_id, tname);
			return -EINVAL;
		}
		tname += sizeof(prefix) - 1;

		/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
		 * names. Thus using bpf_raw_event_map to get argument names.
		 */
		btp = bpf_get_raw_tracepoint(tname);
		if (!btp)
			return -EINVAL;
		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
					trace_symbol);
		bpf_put_raw_tracepoint(btp);

		if (fname)
			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);

		if (!fname || ret < 0) {
			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
				prefix, tname);
			t = btf_type_by_id(btf, t->type);
			if (!btf_type_is_ptr(t))
				/* should never happen in valid vmlinux build */
				return -EINVAL;
		} else {
			t = btf_type_by_id(btf, ret);
			if (!btf_type_is_func(t))
				/* should never happen in valid vmlinux build */
				return -EINVAL;
		}

		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			/* should never happen in valid vmlinux build */
			return -EINVAL;

		break;
	case BPF_TRACE_ITER:
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;
		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret)
			return ret;
		break;
	default:
		if (!prog_extension)
			return -EINVAL;
		fallthrough;
	case BPF_MODIFY_RETURN:
	case BPF_LSM_MAC:
	case BPF_LSM_CGROUP:
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
	case BPF_TRACE_FSESSION:
		if (prog->expected_attach_type == BPF_TRACE_FSESSION &&
		    !bpf_jit_supports_fsession()) {
			bpf_log(log, "JIT does not support fsession\n");
			return -EOPNOTSUPP;
		}
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		if (prog_extension &&
		    btf_check_type_match(log, prog, btf, t))
			return -EINVAL;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;

		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
			return -EINVAL;

		/* Unreliable subprog BTF: pass t = NULL — presumably
		 * btf_distill_func_proto() then builds a conservative
		 * function model; confirm against its implementation.
		 */
		if (tgt_prog && conservative)
			t = NULL;

		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret < 0)
			return ret;

		if (tgt_prog) {
			if (subprog == 0)
				addr = (long) tgt_prog->bpf_func;
			else
				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
		} else {
			/* Kernel function target: resolve its address.  For
			 * module BTF a module ref is taken here; it is either
			 * handed to the caller via tgt_info->tgt_mod or put
			 * on the error paths below (module_put(NULL) is a
			 * no-op, so the non-module case is fine too).
			 */
			if (btf_is_module(btf)) {
				mod = btf_try_get_module(btf);
				if (mod)
					addr = find_kallsyms_symbol_value(mod, tname);
				else
					addr = 0;
			} else {
				addr = kallsyms_lookup_name(tname);
			}
			if (!addr) {
				module_put(mod);
				bpf_log(log,
					"The address of function %s cannot be found\n",
					tname);
				return -ENOENT;
			}
		}

		if (prog->sleepable) {
			ret = -EINVAL;
			switch (prog->type) {
			case BPF_PROG_TYPE_TRACING:
				if (!check_attach_sleepable(btf_id, addr, tname))
					ret = 0;
				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
				 * in the fmodret id set with the KF_SLEEPABLE flag.
				 */
				else {
					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
										prog);

					if (flags && (*flags & KF_SLEEPABLE))
						ret = 0;
				}
				break;
			case BPF_PROG_TYPE_LSM:
				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
				 * Only some of them are sleepable.
				 */
				if (bpf_lsm_is_sleepable_hook(btf_id))
					ret = 0;
				break;
			default:
				break;
			}
			if (ret) {
				module_put(mod);
				bpf_log(log, "%s is not sleepable\n", tname);
				return ret;
			}
		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
			if (tgt_prog) {
				module_put(mod);
				bpf_log(log, "can't modify return codes of BPF programs\n");
				return -EINVAL;
			}
			ret = -EINVAL;
			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
			    !check_attach_modify_return(addr, tname))
				ret = 0;
			if (ret) {
				module_put(mod);
				bpf_log(log, "%s() is not modifiable\n", tname);
				return ret;
			}
		}

		break;
	}
	tgt_info->tgt_addr = addr;
	tgt_info->tgt_name = tname;
	tgt_info->tgt_type = t;
	tgt_info->tgt_mod = mod;
	return 0;
}
19462 
19463 BTF_SET_START(btf_id_deny)
19464 BTF_ID_UNUSED
19465 #ifdef CONFIG_SMP
19466 BTF_ID(func, ___migrate_enable)
19467 BTF_ID(func, migrate_disable)
19468 BTF_ID(func, migrate_enable)
19469 #endif
19470 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
19471 BTF_ID(func, rcu_read_unlock_strict)
19472 #endif
19473 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
19474 BTF_ID(func, preempt_count_add)
19475 BTF_ID(func, preempt_count_sub)
19476 #endif
19477 #ifdef CONFIG_PREEMPT_RCU
19478 BTF_ID(func, __rcu_read_lock)
19479 BTF_ID(func, __rcu_read_unlock)
19480 #endif
19481 BTF_SET_END(btf_id_deny)
19482 
19483 /* fexit and fmod_ret can't be used to attach to __noreturn functions.
19484  * Currently, we must manually list all __noreturn functions here. Once a more
19485  * robust solution is implemented, this workaround can be removed.
19486  */
19487 BTF_SET_START(noreturn_deny)
19488 #ifdef CONFIG_IA32_EMULATION
19489 BTF_ID(func, __ia32_sys_exit)
19490 BTF_ID(func, __ia32_sys_exit_group)
19491 #endif
19492 #ifdef CONFIG_KUNIT
19493 BTF_ID(func, __kunit_abort)
19494 BTF_ID(func, kunit_try_catch_throw)
19495 #endif
19496 #ifdef CONFIG_MODULES
19497 BTF_ID(func, __module_put_and_kthread_exit)
19498 #endif
19499 #ifdef CONFIG_X86_64
19500 BTF_ID(func, __x64_sys_exit)
19501 BTF_ID(func, __x64_sys_exit_group)
19502 #endif
19503 BTF_ID(func, do_exit)
19504 BTF_ID(func, do_group_exit)
19505 BTF_ID(func, kthread_complete_and_exit)
19506 BTF_ID(func, make_task_dead)
19507 BTF_SET_END(noreturn_deny)
19508 
19509 static bool can_be_sleepable(struct bpf_prog *prog)
19510 {
19511 	if (prog->type == BPF_PROG_TYPE_TRACING) {
19512 		switch (prog->expected_attach_type) {
19513 		case BPF_TRACE_FENTRY:
19514 		case BPF_TRACE_FEXIT:
19515 		case BPF_MODIFY_RETURN:
19516 		case BPF_TRACE_ITER:
19517 		case BPF_TRACE_FSESSION:
19518 			return true;
19519 		default:
19520 			return false;
19521 		}
19522 	}
19523 	return prog->type == BPF_PROG_TYPE_LSM ||
19524 	       prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
19525 	       prog->type == BPF_PROG_TYPE_STRUCT_OPS;
19526 }
19527 
/*
 * Validate prog->aux->attach_btf_id for program types that use it and set up
 * the attach machinery: struct_ops resolution, env->ops inheritance for
 * freplace, iter support check, LSM verification, denylist checks, and the
 * destination trampoline for fentry-style attachments.
 */
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog;
	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
	struct bpf_attach_target_info tgt_info = {};
	u32 btf_id = prog->aux->attach_btf_id;
	struct bpf_trampoline *tr;
	int ret;
	u64 key;

	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
		if (prog->sleepable)
			/* attach_btf_id checked to be zero already */
			return 0;
		verbose(env, "Syscall programs can only be sleepable\n");
		return -EINVAL;
	}

	if (prog->sleepable && !can_be_sleepable(prog)) {
		verbose(env, "Only fentry/fexit/fsession/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
		return -EINVAL;
	}

	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
		return check_struct_ops_btf_id(env);

	/* Only tracing/LSM/extension programs carry an attach target. */
	if (prog->type != BPF_PROG_TYPE_TRACING &&
	    prog->type != BPF_PROG_TYPE_LSM &&
	    prog->type != BPF_PROG_TYPE_EXT)
		return 0;

	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
	if (ret)
		return ret;

	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
		/* to make freplace equivalent to their targets, they need to
		 * inherit env->ops and expected_attach_type for the rest of the
		 * verification
		 */
		env->ops = bpf_verifier_ops[tgt_prog->type];
		prog->expected_attach_type = tgt_prog->expected_attach_type;
	}

	/* store info about the attachment target that will be used later */
	prog->aux->attach_func_proto = tgt_info.tgt_type;
	prog->aux->attach_func_name = tgt_info.tgt_name;
	prog->aux->mod = tgt_info.tgt_mod;

	if (tgt_prog) {
		prog->aux->saved_dst_prog_type = tgt_prog->type;
		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
	}

	/* Raw tracepoint and iter attachments don't use a trampoline. */
	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
		prog->aux->attach_btf_trace = true;
		return 0;
	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
		return bpf_iter_prog_supported(prog);
	}

	if (prog->type == BPF_PROG_TYPE_LSM) {
		ret = bpf_lsm_verify_prog(&env->log, prog);
		if (ret < 0)
			return ret;
	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
		   btf_id_set_contains(&btf_id_deny, btf_id)) {
		verbose(env, "Attaching tracing programs to function '%s' is rejected.\n",
			tgt_info.tgt_name);
		return -EINVAL;
	} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
		   prog->expected_attach_type == BPF_TRACE_FSESSION ||
		   prog->expected_attach_type == BPF_MODIFY_RETURN) &&
		   btf_id_set_contains(&noreturn_deny, btf_id)) {
		verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
			tgt_info.tgt_name);
		return -EINVAL;
	}

	/* Fentry-style attach: look up or create the trampoline keyed by
	 * (target prog or attach btf, btf_id).
	 */
	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;

	prog->aux->dst_trampoline = tr;
	return 0;
}
19618 
19619 struct btf *bpf_get_btf_vmlinux(void)
19620 {
19621 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
19622 		mutex_lock(&bpf_verifier_lock);
19623 		if (!btf_vmlinux)
19624 			btf_vmlinux = btf_parse_vmlinux();
19625 		mutex_unlock(&bpf_verifier_lock);
19626 	}
19627 	return btf_vmlinux;
19628 }
19629 
19630 /*
19631  * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
19632  * this case expect that every file descriptor in the array is either a map or
19633  * a BTF. Everything else is considered to be trash.
19634  */
19635 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
19636 {
19637 	struct bpf_map *map;
19638 	struct btf *btf;
19639 	CLASS(fd, f)(fd);
19640 	int err;
19641 
19642 	map = __bpf_map_get(f);
19643 	if (!IS_ERR(map)) {
19644 		err = __add_used_map(env, map);
19645 		if (err < 0)
19646 			return err;
19647 		return 0;
19648 	}
19649 
19650 	btf = __btf_get_by_fd(f);
19651 	if (!IS_ERR(btf)) {
19652 		btf_get(btf);
19653 		return __add_used_btf(env, btf);
19654 	}
19655 
19656 	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
19657 	return PTR_ERR(map);
19658 }
19659 
19660 static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
19661 {
19662 	size_t size = sizeof(int);
19663 	int ret;
19664 	int fd;
19665 	u32 i;
19666 
19667 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
19668 
19669 	/*
19670 	 * The only difference between old (no fd_array_cnt is given) and new
19671 	 * APIs is that in the latter case the fd_array is expected to be
19672 	 * continuous and is scanned for map fds right away
19673 	 */
19674 	if (!attr->fd_array_cnt)
19675 		return 0;
19676 
19677 	/* Check for integer overflow */
19678 	if (attr->fd_array_cnt >= (U32_MAX / size)) {
19679 		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
19680 		return -EINVAL;
19681 	}
19682 
19683 	for (i = 0; i < attr->fd_array_cnt; i++) {
19684 		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
19685 			return -EFAULT;
19686 
19687 		ret = add_fd_from_fd_array(env, fd);
19688 		if (ret)
19689 			return ret;
19690 	}
19691 
19692 	return 0;
19693 }
19694 
/* Replace a generic kfunc with a specialized version if necessary.
 *
 * Only vmlinux kfuncs (desc->offset == 0) are candidates; kfuncs coming from
 * module BTF are left untouched.  The chosen implementation address is
 * written back into desc->addr.
 */
static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
{
	struct bpf_prog *prog = env->prog;
	bool seen_direct_write;
	void *xdp_kfunc;
	bool is_rdonly;
	u32 func_id = desc->func_id;
	u16 offset = desc->offset;
	unsigned long addr = desc->addr;

	if (offset) /* return if module BTF is used */
		return 0;

	if (bpf_dev_bound_kfunc_id(func_id)) {
		/* prefer the device-bound implementation when available */
		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
		if (xdp_kfunc)
			addr = (unsigned long)xdp_kfunc;
		/* fallback to default kfunc when not supported by netdev */
	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
		seen_direct_write = env->seen_direct_write;
		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);

		/* read-only packet access -> use the rdonly dynptr variant */
		if (is_rdonly)
			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;

		/* restore env->seen_direct_write to its original value, since
		 * may_access_direct_pkt_data mutates it
		 */
		env->seen_direct_write = seen_direct_write;
	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
		if (bpf_lsm_has_d_inode_locked(prog))
			addr = (unsigned long)bpf_set_dentry_xattr_locked;
	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
		if (bpf_lsm_has_d_inode_locked(prog))
			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
		/* pick the sleepable variant when this call site may sleep */
		if (!env->insn_aux_data[insn_idx].non_sleepable)
			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
		if (env->insn_aux_data[insn_idx].non_sleepable)
			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
		if (env->insn_aux_data[insn_idx].non_sleepable)
			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
	}
	desc->addr = addr;
	return 0;
}
19744 
19745 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
19746 					    u16 struct_meta_reg,
19747 					    u16 node_offset_reg,
19748 					    struct bpf_insn *insn,
19749 					    struct bpf_insn *insn_buf,
19750 					    int *cnt)
19751 {
19752 	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
19753 	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
19754 
19755 	insn_buf[0] = addr[0];
19756 	insn_buf[1] = addr[1];
19757 	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
19758 	insn_buf[3] = *insn;
19759 	*cnt = 4;
19760 }
19761 
/*
 * Rewrite a kfunc CALL instruction at @insn_idx into its final form.
 *
 * Resolves the kfunc descriptor, applies specialization (specialize_kfunc()),
 * patches insn->imm for near calls, and for certain special kfuncs emits a
 * replacement instruction sequence into @insn_buf, returning its length via
 * @cnt (*cnt == 0 means "keep the original instruction as is").
 */
int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
		     struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
	struct bpf_kfunc_desc *desc;
	int err;

	if (!insn->imm) {
		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
		return -EINVAL;
	}

	*cnt = 0;

	/* insn->imm has the btf func_id. Replace it with an offset relative to
	 * __bpf_call_base, unless the JIT needs to call functions that are
	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
	 */
	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
	if (!desc) {
		verifier_bug(env, "kernel function descriptor not found for func_id %u",
			     insn->imm);
		return -EFAULT;
	}

	err = specialize_kfunc(env, desc, insn_idx);
	if (err)
		return err;

	if (!bpf_jit_supports_far_kfunc_call())
		insn->imm = BPF_CALL_IMM(desc->addr);

	/* bpf_obj_new/bpf_percpu_obj_new: prepend size and struct_meta args */
	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;

		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
		insn_buf[1] = addr[0];
		insn_buf[2] = addr[1];
		insn_buf[3] = *insn;
		*cnt = 4;
	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
		/* obj_drop/refcount_acquire: prepend the struct_meta arg */
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };

		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		insn_buf[0] = addr[0];
		insn_buf[1] = addr[1];
		insn_buf[2] = *insn;
		*cnt = 3;
	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		int struct_meta_reg = BPF_REG_3;
		int node_offset_reg = BPF_REG_4;

		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
		if (is_bpf_rbtree_add_kfunc(desc->func_id)) {
			struct_meta_reg = BPF_REG_4;
			node_offset_reg = BPF_REG_5;
		}

		if (!kptr_struct_meta) {
			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
						node_offset_reg, insn, insn_buf, cnt);
	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
		/* pure casts: replace the call with a register move */
		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
		*cnt = 1;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
		/*
		 * inline the bpf_session_is_return() for fsession:
		 *   bool bpf_session_is_return(void *ctx)
		 *   {
		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
		 *   }
		 */
		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
		*cnt = 3;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
		/*
		 * inline bpf_session_cookie() for fsession:
		 *   __u64 *bpf_session_cookie(void *ctx)
		 *   {
		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
		 *       return &((u64 *)ctx)[-off];
		 *   }
		 */
		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
		*cnt = 6;
	}

	/* kfunc takes prog->aux as a hidden argument: load its address into
	 * the designated register right before the call.
	 */
	if (env->insn_aux_data[insn_idx].arg_prog) {
		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
		int idx = *cnt;

		insn_buf[idx++] = ld_addrs[0];
		insn_buf[idx++] = ld_addrs[1];
		insn_buf[idx++] = *insn;
		*cnt = idx;
	}
	return 0;
}
19899 
/*
 * bpf_check() - main entry point of the eBPF verifier.
 *
 * Runs the pre-verification analyses, the main instruction-by-instruction
 * verification walk (see the comment at the top of this file), and then the
 * post-verification rewrite/optimization passes, finally selecting a runtime
 * for the program via __bpf_prog_select_runtime().
 *
 * @prog:	in/out: program to verify; on success it may be replaced by a
 *		patched program
 * @attr:	BPF_PROG_LOAD attributes (log buffer, prog_flags, fd array, ...)
 * @uattr:	user-space pointer to @attr, used to copy results (such as
 *		log_true_size) back to user space
 * @uattr_size:	size of the user-supplied bpf_attr; gates writing of newer
 *		output fields for backward compatibility
 *
 * Return: 0 on success, negative errno on failure.
 */
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
	u64 start_time = ktime_get_ns();
	struct bpf_verifier_env *env;
	int i, len, ret = -EINVAL, err;
	u32 log_true_size;
	bool is_priv;

	/* Force 'enum bpf_features' into vmlinux BTF so user space can
	 * discover which verifier features this kernel supports.
	 */
	BTF_TYPE_EMIT(enum bpf_features);

	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;

	/* 'struct bpf_verifier_env' can be global, but since it's not small,
	 * allocate/free it every time bpf_check() is called
	 */
	env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT);
	if (!env)
		return -ENOMEM;

	/* back-pointer so code working on env->bt can reach its owning env */
	env->bt.env = env;

	len = (*prog)->len;
	env->insn_aux_data =
		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
	ret = -ENOMEM;
	if (!env->insn_aux_data)
		goto err_free_env;
	/* remember each insn's original index before any insn patching */
	for (i = 0; i < len; i++)
		env->insn_aux_data[i].orig_idx = i;
	env->succ = bpf_iarray_realloc(NULL, 2);
	if (!env->succ)
		goto err_free_env;
	env->prog = *prog;
	env->ops = bpf_verifier_ops[env->prog->type];

	/* capability-derived knobs; a BPF token can grant these to an
	 * otherwise unprivileged loader
	 */
	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);

	bpf_get_btf_vmlinux();

	/* grab the mutex to protect few globals used by verifier */
	if (!is_priv)
		mutex_lock(&bpf_verifier_lock);

	/* user could have requested verbose verifier output
	 * and supplied buffer to store the verification trace
	 */
	ret = bpf_vlog_init(&env->log, attr->log_level,
			    (char __user *) (unsigned long) attr->log_buf,
			    attr->log_size);
	if (ret)
		goto err_unlock;

	ret = process_fd_array(env, attr, uattr);
	if (ret)
		goto skip_full_check;

	mark_verifier_state_clean(env);

	if (IS_ERR(btf_vmlinux)) {
		/* Either gcc or pahole or kernel are broken. */
		verbose(env, "in-kernel BTF is malformed\n");
		ret = PTR_ERR(btf_vmlinux);
		goto skip_full_check;
	}

	/* Alignment policy: BPF_F_STRICT_ALIGNMENT opts in, archs without
	 * efficient unaligned access force it on, and BPF_F_ANY_ALIGNMENT
	 * (checked last) overrides both and turns it off.
	 */
	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		env->strict_alignment = true;
	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
		env->strict_alignment = false;

	/* test-only knobs; state_freq is honored only for privileged loads */
	if (is_priv)
		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;

	/* hash table of per-bucket lists holding explored verifier states */
	env->explored_states = kvzalloc_objs(struct list_head,
					     state_htab_size(env),
					     GFP_KERNEL_ACCOUNT);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

	for (i = 0; i < state_htab_size(env); i++)
		INIT_LIST_HEAD(&env->explored_states[i]);
	INIT_LIST_HEAD(&env->free_list);

	/* Pre-verification passes: BTF info, subprog/kfunc discovery, insn
	 * resolution, CFG checks and per-insn analyses. Order matters; any
	 * failure skips the main verification walk.
	 */
	ret = bpf_check_btf_info_early(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = add_subprog_and_kfunc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_subprogs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_check_btf_info(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = check_and_resolve_insns(env);
	if (ret < 0)
		goto skip_full_check;

	if (bpf_prog_is_offloaded(env->prog->aux)) {
		ret = bpf_prog_offload_verifier_prep(env->prog);
		if (ret)
			goto skip_full_check;
	}

	ret = bpf_check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_postorder(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_stack_liveness_init(env);
	if (ret)
		goto skip_full_check;

	ret = check_attach_btf_id(env);
	if (ret)
		goto skip_full_check;

	ret = bpf_compute_const_regs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_prune_dead_branches(env);
	if (ret < 0)
		goto skip_full_check;

	ret = sort_subprogs_topo(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_scc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_live_registers(env);
	if (ret < 0)
		goto skip_full_check;

	ret = mark_fastcall_patterns(env);
	if (ret < 0)
		goto skip_full_check;

	/* main verification walk: the root program first, then subprograms */
	ret = do_check_main(env);
	ret = ret ?: do_check_subprogs(env);

	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
		ret = bpf_prog_offload_finalize(env);

	/* Reached on both success and failure; each rewrite pass below is
	 * gated on ret == 0 so errors just fall through to log finalization.
	 */
skip_full_check:
	kvfree(env->explored_states);

	/* might decrease stack depth, keep it before passes that
	 * allocate additional slots.
	 */
	if (ret == 0)
		ret = bpf_remove_fastcall_spills_fills(env);

	if (ret == 0)
		ret = check_max_stack_depth(env);

	/* instruction rewrites happen after this point */
	if (ret == 0)
		ret = bpf_optimize_bpf_loop(env);

	if (is_priv) {
		if (ret == 0)
			bpf_opt_hard_wire_dead_code_branches(env);
		if (ret == 0)
			ret = bpf_opt_remove_dead_code(env);
		if (ret == 0)
			ret = bpf_opt_remove_nops(env);
	} else {
		/* unpriv: dead code is sanitized in place, not removed */
		if (ret == 0)
			sanitize_dead_code(env);
	}

	if (ret == 0)
		/* program is valid, convert *(u32*)(ctx + off) accesses */
		ret = bpf_convert_ctx_accesses(env);

	if (ret == 0)
		ret = bpf_do_misc_fixups(env);

	/* do 32-bit optimization after insn patching has done so those patched
	 * insns could be handled correctly.
	 */
	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
		ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr);
		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
								     : false;
	}

	if (ret == 0)
		ret = bpf_fixup_call_args(env);

	env->verification_time = ktime_get_ns() - start_time;
	print_verification_stats(env);
	env->prog->aux->verified_insns = env->insn_processed;

	/* preserve original error even if log finalization is successful */
	err = bpf_vlog_finalize(&env->log, &log_true_size);
	if (err)
		ret = err;

	/* report the true log size only if user space is new enough to
	 * have the log_true_size field in bpf_attr
	 */
	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
				  &log_true_size, sizeof(log_true_size))) {
		ret = -EFAULT;
		goto err_release_maps;
	}

	if (ret)
		goto err_release_maps;

	if (env->used_map_cnt) {
		/* if program passed verifier, update used_maps in bpf_prog_info */
		env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0],
							 env->used_map_cnt,
							 GFP_KERNEL_ACCOUNT);

		if (!env->prog->aux->used_maps) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_maps, env->used_maps,
		       sizeof(env->used_maps[0]) * env->used_map_cnt);
		env->prog->aux->used_map_cnt = env->used_map_cnt;
	}
	if (env->used_btf_cnt) {
		/* if program passed verifier, update used_btfs in bpf_prog_aux */
		env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0],
							 env->used_btf_cnt,
							 GFP_KERNEL_ACCOUNT);
		if (!env->prog->aux->used_btfs) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_btfs, env->used_btfs,
		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
	}
	if (env->used_map_cnt || env->used_btf_cnt) {
		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
		 * bpf_ld_imm64 instructions
		 */
		convert_pseudo_ld_imm64(env);
	}

	adjust_btf_func(env);

	/* extension progs temporarily inherit the attach_type of their targets
	   for verification purposes, so set it back to zero before returning
	 */
	if (env->prog->type == BPF_PROG_TYPE_EXT)
		env->prog->expected_attach_type = 0;

	env->prog = __bpf_prog_select_runtime(env, env->prog, &ret);

err_release_maps:
	if (ret)
		release_insn_arrays(env);
	if (!env->prog->aux->used_maps)
		/* if we didn't copy map pointers into bpf_prog_info, release
		 * them now. Otherwise free_used_maps() will release them.
		 */
		release_maps(env);
	if (!env->prog->aux->used_btfs)
		release_btfs(env);

	/* hand the (possibly patched) program back to the caller */
	*prog = env->prog;

	module_put(env->attach_btf_mod);
err_unlock:
	if (!is_priv)
		mutex_unlock(&bpf_verifier_lock);
	/* release per-insn aux resources before freeing the array itself */
	bpf_clear_insn_aux_data(env, 0, env->prog->len);
	vfree(env->insn_aux_data);
err_free_env:
	/* the kvfree()s below tolerate NULL for allocations never made */
	bpf_stack_liveness_free(env);
	kvfree(env->cfg.insn_postorder);
	kvfree(env->scc_info);
	kvfree(env->succ);
	kvfree(env->gotox_tmp_buf);
	kvfree(env);
	return ret;
}
20204