xref: /linux/kernel/bpf/verifier.c (revision 06b9cce42634a50f2840777a66553b02320db5ef)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 
27 #include "disasm.h"
28 
29 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
30 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
31 	[_id] = & _name ## _verifier_ops,
32 #define BPF_MAP_TYPE(_id, _ops)
33 #define BPF_LINK_TYPE(_id, _name)
34 #include <linux/bpf_types.h>
35 #undef BPF_PROG_TYPE
36 #undef BPF_MAP_TYPE
37 #undef BPF_LINK_TYPE
38 };
39 
40 /* bpf_check() is a static code analyzer that walks eBPF program
41  * instruction by instruction and updates register/stack state.
42  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
43  *
44  * The first pass is depth-first-search to check that the program is a DAG.
45  * It rejects the following programs:
46  * - larger than BPF_MAXINSNS insns
47  * - if loop is present (detected via back-edge)
48  * - unreachable insns exist (shouldn't be a forest. program = one function)
49  * - out of bounds or malformed jumps
50  * The second pass is all possible path descent from the 1st insn.
51  * Since it's analyzing all paths through the program, the length of the
52  * analysis is limited to 64k insn, which may be hit even if total number of
53  * insn is less then 4K, but there are too many branches that change stack/regs.
54  * Number of 'branches to be analyzed' is limited to 1k
55  *
56  * On entry to each instruction, each register has a type, and the instruction
57  * changes the types of the registers depending on instruction semantics.
58  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
59  * copied to R1.
60  *
61  * All registers are 64-bit.
62  * R0 - return register
63  * R1-R5 argument passing registers
64  * R6-R9 callee saved registers
65  * R10 - frame pointer read-only
66  *
67  * At the start of BPF program the register R1 contains a pointer to bpf_context
68  * and has type PTR_TO_CTX.
69  *
70  * Verifier tracks arithmetic operations on pointers in case:
71  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
72  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
73  * 1st insn copies R10 (which has FRAME_PTR) type into R1
74  * and 2nd arithmetic instruction is pattern matched to recognize
75  * that it wants to construct a pointer to some element within stack.
76  * So after 2nd insn, the register R1 has type PTR_TO_STACK
77  * (and -20 constant is saved for further stack bounds checking).
78  * Meaning that this reg is a pointer to stack plus known immediate constant.
79  *
80  * Most of the time the registers have SCALAR_VALUE type, which
81  * means the register has some value, but it's not a valid pointer.
82  * (like pointer plus pointer becomes SCALAR_VALUE type)
83  *
84  * When verifier sees load or store instructions the type of base register
85  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
86  * four pointer types recognized by check_mem_access() function.
87  *
88  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
89  * and the range of [ptr, ptr + map's value_size) is accessible.
90  *
91  * registers used to pass values to function calls are checked against
92  * function argument constraints.
93  *
94  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
95  * It means that the register type passed to this function must be
96  * PTR_TO_STACK and it will be used inside the function as
97  * 'pointer to map element key'
98  *
99  * For example the argument constraints for bpf_map_lookup_elem():
100  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
101  *   .arg1_type = ARG_CONST_MAP_PTR,
102  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
103  *
104  * ret_type says that this function returns 'pointer to map elem value or null'
105  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
106  * 2nd argument should be a pointer to stack, which will be used inside
107  * the helper function as a pointer to map element key.
108  *
109  * On the kernel side the helper function looks like:
110  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
111  * {
112  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
113  *    void *key = (void *) (unsigned long) r2;
114  *    void *value;
115  *
116  *    here kernel can access 'key' and 'map' pointers safely, knowing that
117  *    [key, key + map->key_size) bytes are valid and were initialized on
118  *    the stack of eBPF program.
119  * }
120  *
121  * Corresponding eBPF program may look like:
122  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
123  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
124  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
125  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
126  * here verifier looks at prototype of map_lookup_elem() and sees:
127  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
128  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
129  *
130  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
131  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
132  * and were initialized prior to this call.
133  * If it's ok, then verifier allows this BPF_CALL insn and looks at
134  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
135  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
136  * returns either pointer to map value or NULL.
137  *
138  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
139  * insn, the register holding that pointer in the true branch changes state to
140  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
141  * branch. See check_cond_jmp_op().
142  *
143  * After the call R0 is set to return type of the function and registers R1-R5
144  * are set to NOT_INIT to indicate that they are no longer readable.
145  *
146  * The following reference types represent a potential reference to a kernel
147  * resource which, after first being allocated, must be checked and freed by
148  * the BPF program:
149  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
150  *
151  * When the verifier sees a helper call return a reference type, it allocates a
152  * pointer id for the reference and stores it in the current function state.
153  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
154  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
155  * passes through a NULL-check conditional. For the branch wherein the state is
156  * changed to CONST_IMM, the verifier releases the reference.
157  *
158  * For each helper function that allocates a reference, such as
159  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
160  * bpf_sk_release(). When a reference type passes into the release function,
161  * the verifier also releases the reference. If any unchecked or unreleased
162  * reference remains at the end of the program, the verifier rejects it.
163  */
164 
165 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
166 struct bpf_verifier_stack_elem {
167 	/* verifer state is 'st'
168 	 * before processing instruction 'insn_idx'
169 	 * and after processing instruction 'prev_insn_idx'
170 	 */
171 	struct bpf_verifier_state st;
172 	int insn_idx;
173 	int prev_insn_idx;
174 	struct bpf_verifier_stack_elem *next;
175 	/* length of verifier log at the time this state was pushed on stack */
176 	u32 log_pos;
177 };
178 
179 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
180 #define BPF_COMPLEXITY_LIMIT_STATES	64
181 
182 #define BPF_MAP_KEY_POISON	(1ULL << 63)
183 #define BPF_MAP_KEY_SEEN	(1ULL << 62)
184 
185 #define BPF_MAP_PTR_UNPRIV	1UL
186 #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
187 					  POISON_POINTER_DELTA))
188 #define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
189 
190 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
191 {
192 	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
193 }
194 
195 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
196 {
197 	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
198 }
199 
200 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
201 			      const struct bpf_map *map, bool unpriv)
202 {
203 	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
204 	unpriv |= bpf_map_ptr_unpriv(aux);
205 	aux->map_ptr_state = (unsigned long)map |
206 			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
207 }
208 
209 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
210 {
211 	return aux->map_key_state & BPF_MAP_KEY_POISON;
212 }
213 
214 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
215 {
216 	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
217 }
218 
219 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
220 {
221 	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
222 }
223 
224 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
225 {
226 	bool poisoned = bpf_map_key_poisoned(aux);
227 
228 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
229 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
230 }
231 
232 static bool bpf_pseudo_call(const struct bpf_insn *insn)
233 {
234 	return insn->code == (BPF_JMP | BPF_CALL) &&
235 	       insn->src_reg == BPF_PSEUDO_CALL;
236 }
237 
238 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
239 {
240 	return insn->code == (BPF_JMP | BPF_CALL) &&
241 	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
242 }
243 
244 struct bpf_call_arg_meta {
245 	struct bpf_map *map_ptr;
246 	bool raw_mode;
247 	bool pkt_access;
248 	int regno;
249 	int access_size;
250 	int mem_size;
251 	u64 msize_max_value;
252 	int ref_obj_id;
253 	int map_uid;
254 	int func_id;
255 	struct btf *btf;
256 	u32 btf_id;
257 	struct btf *ret_btf;
258 	u32 ret_btf_id;
259 	u32 subprogno;
260 };
261 
262 struct btf *btf_vmlinux;
263 
264 static DEFINE_MUTEX(bpf_verifier_lock);
265 
266 static const struct bpf_line_info *
267 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
268 {
269 	const struct bpf_line_info *linfo;
270 	const struct bpf_prog *prog;
271 	u32 i, nr_linfo;
272 
273 	prog = env->prog;
274 	nr_linfo = prog->aux->nr_linfo;
275 
276 	if (!nr_linfo || insn_off >= prog->len)
277 		return NULL;
278 
279 	linfo = prog->aux->linfo;
280 	for (i = 1; i < nr_linfo; i++)
281 		if (insn_off < linfo[i].insn_off)
282 			break;
283 
284 	return &linfo[i - 1];
285 }
286 
287 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
288 		       va_list args)
289 {
290 	unsigned int n;
291 
292 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
293 
294 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
295 		  "verifier log line truncated - local buffer too short\n");
296 
297 	if (log->level == BPF_LOG_KERNEL) {
298 		bool newline = n > 0 && log->kbuf[n - 1] == '\n';
299 
300 		pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
301 		return;
302 	}
303 
304 	n = min(log->len_total - log->len_used - 1, n);
305 	log->kbuf[n] = '\0';
306 	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
307 		log->len_used += n;
308 	else
309 		log->ubuf = NULL;
310 }
311 
312 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
313 {
314 	char zero = 0;
315 
316 	if (!bpf_verifier_log_needed(log))
317 		return;
318 
319 	log->len_used = new_pos;
320 	if (put_user(zero, log->ubuf + new_pos))
321 		log->ubuf = NULL;
322 }
323 
324 /* log_level controls verbosity level of eBPF verifier.
325  * bpf_verifier_log_write() is used to dump the verification trace to the log,
326  * so the user can figure out what's wrong with the program
327  */
328 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
329 					   const char *fmt, ...)
330 {
331 	va_list args;
332 
333 	if (!bpf_verifier_log_needed(&env->log))
334 		return;
335 
336 	va_start(args, fmt);
337 	bpf_verifier_vlog(&env->log, fmt, args);
338 	va_end(args);
339 }
340 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
341 
342 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
343 {
344 	struct bpf_verifier_env *env = private_data;
345 	va_list args;
346 
347 	if (!bpf_verifier_log_needed(&env->log))
348 		return;
349 
350 	va_start(args, fmt);
351 	bpf_verifier_vlog(&env->log, fmt, args);
352 	va_end(args);
353 }
354 
355 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
356 			    const char *fmt, ...)
357 {
358 	va_list args;
359 
360 	if (!bpf_verifier_log_needed(log))
361 		return;
362 
363 	va_start(args, fmt);
364 	bpf_verifier_vlog(log, fmt, args);
365 	va_end(args);
366 }
367 
368 static const char *ltrim(const char *s)
369 {
370 	while (isspace(*s))
371 		s++;
372 
373 	return s;
374 }
375 
376 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
377 					 u32 insn_off,
378 					 const char *prefix_fmt, ...)
379 {
380 	const struct bpf_line_info *linfo;
381 
382 	if (!bpf_verifier_log_needed(&env->log))
383 		return;
384 
385 	linfo = find_linfo(env, insn_off);
386 	if (!linfo || linfo == env->prev_linfo)
387 		return;
388 
389 	if (prefix_fmt) {
390 		va_list args;
391 
392 		va_start(args, prefix_fmt);
393 		bpf_verifier_vlog(&env->log, prefix_fmt, args);
394 		va_end(args);
395 	}
396 
397 	verbose(env, "%s\n",
398 		ltrim(btf_name_by_offset(env->prog->aux->btf,
399 					 linfo->line_off)));
400 
401 	env->prev_linfo = linfo;
402 }
403 
404 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
405 				   struct bpf_reg_state *reg,
406 				   struct tnum *range, const char *ctx,
407 				   const char *reg_name)
408 {
409 	char tn_buf[48];
410 
411 	verbose(env, "At %s the register %s ", ctx, reg_name);
412 	if (!tnum_is_unknown(reg->var_off)) {
413 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
414 		verbose(env, "has value %s", tn_buf);
415 	} else {
416 		verbose(env, "has unknown scalar value");
417 	}
418 	tnum_strn(tn_buf, sizeof(tn_buf), *range);
419 	verbose(env, " should have been in %s\n", tn_buf);
420 }
421 
422 static bool type_is_pkt_pointer(enum bpf_reg_type type)
423 {
424 	return type == PTR_TO_PACKET ||
425 	       type == PTR_TO_PACKET_META;
426 }
427 
428 static bool type_is_sk_pointer(enum bpf_reg_type type)
429 {
430 	return type == PTR_TO_SOCKET ||
431 		type == PTR_TO_SOCK_COMMON ||
432 		type == PTR_TO_TCP_SOCK ||
433 		type == PTR_TO_XDP_SOCK;
434 }
435 
436 static bool reg_type_not_null(enum bpf_reg_type type)
437 {
438 	return type == PTR_TO_SOCKET ||
439 		type == PTR_TO_TCP_SOCK ||
440 		type == PTR_TO_MAP_VALUE ||
441 		type == PTR_TO_MAP_KEY ||
442 		type == PTR_TO_SOCK_COMMON;
443 }
444 
445 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
446 {
447 	return reg->type == PTR_TO_MAP_VALUE &&
448 		map_value_has_spin_lock(reg->map_ptr);
449 }
450 
451 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
452 {
453 	return base_type(type) == PTR_TO_SOCKET ||
454 		base_type(type) == PTR_TO_TCP_SOCK ||
455 		base_type(type) == PTR_TO_MEM ||
456 		base_type(type) == PTR_TO_BTF_ID;
457 }
458 
459 static bool type_is_rdonly_mem(u32 type)
460 {
461 	return type & MEM_RDONLY;
462 }
463 
464 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
465 {
466 	return type == ARG_PTR_TO_SOCK_COMMON;
467 }
468 
469 static bool type_may_be_null(u32 type)
470 {
471 	return type & PTR_MAYBE_NULL;
472 }
473 
474 /* Determine whether the function releases some resources allocated by another
475  * function call. The first reference type argument will be assumed to be
476  * released by release_reference().
477  */
478 static bool is_release_function(enum bpf_func_id func_id)
479 {
480 	return func_id == BPF_FUNC_sk_release ||
481 	       func_id == BPF_FUNC_ringbuf_submit ||
482 	       func_id == BPF_FUNC_ringbuf_discard;
483 }
484 
485 static bool may_be_acquire_function(enum bpf_func_id func_id)
486 {
487 	return func_id == BPF_FUNC_sk_lookup_tcp ||
488 		func_id == BPF_FUNC_sk_lookup_udp ||
489 		func_id == BPF_FUNC_skc_lookup_tcp ||
490 		func_id == BPF_FUNC_map_lookup_elem ||
491 	        func_id == BPF_FUNC_ringbuf_reserve;
492 }
493 
494 static bool is_acquire_function(enum bpf_func_id func_id,
495 				const struct bpf_map *map)
496 {
497 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
498 
499 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
500 	    func_id == BPF_FUNC_sk_lookup_udp ||
501 	    func_id == BPF_FUNC_skc_lookup_tcp ||
502 	    func_id == BPF_FUNC_ringbuf_reserve)
503 		return true;
504 
505 	if (func_id == BPF_FUNC_map_lookup_elem &&
506 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
507 	     map_type == BPF_MAP_TYPE_SOCKHASH))
508 		return true;
509 
510 	return false;
511 }
512 
513 static bool is_ptr_cast_function(enum bpf_func_id func_id)
514 {
515 	return func_id == BPF_FUNC_tcp_sock ||
516 		func_id == BPF_FUNC_sk_fullsock ||
517 		func_id == BPF_FUNC_skc_to_tcp_sock ||
518 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
519 		func_id == BPF_FUNC_skc_to_udp6_sock ||
520 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
521 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
522 }
523 
524 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
525 {
526 	return BPF_CLASS(insn->code) == BPF_STX &&
527 	       BPF_MODE(insn->code) == BPF_ATOMIC &&
528 	       insn->imm == BPF_CMPXCHG;
529 }
530 
531 /* string representation of 'enum bpf_reg_type'
532  *
533  * Note that reg_type_str() can not appear more than once in a single verbose()
534  * statement.
535  */
536 static const char *reg_type_str(struct bpf_verifier_env *env,
537 				enum bpf_reg_type type)
538 {
539 	char postfix[16] = {0}, prefix[32] = {0};
540 	static const char * const str[] = {
541 		[NOT_INIT]		= "?",
542 		[SCALAR_VALUE]		= "inv",
543 		[PTR_TO_CTX]		= "ctx",
544 		[CONST_PTR_TO_MAP]	= "map_ptr",
545 		[PTR_TO_MAP_VALUE]	= "map_value",
546 		[PTR_TO_STACK]		= "fp",
547 		[PTR_TO_PACKET]		= "pkt",
548 		[PTR_TO_PACKET_META]	= "pkt_meta",
549 		[PTR_TO_PACKET_END]	= "pkt_end",
550 		[PTR_TO_FLOW_KEYS]	= "flow_keys",
551 		[PTR_TO_SOCKET]		= "sock",
552 		[PTR_TO_SOCK_COMMON]	= "sock_common",
553 		[PTR_TO_TCP_SOCK]	= "tcp_sock",
554 		[PTR_TO_TP_BUFFER]	= "tp_buffer",
555 		[PTR_TO_XDP_SOCK]	= "xdp_sock",
556 		[PTR_TO_BTF_ID]		= "ptr_",
557 		[PTR_TO_PERCPU_BTF_ID]	= "percpu_ptr_",
558 		[PTR_TO_MEM]		= "mem",
559 		[PTR_TO_BUF]		= "buf",
560 		[PTR_TO_FUNC]		= "func",
561 		[PTR_TO_MAP_KEY]	= "map_key",
562 	};
563 
564 	if (type & PTR_MAYBE_NULL) {
565 		if (base_type(type) == PTR_TO_BTF_ID ||
566 		    base_type(type) == PTR_TO_PERCPU_BTF_ID)
567 			strncpy(postfix, "or_null_", 16);
568 		else
569 			strncpy(postfix, "_or_null", 16);
570 	}
571 
572 	if (type & MEM_RDONLY)
573 		strncpy(prefix, "rdonly_", 32);
574 	if (type & MEM_ALLOC)
575 		strncpy(prefix, "alloc_", 32);
576 	if (type & MEM_USER)
577 		strncpy(prefix, "user_", 32);
578 
579 	snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
580 		 prefix, str[base_type(type)], postfix);
581 	return env->type_str_buf;
582 }
583 
584 static char slot_type_char[] = {
585 	[STACK_INVALID]	= '?',
586 	[STACK_SPILL]	= 'r',
587 	[STACK_MISC]	= 'm',
588 	[STACK_ZERO]	= '0',
589 };
590 
591 static void print_liveness(struct bpf_verifier_env *env,
592 			   enum bpf_reg_liveness live)
593 {
594 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
595 	    verbose(env, "_");
596 	if (live & REG_LIVE_READ)
597 		verbose(env, "r");
598 	if (live & REG_LIVE_WRITTEN)
599 		verbose(env, "w");
600 	if (live & REG_LIVE_DONE)
601 		verbose(env, "D");
602 }
603 
604 static struct bpf_func_state *func(struct bpf_verifier_env *env,
605 				   const struct bpf_reg_state *reg)
606 {
607 	struct bpf_verifier_state *cur = env->cur_state;
608 
609 	return cur->frame[reg->frameno];
610 }
611 
612 static const char *kernel_type_name(const struct btf* btf, u32 id)
613 {
614 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
615 }
616 
617 static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
618 {
619 	env->scratched_regs |= 1U << regno;
620 }
621 
622 static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
623 {
624 	env->scratched_stack_slots |= 1ULL << spi;
625 }
626 
627 static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
628 {
629 	return (env->scratched_regs >> regno) & 1;
630 }
631 
632 static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
633 {
634 	return (env->scratched_stack_slots >> regno) & 1;
635 }
636 
637 static bool verifier_state_scratched(const struct bpf_verifier_env *env)
638 {
639 	return env->scratched_regs || env->scratched_stack_slots;
640 }
641 
642 static void mark_verifier_state_clean(struct bpf_verifier_env *env)
643 {
644 	env->scratched_regs = 0U;
645 	env->scratched_stack_slots = 0ULL;
646 }
647 
648 /* Used for printing the entire verifier state. */
649 static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
650 {
651 	env->scratched_regs = ~0U;
652 	env->scratched_stack_slots = ~0ULL;
653 }
654 
655 /* The reg state of a pointer or a bounded scalar was saved when
656  * it was spilled to the stack.
657  */
658 static bool is_spilled_reg(const struct bpf_stack_state *stack)
659 {
660 	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
661 }
662 
663 static void scrub_spilled_slot(u8 *stype)
664 {
665 	if (*stype != STACK_INVALID)
666 		*stype = STACK_MISC;
667 }
668 
669 static void print_verifier_state(struct bpf_verifier_env *env,
670 				 const struct bpf_func_state *state,
671 				 bool print_all)
672 {
673 	const struct bpf_reg_state *reg;
674 	enum bpf_reg_type t;
675 	int i;
676 
677 	if (state->frameno)
678 		verbose(env, " frame%d:", state->frameno);
679 	for (i = 0; i < MAX_BPF_REG; i++) {
680 		reg = &state->regs[i];
681 		t = reg->type;
682 		if (t == NOT_INIT)
683 			continue;
684 		if (!print_all && !reg_scratched(env, i))
685 			continue;
686 		verbose(env, " R%d", i);
687 		print_liveness(env, reg->live);
688 		verbose(env, "=%s", reg_type_str(env, t));
689 		if (t == SCALAR_VALUE && reg->precise)
690 			verbose(env, "P");
691 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
692 		    tnum_is_const(reg->var_off)) {
693 			/* reg->off should be 0 for SCALAR_VALUE */
694 			verbose(env, "%lld", reg->var_off.value + reg->off);
695 		} else {
696 			if (base_type(t) == PTR_TO_BTF_ID ||
697 			    base_type(t) == PTR_TO_PERCPU_BTF_ID)
698 				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
699 			verbose(env, "(id=%d", reg->id);
700 			if (reg_type_may_be_refcounted_or_null(t))
701 				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
702 			if (t != SCALAR_VALUE)
703 				verbose(env, ",off=%d", reg->off);
704 			if (type_is_pkt_pointer(t))
705 				verbose(env, ",r=%d", reg->range);
706 			else if (base_type(t) == CONST_PTR_TO_MAP ||
707 				 base_type(t) == PTR_TO_MAP_KEY ||
708 				 base_type(t) == PTR_TO_MAP_VALUE)
709 				verbose(env, ",ks=%d,vs=%d",
710 					reg->map_ptr->key_size,
711 					reg->map_ptr->value_size);
712 			if (tnum_is_const(reg->var_off)) {
713 				/* Typically an immediate SCALAR_VALUE, but
714 				 * could be a pointer whose offset is too big
715 				 * for reg->off
716 				 */
717 				verbose(env, ",imm=%llx", reg->var_off.value);
718 			} else {
719 				if (reg->smin_value != reg->umin_value &&
720 				    reg->smin_value != S64_MIN)
721 					verbose(env, ",smin_value=%lld",
722 						(long long)reg->smin_value);
723 				if (reg->smax_value != reg->umax_value &&
724 				    reg->smax_value != S64_MAX)
725 					verbose(env, ",smax_value=%lld",
726 						(long long)reg->smax_value);
727 				if (reg->umin_value != 0)
728 					verbose(env, ",umin_value=%llu",
729 						(unsigned long long)reg->umin_value);
730 				if (reg->umax_value != U64_MAX)
731 					verbose(env, ",umax_value=%llu",
732 						(unsigned long long)reg->umax_value);
733 				if (!tnum_is_unknown(reg->var_off)) {
734 					char tn_buf[48];
735 
736 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
737 					verbose(env, ",var_off=%s", tn_buf);
738 				}
739 				if (reg->s32_min_value != reg->smin_value &&
740 				    reg->s32_min_value != S32_MIN)
741 					verbose(env, ",s32_min_value=%d",
742 						(int)(reg->s32_min_value));
743 				if (reg->s32_max_value != reg->smax_value &&
744 				    reg->s32_max_value != S32_MAX)
745 					verbose(env, ",s32_max_value=%d",
746 						(int)(reg->s32_max_value));
747 				if (reg->u32_min_value != reg->umin_value &&
748 				    reg->u32_min_value != U32_MIN)
749 					verbose(env, ",u32_min_value=%d",
750 						(int)(reg->u32_min_value));
751 				if (reg->u32_max_value != reg->umax_value &&
752 				    reg->u32_max_value != U32_MAX)
753 					verbose(env, ",u32_max_value=%d",
754 						(int)(reg->u32_max_value));
755 			}
756 			verbose(env, ")");
757 		}
758 	}
759 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
760 		char types_buf[BPF_REG_SIZE + 1];
761 		bool valid = false;
762 		int j;
763 
764 		for (j = 0; j < BPF_REG_SIZE; j++) {
765 			if (state->stack[i].slot_type[j] != STACK_INVALID)
766 				valid = true;
767 			types_buf[j] = slot_type_char[
768 					state->stack[i].slot_type[j]];
769 		}
770 		types_buf[BPF_REG_SIZE] = 0;
771 		if (!valid)
772 			continue;
773 		if (!print_all && !stack_slot_scratched(env, i))
774 			continue;
775 		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
776 		print_liveness(env, state->stack[i].spilled_ptr.live);
777 		if (is_spilled_reg(&state->stack[i])) {
778 			reg = &state->stack[i].spilled_ptr;
779 			t = reg->type;
780 			verbose(env, "=%s", reg_type_str(env, t));
781 			if (t == SCALAR_VALUE && reg->precise)
782 				verbose(env, "P");
783 			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
784 				verbose(env, "%lld", reg->var_off.value + reg->off);
785 		} else {
786 			verbose(env, "=%s", types_buf);
787 		}
788 	}
789 	if (state->acquired_refs && state->refs[0].id) {
790 		verbose(env, " refs=%d", state->refs[0].id);
791 		for (i = 1; i < state->acquired_refs; i++)
792 			if (state->refs[i].id)
793 				verbose(env, ",%d", state->refs[i].id);
794 	}
795 	if (state->in_callback_fn)
796 		verbose(env, " cb");
797 	if (state->in_async_callback_fn)
798 		verbose(env, " async_cb");
799 	verbose(env, "\n");
800 	mark_verifier_state_clean(env);
801 }
802 
803 static inline u32 vlog_alignment(u32 pos)
804 {
805 	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
806 			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
807 }
808 
809 static void print_insn_state(struct bpf_verifier_env *env,
810 			     const struct bpf_func_state *state)
811 {
812 	if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
813 		/* remove new line character */
814 		bpf_vlog_reset(&env->log, env->prev_log_len - 1);
815 		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
816 	} else {
817 		verbose(env, "%d:", env->insn_idx);
818 	}
819 	print_verifier_state(env, state, false);
820 }
821 
822 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
823  * small to hold src. This is different from krealloc since we don't want to preserve
824  * the contents of dst.
825  *
826  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
827  * not be allocated.
828  */
829 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
830 {
831 	size_t bytes;
832 
833 	if (ZERO_OR_NULL_PTR(src))
834 		goto out;
835 
836 	if (unlikely(check_mul_overflow(n, size, &bytes)))
837 		return NULL;
838 
839 	if (ksize(dst) < bytes) {
840 		kfree(dst);
841 		dst = kmalloc_track_caller(bytes, flags);
842 		if (!dst)
843 			return NULL;
844 	}
845 
846 	memcpy(dst, src, bytes);
847 out:
848 	return dst ? dst : ZERO_SIZE_PTR;
849 }
850 
851 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
852  * small to hold new_n items. new items are zeroed out if the array grows.
853  *
854  * Contrary to krealloc_array, does not free arr if new_n is zero.
855  */
856 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
857 {
858 	if (!new_n || old_n == new_n)
859 		goto out;
860 
861 	arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
862 	if (!arr)
863 		return NULL;
864 
865 	if (new_n > old_n)
866 		memset(arr + old_n * size, 0, (new_n - old_n) * size);
867 
868 out:
869 	return arr ? arr : ZERO_SIZE_PTR;
870 }
871 
872 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
873 {
874 	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
875 			       sizeof(struct bpf_reference_state), GFP_KERNEL);
876 	if (!dst->refs)
877 		return -ENOMEM;
878 
879 	dst->acquired_refs = src->acquired_refs;
880 	return 0;
881 }
882 
883 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
884 {
885 	size_t n = src->allocated_stack / BPF_REG_SIZE;
886 
887 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
888 				GFP_KERNEL);
889 	if (!dst->stack)
890 		return -ENOMEM;
891 
892 	dst->allocated_stack = src->allocated_stack;
893 	return 0;
894 }
895 
896 static int resize_reference_state(struct bpf_func_state *state, size_t n)
897 {
898 	state->refs = realloc_array(state->refs, state->acquired_refs, n,
899 				    sizeof(struct bpf_reference_state));
900 	if (!state->refs)
901 		return -ENOMEM;
902 
903 	state->acquired_refs = n;
904 	return 0;
905 }
906 
907 static int grow_stack_state(struct bpf_func_state *state, int size)
908 {
909 	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
910 
911 	if (old_n >= n)
912 		return 0;
913 
914 	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
915 	if (!state->stack)
916 		return -ENOMEM;
917 
918 	state->allocated_stack = size;
919 	return 0;
920 }
921 
922 /* Acquire a pointer id from the env and update the state->refs to include
923  * this new pointer reference.
924  * On success, returns a valid pointer id to associate with the register
925  * On failure, returns a negative errno.
926  */
927 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
928 {
929 	struct bpf_func_state *state = cur_func(env);
930 	int new_ofs = state->acquired_refs;
931 	int id, err;
932 
933 	err = resize_reference_state(state, state->acquired_refs + 1);
934 	if (err)
935 		return err;
936 	id = ++env->id_gen;
937 	state->refs[new_ofs].id = id;
938 	state->refs[new_ofs].insn_idx = insn_idx;
939 
940 	return id;
941 }
942 
943 /* release function corresponding to acquire_reference_state(). Idempotent. */
944 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
945 {
946 	int i, last_idx;
947 
948 	last_idx = state->acquired_refs - 1;
949 	for (i = 0; i < state->acquired_refs; i++) {
950 		if (state->refs[i].id == ptr_id) {
951 			if (last_idx && i != last_idx)
952 				memcpy(&state->refs[i], &state->refs[last_idx],
953 				       sizeof(*state->refs));
954 			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
955 			state->acquired_refs--;
956 			return 0;
957 		}
958 	}
959 	return -EINVAL;
960 }
961 
962 static void free_func_state(struct bpf_func_state *state)
963 {
964 	if (!state)
965 		return;
966 	kfree(state->refs);
967 	kfree(state->stack);
968 	kfree(state);
969 }
970 
971 static void clear_jmp_history(struct bpf_verifier_state *state)
972 {
973 	kfree(state->jmp_history);
974 	state->jmp_history = NULL;
975 	state->jmp_history_cnt = 0;
976 }
977 
978 static void free_verifier_state(struct bpf_verifier_state *state,
979 				bool free_self)
980 {
981 	int i;
982 
983 	for (i = 0; i <= state->curframe; i++) {
984 		free_func_state(state->frame[i]);
985 		state->frame[i] = NULL;
986 	}
987 	clear_jmp_history(state);
988 	if (free_self)
989 		kfree(state);
990 }
991 
992 /* copy verifier state from src to dst growing dst stack space
993  * when necessary to accommodate larger src stack
994  */
995 static int copy_func_state(struct bpf_func_state *dst,
996 			   const struct bpf_func_state *src)
997 {
998 	int err;
999 
1000 	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1001 	err = copy_reference_state(dst, src);
1002 	if (err)
1003 		return err;
1004 	return copy_stack_state(dst, src);
1005 }
1006 
1007 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1008 			       const struct bpf_verifier_state *src)
1009 {
1010 	struct bpf_func_state *dst;
1011 	int i, err;
1012 
1013 	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1014 					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1015 					    GFP_USER);
1016 	if (!dst_state->jmp_history)
1017 		return -ENOMEM;
1018 	dst_state->jmp_history_cnt = src->jmp_history_cnt;
1019 
1020 	/* if dst has more stack frames then src frame, free them */
1021 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1022 		free_func_state(dst_state->frame[i]);
1023 		dst_state->frame[i] = NULL;
1024 	}
1025 	dst_state->speculative = src->speculative;
1026 	dst_state->curframe = src->curframe;
1027 	dst_state->active_spin_lock = src->active_spin_lock;
1028 	dst_state->branches = src->branches;
1029 	dst_state->parent = src->parent;
1030 	dst_state->first_insn_idx = src->first_insn_idx;
1031 	dst_state->last_insn_idx = src->last_insn_idx;
1032 	for (i = 0; i <= src->curframe; i++) {
1033 		dst = dst_state->frame[i];
1034 		if (!dst) {
1035 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1036 			if (!dst)
1037 				return -ENOMEM;
1038 			dst_state->frame[i] = dst;
1039 		}
1040 		err = copy_func_state(dst, src->frame[i]);
1041 		if (err)
1042 			return err;
1043 	}
1044 	return 0;
1045 }
1046 
1047 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1048 {
1049 	while (st) {
1050 		u32 br = --st->branches;
1051 
1052 		/* WARN_ON(br > 1) technically makes sense here,
1053 		 * but see comment in push_stack(), hence:
1054 		 */
1055 		WARN_ONCE((int)br < 0,
1056 			  "BUG update_branch_counts:branches_to_explore=%d\n",
1057 			  br);
1058 		if (br)
1059 			break;
1060 		st = st->parent;
1061 	}
1062 }
1063 
1064 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1065 		     int *insn_idx, bool pop_log)
1066 {
1067 	struct bpf_verifier_state *cur = env->cur_state;
1068 	struct bpf_verifier_stack_elem *elem, *head = env->head;
1069 	int err;
1070 
1071 	if (env->head == NULL)
1072 		return -ENOENT;
1073 
1074 	if (cur) {
1075 		err = copy_verifier_state(cur, &head->st);
1076 		if (err)
1077 			return err;
1078 	}
1079 	if (pop_log)
1080 		bpf_vlog_reset(&env->log, head->log_pos);
1081 	if (insn_idx)
1082 		*insn_idx = head->insn_idx;
1083 	if (prev_insn_idx)
1084 		*prev_insn_idx = head->prev_insn_idx;
1085 	elem = head->next;
1086 	free_verifier_state(&head->st, false);
1087 	kfree(head);
1088 	env->head = elem;
1089 	env->stack_size--;
1090 	return 0;
1091 }
1092 
1093 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1094 					     int insn_idx, int prev_insn_idx,
1095 					     bool speculative)
1096 {
1097 	struct bpf_verifier_state *cur = env->cur_state;
1098 	struct bpf_verifier_stack_elem *elem;
1099 	int err;
1100 
1101 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1102 	if (!elem)
1103 		goto err;
1104 
1105 	elem->insn_idx = insn_idx;
1106 	elem->prev_insn_idx = prev_insn_idx;
1107 	elem->next = env->head;
1108 	elem->log_pos = env->log.len_used;
1109 	env->head = elem;
1110 	env->stack_size++;
1111 	err = copy_verifier_state(&elem->st, cur);
1112 	if (err)
1113 		goto err;
1114 	elem->st.speculative |= speculative;
1115 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1116 		verbose(env, "The sequence of %d jumps is too complex.\n",
1117 			env->stack_size);
1118 		goto err;
1119 	}
1120 	if (elem->st.parent) {
1121 		++elem->st.parent->branches;
1122 		/* WARN_ON(branches > 2) technically makes sense here,
1123 		 * but
1124 		 * 1. speculative states will bump 'branches' for non-branch
1125 		 * instructions
1126 		 * 2. is_state_visited() heuristics may decide not to create
1127 		 * a new state for a sequence of branches and all such current
1128 		 * and cloned states will be pointing to a single parent state
1129 		 * which might have large 'branches' count.
1130 		 */
1131 	}
1132 	return &elem->st;
1133 err:
1134 	free_verifier_state(env->cur_state, true);
1135 	env->cur_state = NULL;
1136 	/* pop all elements and return */
1137 	while (!pop_stack(env, NULL, NULL, false));
1138 	return NULL;
1139 }
1140 
1141 #define CALLER_SAVED_REGS 6
1142 static const int caller_saved[CALLER_SAVED_REGS] = {
1143 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1144 };
1145 
1146 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1147 				struct bpf_reg_state *reg);
1148 
1149 /* This helper doesn't clear reg->id */
1150 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1151 {
1152 	reg->var_off = tnum_const(imm);
1153 	reg->smin_value = (s64)imm;
1154 	reg->smax_value = (s64)imm;
1155 	reg->umin_value = imm;
1156 	reg->umax_value = imm;
1157 
1158 	reg->s32_min_value = (s32)imm;
1159 	reg->s32_max_value = (s32)imm;
1160 	reg->u32_min_value = (u32)imm;
1161 	reg->u32_max_value = (u32)imm;
1162 }
1163 
1164 /* Mark the unknown part of a register (variable offset or scalar value) as
1165  * known to have the value @imm.
1166  */
1167 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1168 {
1169 	/* Clear id, off, and union(map_ptr, range) */
1170 	memset(((u8 *)reg) + sizeof(reg->type), 0,
1171 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1172 	___mark_reg_known(reg, imm);
1173 }
1174 
1175 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1176 {
1177 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1178 	reg->s32_min_value = (s32)imm;
1179 	reg->s32_max_value = (s32)imm;
1180 	reg->u32_min_value = (u32)imm;
1181 	reg->u32_max_value = (u32)imm;
1182 }
1183 
1184 /* Mark the 'variable offset' part of a register as zero.  This should be
1185  * used only on registers holding a pointer type.
1186  */
1187 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1188 {
1189 	__mark_reg_known(reg, 0);
1190 }
1191 
1192 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1193 {
1194 	__mark_reg_known(reg, 0);
1195 	reg->type = SCALAR_VALUE;
1196 }
1197 
1198 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1199 				struct bpf_reg_state *regs, u32 regno)
1200 {
1201 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1202 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1203 		/* Something bad happened, let's kill all regs */
1204 		for (regno = 0; regno < MAX_BPF_REG; regno++)
1205 			__mark_reg_not_init(env, regs + regno);
1206 		return;
1207 	}
1208 	__mark_reg_known_zero(regs + regno);
1209 }
1210 
1211 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1212 {
1213 	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1214 		const struct bpf_map *map = reg->map_ptr;
1215 
1216 		if (map->inner_map_meta) {
1217 			reg->type = CONST_PTR_TO_MAP;
1218 			reg->map_ptr = map->inner_map_meta;
1219 			/* transfer reg's id which is unique for every map_lookup_elem
1220 			 * as UID of the inner map.
1221 			 */
1222 			if (map_value_has_timer(map->inner_map_meta))
1223 				reg->map_uid = reg->id;
1224 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1225 			reg->type = PTR_TO_XDP_SOCK;
1226 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1227 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1228 			reg->type = PTR_TO_SOCKET;
1229 		} else {
1230 			reg->type = PTR_TO_MAP_VALUE;
1231 		}
1232 		return;
1233 	}
1234 
1235 	reg->type &= ~PTR_MAYBE_NULL;
1236 }
1237 
1238 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1239 {
1240 	return type_is_pkt_pointer(reg->type);
1241 }
1242 
1243 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1244 {
1245 	return reg_is_pkt_pointer(reg) ||
1246 	       reg->type == PTR_TO_PACKET_END;
1247 }
1248 
1249 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1250 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1251 				    enum bpf_reg_type which)
1252 {
1253 	/* The register can already have a range from prior markings.
1254 	 * This is fine as long as it hasn't been advanced from its
1255 	 * origin.
1256 	 */
1257 	return reg->type == which &&
1258 	       reg->id == 0 &&
1259 	       reg->off == 0 &&
1260 	       tnum_equals_const(reg->var_off, 0);
1261 }
1262 
1263 /* Reset the min/max bounds of a register */
1264 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1265 {
1266 	reg->smin_value = S64_MIN;
1267 	reg->smax_value = S64_MAX;
1268 	reg->umin_value = 0;
1269 	reg->umax_value = U64_MAX;
1270 
1271 	reg->s32_min_value = S32_MIN;
1272 	reg->s32_max_value = S32_MAX;
1273 	reg->u32_min_value = 0;
1274 	reg->u32_max_value = U32_MAX;
1275 }
1276 
1277 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1278 {
1279 	reg->smin_value = S64_MIN;
1280 	reg->smax_value = S64_MAX;
1281 	reg->umin_value = 0;
1282 	reg->umax_value = U64_MAX;
1283 }
1284 
1285 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1286 {
1287 	reg->s32_min_value = S32_MIN;
1288 	reg->s32_max_value = S32_MAX;
1289 	reg->u32_min_value = 0;
1290 	reg->u32_max_value = U32_MAX;
1291 }
1292 
1293 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1294 {
1295 	struct tnum var32_off = tnum_subreg(reg->var_off);
1296 
1297 	/* min signed is max(sign bit) | min(other bits) */
1298 	reg->s32_min_value = max_t(s32, reg->s32_min_value,
1299 			var32_off.value | (var32_off.mask & S32_MIN));
1300 	/* max signed is min(sign bit) | max(other bits) */
1301 	reg->s32_max_value = min_t(s32, reg->s32_max_value,
1302 			var32_off.value | (var32_off.mask & S32_MAX));
1303 	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1304 	reg->u32_max_value = min(reg->u32_max_value,
1305 				 (u32)(var32_off.value | var32_off.mask));
1306 }
1307 
1308 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1309 {
1310 	/* min signed is max(sign bit) | min(other bits) */
1311 	reg->smin_value = max_t(s64, reg->smin_value,
1312 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
1313 	/* max signed is min(sign bit) | max(other bits) */
1314 	reg->smax_value = min_t(s64, reg->smax_value,
1315 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
1316 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
1317 	reg->umax_value = min(reg->umax_value,
1318 			      reg->var_off.value | reg->var_off.mask);
1319 }
1320 
1321 static void __update_reg_bounds(struct bpf_reg_state *reg)
1322 {
1323 	__update_reg32_bounds(reg);
1324 	__update_reg64_bounds(reg);
1325 }
1326 
1327 /* Uses signed min/max values to inform unsigned, and vice-versa */
1328 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1329 {
1330 	/* Learn sign from signed bounds.
1331 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
1332 	 * are the same, so combine.  This works even in the negative case, e.g.
1333 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1334 	 */
1335 	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1336 		reg->s32_min_value = reg->u32_min_value =
1337 			max_t(u32, reg->s32_min_value, reg->u32_min_value);
1338 		reg->s32_max_value = reg->u32_max_value =
1339 			min_t(u32, reg->s32_max_value, reg->u32_max_value);
1340 		return;
1341 	}
1342 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
1343 	 * boundary, so we must be careful.
1344 	 */
1345 	if ((s32)reg->u32_max_value >= 0) {
1346 		/* Positive.  We can't learn anything from the smin, but smax
1347 		 * is positive, hence safe.
1348 		 */
1349 		reg->s32_min_value = reg->u32_min_value;
1350 		reg->s32_max_value = reg->u32_max_value =
1351 			min_t(u32, reg->s32_max_value, reg->u32_max_value);
1352 	} else if ((s32)reg->u32_min_value < 0) {
1353 		/* Negative.  We can't learn anything from the smax, but smin
1354 		 * is negative, hence safe.
1355 		 */
1356 		reg->s32_min_value = reg->u32_min_value =
1357 			max_t(u32, reg->s32_min_value, reg->u32_min_value);
1358 		reg->s32_max_value = reg->u32_max_value;
1359 	}
1360 }
1361 
1362 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1363 {
1364 	/* Learn sign from signed bounds.
1365 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
1366 	 * are the same, so combine.  This works even in the negative case, e.g.
1367 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1368 	 */
1369 	if (reg->smin_value >= 0 || reg->smax_value < 0) {
1370 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1371 							  reg->umin_value);
1372 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1373 							  reg->umax_value);
1374 		return;
1375 	}
1376 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
1377 	 * boundary, so we must be careful.
1378 	 */
1379 	if ((s64)reg->umax_value >= 0) {
1380 		/* Positive.  We can't learn anything from the smin, but smax
1381 		 * is positive, hence safe.
1382 		 */
1383 		reg->smin_value = reg->umin_value;
1384 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1385 							  reg->umax_value);
1386 	} else if ((s64)reg->umin_value < 0) {
1387 		/* Negative.  We can't learn anything from the smax, but smin
1388 		 * is negative, hence safe.
1389 		 */
1390 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1391 							  reg->umin_value);
1392 		reg->smax_value = reg->umax_value;
1393 	}
1394 }
1395 
1396 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1397 {
1398 	__reg32_deduce_bounds(reg);
1399 	__reg64_deduce_bounds(reg);
1400 }
1401 
1402 /* Attempts to improve var_off based on unsigned min/max information */
1403 static void __reg_bound_offset(struct bpf_reg_state *reg)
1404 {
1405 	struct tnum var64_off = tnum_intersect(reg->var_off,
1406 					       tnum_range(reg->umin_value,
1407 							  reg->umax_value));
1408 	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1409 						tnum_range(reg->u32_min_value,
1410 							   reg->u32_max_value));
1411 
1412 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1413 }
1414 
1415 static bool __reg32_bound_s64(s32 a)
1416 {
1417 	return a >= 0 && a <= S32_MAX;
1418 }
1419 
1420 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1421 {
1422 	reg->umin_value = reg->u32_min_value;
1423 	reg->umax_value = reg->u32_max_value;
1424 
1425 	/* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
1426 	 * be positive otherwise set to worse case bounds and refine later
1427 	 * from tnum.
1428 	 */
1429 	if (__reg32_bound_s64(reg->s32_min_value) &&
1430 	    __reg32_bound_s64(reg->s32_max_value)) {
1431 		reg->smin_value = reg->s32_min_value;
1432 		reg->smax_value = reg->s32_max_value;
1433 	} else {
1434 		reg->smin_value = 0;
1435 		reg->smax_value = U32_MAX;
1436 	}
1437 }
1438 
1439 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1440 {
1441 	/* special case when 64-bit register has upper 32-bit register
1442 	 * zeroed. Typically happens after zext or <<32, >>32 sequence
1443 	 * allowing us to use 32-bit bounds directly,
1444 	 */
1445 	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1446 		__reg_assign_32_into_64(reg);
1447 	} else {
1448 		/* Otherwise the best we can do is push lower 32bit known and
1449 		 * unknown bits into register (var_off set from jmp logic)
1450 		 * then learn as much as possible from the 64-bit tnum
1451 		 * known and unknown bits. The previous smin/smax bounds are
1452 		 * invalid here because of jmp32 compare so mark them unknown
1453 		 * so they do not impact tnum bounds calculation.
1454 		 */
1455 		__mark_reg64_unbounded(reg);
1456 		__update_reg_bounds(reg);
1457 	}
1458 
1459 	/* Intersecting with the old var_off might have improved our bounds
1460 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1461 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
1462 	 */
1463 	__reg_deduce_bounds(reg);
1464 	__reg_bound_offset(reg);
1465 	__update_reg_bounds(reg);
1466 }
1467 
1468 static bool __reg64_bound_s32(s64 a)
1469 {
1470 	return a >= S32_MIN && a <= S32_MAX;
1471 }
1472 
1473 static bool __reg64_bound_u32(u64 a)
1474 {
1475 	return a >= U32_MIN && a <= U32_MAX;
1476 }
1477 
1478 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1479 {
1480 	__mark_reg32_unbounded(reg);
1481 
1482 	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1483 		reg->s32_min_value = (s32)reg->smin_value;
1484 		reg->s32_max_value = (s32)reg->smax_value;
1485 	}
1486 	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1487 		reg->u32_min_value = (u32)reg->umin_value;
1488 		reg->u32_max_value = (u32)reg->umax_value;
1489 	}
1490 
1491 	/* Intersecting with the old var_off might have improved our bounds
1492 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1493 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
1494 	 */
1495 	__reg_deduce_bounds(reg);
1496 	__reg_bound_offset(reg);
1497 	__update_reg_bounds(reg);
1498 }
1499 
1500 /* Mark a register as having a completely unknown (scalar) value. */
1501 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1502 			       struct bpf_reg_state *reg)
1503 {
1504 	/*
1505 	 * Clear type, id, off, and union(map_ptr, range) and
1506 	 * padding between 'type' and union
1507 	 */
1508 	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1509 	reg->type = SCALAR_VALUE;
1510 	reg->var_off = tnum_unknown;
1511 	reg->frameno = 0;
1512 	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1513 	__mark_reg_unbounded(reg);
1514 }
1515 
1516 static void mark_reg_unknown(struct bpf_verifier_env *env,
1517 			     struct bpf_reg_state *regs, u32 regno)
1518 {
1519 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1520 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1521 		/* Something bad happened, let's kill all regs except FP */
1522 		for (regno = 0; regno < BPF_REG_FP; regno++)
1523 			__mark_reg_not_init(env, regs + regno);
1524 		return;
1525 	}
1526 	__mark_reg_unknown(env, regs + regno);
1527 }
1528 
1529 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1530 				struct bpf_reg_state *reg)
1531 {
1532 	__mark_reg_unknown(env, reg);
1533 	reg->type = NOT_INIT;
1534 }
1535 
1536 static void mark_reg_not_init(struct bpf_verifier_env *env,
1537 			      struct bpf_reg_state *regs, u32 regno)
1538 {
1539 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1540 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1541 		/* Something bad happened, let's kill all regs except FP */
1542 		for (regno = 0; regno < BPF_REG_FP; regno++)
1543 			__mark_reg_not_init(env, regs + regno);
1544 		return;
1545 	}
1546 	__mark_reg_not_init(env, regs + regno);
1547 }
1548 
1549 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1550 			    struct bpf_reg_state *regs, u32 regno,
1551 			    enum bpf_reg_type reg_type,
1552 			    struct btf *btf, u32 btf_id,
1553 			    enum bpf_type_flag flag)
1554 {
1555 	if (reg_type == SCALAR_VALUE) {
1556 		mark_reg_unknown(env, regs, regno);
1557 		return;
1558 	}
1559 	mark_reg_known_zero(env, regs, regno);
1560 	regs[regno].type = PTR_TO_BTF_ID | flag;
1561 	regs[regno].btf = btf;
1562 	regs[regno].btf_id = btf_id;
1563 }
1564 
1565 #define DEF_NOT_SUBREG	(0)
1566 static void init_reg_state(struct bpf_verifier_env *env,
1567 			   struct bpf_func_state *state)
1568 {
1569 	struct bpf_reg_state *regs = state->regs;
1570 	int i;
1571 
1572 	for (i = 0; i < MAX_BPF_REG; i++) {
1573 		mark_reg_not_init(env, regs, i);
1574 		regs[i].live = REG_LIVE_NONE;
1575 		regs[i].parent = NULL;
1576 		regs[i].subreg_def = DEF_NOT_SUBREG;
1577 	}
1578 
1579 	/* frame pointer */
1580 	regs[BPF_REG_FP].type = PTR_TO_STACK;
1581 	mark_reg_known_zero(env, regs, BPF_REG_FP);
1582 	regs[BPF_REG_FP].frameno = state->frameno;
1583 }
1584 
1585 #define BPF_MAIN_FUNC (-1)
1586 static void init_func_state(struct bpf_verifier_env *env,
1587 			    struct bpf_func_state *state,
1588 			    int callsite, int frameno, int subprogno)
1589 {
1590 	state->callsite = callsite;
1591 	state->frameno = frameno;
1592 	state->subprogno = subprogno;
1593 	init_reg_state(env, state);
1594 	mark_verifier_state_scratched(env);
1595 }
1596 
1597 /* Similar to push_stack(), but for async callbacks */
1598 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1599 						int insn_idx, int prev_insn_idx,
1600 						int subprog)
1601 {
1602 	struct bpf_verifier_stack_elem *elem;
1603 	struct bpf_func_state *frame;
1604 
1605 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1606 	if (!elem)
1607 		goto err;
1608 
1609 	elem->insn_idx = insn_idx;
1610 	elem->prev_insn_idx = prev_insn_idx;
1611 	elem->next = env->head;
1612 	elem->log_pos = env->log.len_used;
1613 	env->head = elem;
1614 	env->stack_size++;
1615 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1616 		verbose(env,
1617 			"The sequence of %d jumps is too complex for async cb.\n",
1618 			env->stack_size);
1619 		goto err;
1620 	}
1621 	/* Unlike push_stack() do not copy_verifier_state().
1622 	 * The caller state doesn't matter.
1623 	 * This is async callback. It starts in a fresh stack.
1624 	 * Initialize it similar to do_check_common().
1625 	 */
1626 	elem->st.branches = 1;
1627 	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1628 	if (!frame)
1629 		goto err;
1630 	init_func_state(env, frame,
1631 			BPF_MAIN_FUNC /* callsite */,
1632 			0 /* frameno within this callchain */,
1633 			subprog /* subprog number within this prog */);
1634 	elem->st.frame[0] = frame;
1635 	return &elem->st;
1636 err:
1637 	free_verifier_state(env->cur_state, true);
1638 	env->cur_state = NULL;
1639 	/* pop all elements and return */
1640 	while (!pop_stack(env, NULL, NULL, false));
1641 	return NULL;
1642 }
1643 
1644 
1645 enum reg_arg_type {
1646 	SRC_OP,		/* register is used as source operand */
1647 	DST_OP,		/* register is used as destination operand */
1648 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
1649 };
1650 
1651 static int cmp_subprogs(const void *a, const void *b)
1652 {
1653 	return ((struct bpf_subprog_info *)a)->start -
1654 	       ((struct bpf_subprog_info *)b)->start;
1655 }
1656 
1657 static int find_subprog(struct bpf_verifier_env *env, int off)
1658 {
1659 	struct bpf_subprog_info *p;
1660 
1661 	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1662 		    sizeof(env->subprog_info[0]), cmp_subprogs);
1663 	if (!p)
1664 		return -ENOENT;
1665 	return p - env->subprog_info;
1666 
1667 }
1668 
1669 static int add_subprog(struct bpf_verifier_env *env, int off)
1670 {
1671 	int insn_cnt = env->prog->len;
1672 	int ret;
1673 
1674 	if (off >= insn_cnt || off < 0) {
1675 		verbose(env, "call to invalid destination\n");
1676 		return -EINVAL;
1677 	}
1678 	ret = find_subprog(env, off);
1679 	if (ret >= 0)
1680 		return ret;
1681 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1682 		verbose(env, "too many subprograms\n");
1683 		return -E2BIG;
1684 	}
1685 	/* determine subprog starts. The end is one before the next starts */
1686 	env->subprog_info[env->subprog_cnt++].start = off;
1687 	sort(env->subprog_info, env->subprog_cnt,
1688 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1689 	return env->subprog_cnt - 1;
1690 }
1691 
1692 #define MAX_KFUNC_DESCS 256
1693 #define MAX_KFUNC_BTFS	256
1694 
1695 struct bpf_kfunc_desc {
1696 	struct btf_func_model func_model;
1697 	u32 func_id;
1698 	s32 imm;
1699 	u16 offset;
1700 };
1701 
1702 struct bpf_kfunc_btf {
1703 	struct btf *btf;
1704 	struct module *module;
1705 	u16 offset;
1706 };
1707 
1708 struct bpf_kfunc_desc_tab {
1709 	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
1710 	u32 nr_descs;
1711 };
1712 
1713 struct bpf_kfunc_btf_tab {
1714 	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
1715 	u32 nr_descs;
1716 };
1717 
1718 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
1719 {
1720 	const struct bpf_kfunc_desc *d0 = a;
1721 	const struct bpf_kfunc_desc *d1 = b;
1722 
1723 	/* func_id is not greater than BTF_MAX_TYPE */
1724 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1725 }
1726 
1727 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1728 {
1729 	const struct bpf_kfunc_btf *d0 = a;
1730 	const struct bpf_kfunc_btf *d1 = b;
1731 
1732 	return d0->offset - d1->offset;
1733 }
1734 
1735 static const struct bpf_kfunc_desc *
1736 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
1737 {
1738 	struct bpf_kfunc_desc desc = {
1739 		.func_id = func_id,
1740 		.offset = offset,
1741 	};
1742 	struct bpf_kfunc_desc_tab *tab;
1743 
1744 	tab = prog->aux->kfunc_tab;
1745 	return bsearch(&desc, tab->descs, tab->nr_descs,
1746 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1747 }
1748 
1749 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
1750 					 s16 offset)
1751 {
1752 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
1753 	struct bpf_kfunc_btf_tab *tab;
1754 	struct bpf_kfunc_btf *b;
1755 	struct module *mod;
1756 	struct btf *btf;
1757 	int btf_fd;
1758 
1759 	tab = env->prog->aux->kfunc_btf_tab;
1760 	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
1761 		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
1762 	if (!b) {
1763 		if (tab->nr_descs == MAX_KFUNC_BTFS) {
1764 			verbose(env, "too many different module BTFs\n");
1765 			return ERR_PTR(-E2BIG);
1766 		}
1767 
1768 		if (bpfptr_is_null(env->fd_array)) {
1769 			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
1770 			return ERR_PTR(-EPROTO);
1771 		}
1772 
1773 		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
1774 					    offset * sizeof(btf_fd),
1775 					    sizeof(btf_fd)))
1776 			return ERR_PTR(-EFAULT);
1777 
1778 		btf = btf_get_by_fd(btf_fd);
1779 		if (IS_ERR(btf)) {
1780 			verbose(env, "invalid module BTF fd specified\n");
1781 			return btf;
1782 		}
1783 
1784 		if (!btf_is_module(btf)) {
1785 			verbose(env, "BTF fd for kfunc is not a module BTF\n");
1786 			btf_put(btf);
1787 			return ERR_PTR(-EINVAL);
1788 		}
1789 
1790 		mod = btf_try_get_module(btf);
1791 		if (!mod) {
1792 			btf_put(btf);
1793 			return ERR_PTR(-ENXIO);
1794 		}
1795 
1796 		b = &tab->descs[tab->nr_descs++];
1797 		b->btf = btf;
1798 		b->module = mod;
1799 		b->offset = offset;
1800 
1801 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1802 		     kfunc_btf_cmp_by_off, NULL);
1803 	}
1804 	return b->btf;
1805 }
1806 
1807 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
1808 {
1809 	if (!tab)
1810 		return;
1811 
1812 	while (tab->nr_descs--) {
1813 		module_put(tab->descs[tab->nr_descs].module);
1814 		btf_put(tab->descs[tab->nr_descs].btf);
1815 	}
1816 	kfree(tab);
1817 }
1818 
1819 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
1820 				       u32 func_id, s16 offset)
1821 {
1822 	if (offset) {
1823 		if (offset < 0) {
1824 			/* In the future, this can be allowed to increase limit
1825 			 * of fd index into fd_array, interpreted as u16.
1826 			 */
1827 			verbose(env, "negative offset disallowed for kernel module function call\n");
1828 			return ERR_PTR(-EINVAL);
1829 		}
1830 
1831 		return __find_kfunc_desc_btf(env, offset);
1832 	}
1833 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
1834 }
1835 
1836 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
1837 {
1838 	const struct btf_type *func, *func_proto;
1839 	struct bpf_kfunc_btf_tab *btf_tab;
1840 	struct bpf_kfunc_desc_tab *tab;
1841 	struct bpf_prog_aux *prog_aux;
1842 	struct bpf_kfunc_desc *desc;
1843 	const char *func_name;
1844 	struct btf *desc_btf;
1845 	unsigned long call_imm;
1846 	unsigned long addr;
1847 	int err;
1848 
1849 	prog_aux = env->prog->aux;
1850 	tab = prog_aux->kfunc_tab;
1851 	btf_tab = prog_aux->kfunc_btf_tab;
1852 	if (!tab) {
1853 		if (!btf_vmlinux) {
1854 			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
1855 			return -ENOTSUPP;
1856 		}
1857 
1858 		if (!env->prog->jit_requested) {
1859 			verbose(env, "JIT is required for calling kernel function\n");
1860 			return -ENOTSUPP;
1861 		}
1862 
1863 		if (!bpf_jit_supports_kfunc_call()) {
1864 			verbose(env, "JIT does not support calling kernel function\n");
1865 			return -ENOTSUPP;
1866 		}
1867 
1868 		if (!env->prog->gpl_compatible) {
1869 			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
1870 			return -EINVAL;
1871 		}
1872 
1873 		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
1874 		if (!tab)
1875 			return -ENOMEM;
1876 		prog_aux->kfunc_tab = tab;
1877 	}
1878 
1879 	/* func_id == 0 is always invalid, but instead of returning an error, be
1880 	 * conservative and wait until the code elimination pass before returning
1881 	 * error, so that invalid calls that get pruned out can be in BPF programs
1882 	 * loaded from userspace.  It is also required that offset be untouched
1883 	 * for such calls.
1884 	 */
1885 	if (!func_id && !offset)
1886 		return 0;
1887 
1888 	if (!btf_tab && offset) {
1889 		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
1890 		if (!btf_tab)
1891 			return -ENOMEM;
1892 		prog_aux->kfunc_btf_tab = btf_tab;
1893 	}
1894 
1895 	desc_btf = find_kfunc_desc_btf(env, func_id, offset);
1896 	if (IS_ERR(desc_btf)) {
1897 		verbose(env, "failed to find BTF for kernel function\n");
1898 		return PTR_ERR(desc_btf);
1899 	}
1900 
1901 	if (find_kfunc_desc(env->prog, func_id, offset))
1902 		return 0;
1903 
1904 	if (tab->nr_descs == MAX_KFUNC_DESCS) {
1905 		verbose(env, "too many different kernel function calls\n");
1906 		return -E2BIG;
1907 	}
1908 
1909 	func = btf_type_by_id(desc_btf, func_id);
1910 	if (!func || !btf_type_is_func(func)) {
1911 		verbose(env, "kernel btf_id %u is not a function\n",
1912 			func_id);
1913 		return -EINVAL;
1914 	}
1915 	func_proto = btf_type_by_id(desc_btf, func->type);
1916 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
1917 		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
1918 			func_id);
1919 		return -EINVAL;
1920 	}
1921 
1922 	func_name = btf_name_by_offset(desc_btf, func->name_off);
1923 	addr = kallsyms_lookup_name(func_name);
1924 	if (!addr) {
1925 		verbose(env, "cannot find address for kernel function %s\n",
1926 			func_name);
1927 		return -EINVAL;
1928 	}
1929 
1930 	call_imm = BPF_CALL_IMM(addr);
1931 	/* Check whether or not the relative offset overflows desc->imm */
1932 	if ((unsigned long)(s32)call_imm != call_imm) {
1933 		verbose(env, "address of kernel function %s is out of range\n",
1934 			func_name);
1935 		return -EINVAL;
1936 	}
1937 
1938 	desc = &tab->descs[tab->nr_descs++];
1939 	desc->func_id = func_id;
1940 	desc->imm = call_imm;
1941 	desc->offset = offset;
1942 	err = btf_distill_func_proto(&env->log, desc_btf,
1943 				     func_proto, func_name,
1944 				     &desc->func_model);
1945 	if (!err)
1946 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1947 		     kfunc_desc_cmp_by_id_off, NULL);
1948 	return err;
1949 }
1950 
1951 static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
1952 {
1953 	const struct bpf_kfunc_desc *d0 = a;
1954 	const struct bpf_kfunc_desc *d1 = b;
1955 
1956 	if (d0->imm > d1->imm)
1957 		return 1;
1958 	else if (d0->imm < d1->imm)
1959 		return -1;
1960 	return 0;
1961 }
1962 
1963 static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
1964 {
1965 	struct bpf_kfunc_desc_tab *tab;
1966 
1967 	tab = prog->aux->kfunc_tab;
1968 	if (!tab)
1969 		return;
1970 
1971 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1972 	     kfunc_desc_cmp_by_imm, NULL);
1973 }
1974 
1975 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
1976 {
1977 	return !!prog->aux->kfunc_tab;
1978 }
1979 
1980 const struct btf_func_model *
1981 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
1982 			 const struct bpf_insn *insn)
1983 {
1984 	const struct bpf_kfunc_desc desc = {
1985 		.imm = insn->imm,
1986 	};
1987 	const struct bpf_kfunc_desc *res;
1988 	struct bpf_kfunc_desc_tab *tab;
1989 
1990 	tab = prog->aux->kfunc_tab;
1991 	res = bsearch(&desc, tab->descs, tab->nr_descs,
1992 		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
1993 
1994 	return res ? &res->func_model : NULL;
1995 }
1996 
1997 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
1998 {
1999 	struct bpf_subprog_info *subprog = env->subprog_info;
2000 	struct bpf_insn *insn = env->prog->insnsi;
2001 	int i, ret, insn_cnt = env->prog->len;
2002 
2003 	/* Add entry function. */
2004 	ret = add_subprog(env, 0);
2005 	if (ret)
2006 		return ret;
2007 
2008 	for (i = 0; i < insn_cnt; i++, insn++) {
2009 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2010 		    !bpf_pseudo_kfunc_call(insn))
2011 			continue;
2012 
2013 		if (!env->bpf_capable) {
2014 			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2015 			return -EPERM;
2016 		}
2017 
2018 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2019 			ret = add_subprog(env, i + insn->imm + 1);
2020 		else
2021 			ret = add_kfunc_call(env, insn->imm, insn->off);
2022 
2023 		if (ret < 0)
2024 			return ret;
2025 	}
2026 
2027 	/* Add a fake 'exit' subprog which could simplify subprog iteration
2028 	 * logic. 'subprog_cnt' should not be increased.
2029 	 */
2030 	subprog[env->subprog_cnt].start = insn_cnt;
2031 
2032 	if (env->log.level & BPF_LOG_LEVEL2)
2033 		for (i = 0; i < env->subprog_cnt; i++)
2034 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
2035 
2036 	return 0;
2037 }
2038 
2039 static int check_subprogs(struct bpf_verifier_env *env)
2040 {
2041 	int i, subprog_start, subprog_end, off, cur_subprog = 0;
2042 	struct bpf_subprog_info *subprog = env->subprog_info;
2043 	struct bpf_insn *insn = env->prog->insnsi;
2044 	int insn_cnt = env->prog->len;
2045 
2046 	/* now check that all jumps are within the same subprog */
2047 	subprog_start = subprog[cur_subprog].start;
2048 	subprog_end = subprog[cur_subprog + 1].start;
2049 	for (i = 0; i < insn_cnt; i++) {
2050 		u8 code = insn[i].code;
2051 
2052 		if (code == (BPF_JMP | BPF_CALL) &&
2053 		    insn[i].imm == BPF_FUNC_tail_call &&
2054 		    insn[i].src_reg != BPF_PSEUDO_CALL)
2055 			subprog[cur_subprog].has_tail_call = true;
2056 		if (BPF_CLASS(code) == BPF_LD &&
2057 		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2058 			subprog[cur_subprog].has_ld_abs = true;
2059 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2060 			goto next;
2061 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2062 			goto next;
2063 		off = i + insn[i].off + 1;
2064 		if (off < subprog_start || off >= subprog_end) {
2065 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
2066 			return -EINVAL;
2067 		}
2068 next:
2069 		if (i == subprog_end - 1) {
2070 			/* to avoid fall-through from one subprog into another
2071 			 * the last insn of the subprog should be either exit
2072 			 * or unconditional jump back
2073 			 */
2074 			if (code != (BPF_JMP | BPF_EXIT) &&
2075 			    code != (BPF_JMP | BPF_JA)) {
2076 				verbose(env, "last insn is not an exit or jmp\n");
2077 				return -EINVAL;
2078 			}
2079 			subprog_start = subprog_end;
2080 			cur_subprog++;
2081 			if (cur_subprog < env->subprog_cnt)
2082 				subprog_end = subprog[cur_subprog + 1].start;
2083 		}
2084 	}
2085 	return 0;
2086 }
2087 
2088 /* Parentage chain of this register (or stack slot) should take care of all
2089  * issues like callee-saved registers, stack slot allocation time, etc.
2090  */
2091 static int mark_reg_read(struct bpf_verifier_env *env,
2092 			 const struct bpf_reg_state *state,
2093 			 struct bpf_reg_state *parent, u8 flag)
2094 {
2095 	bool writes = parent == state->parent; /* Observe write marks */
2096 	int cnt = 0;
2097 
2098 	while (parent) {
2099 		/* if read wasn't screened by an earlier write ... */
2100 		if (writes && state->live & REG_LIVE_WRITTEN)
2101 			break;
2102 		if (parent->live & REG_LIVE_DONE) {
2103 			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2104 				reg_type_str(env, parent->type),
2105 				parent->var_off.value, parent->off);
2106 			return -EFAULT;
2107 		}
2108 		/* The first condition is more likely to be true than the
2109 		 * second, checked it first.
2110 		 */
2111 		if ((parent->live & REG_LIVE_READ) == flag ||
2112 		    parent->live & REG_LIVE_READ64)
2113 			/* The parentage chain never changes and
2114 			 * this parent was already marked as LIVE_READ.
2115 			 * There is no need to keep walking the chain again and
2116 			 * keep re-marking all parents as LIVE_READ.
2117 			 * This case happens when the same register is read
2118 			 * multiple times without writes into it in-between.
2119 			 * Also, if parent has the stronger REG_LIVE_READ64 set,
2120 			 * then no need to set the weak REG_LIVE_READ32.
2121 			 */
2122 			break;
2123 		/* ... then we depend on parent's value */
2124 		parent->live |= flag;
2125 		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2126 		if (flag == REG_LIVE_READ64)
2127 			parent->live &= ~REG_LIVE_READ32;
2128 		state = parent;
2129 		parent = state->parent;
2130 		writes = true;
2131 		cnt++;
2132 	}
2133 
2134 	if (env->longest_mark_read_walk < cnt)
2135 		env->longest_mark_read_walk = cnt;
2136 	return 0;
2137 }
2138 
2139 /* This function is supposed to be used by the following 32-bit optimization
2140  * code only. It returns TRUE if the source or destination register operates
2141  * on 64-bit, otherwise return FALSE.
2142  */
2143 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2144 		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2145 {
2146 	u8 code, class, op;
2147 
2148 	code = insn->code;
2149 	class = BPF_CLASS(code);
2150 	op = BPF_OP(code);
2151 	if (class == BPF_JMP) {
2152 		/* BPF_EXIT for "main" will reach here. Return TRUE
2153 		 * conservatively.
2154 		 */
2155 		if (op == BPF_EXIT)
2156 			return true;
2157 		if (op == BPF_CALL) {
2158 			/* BPF to BPF call will reach here because of marking
2159 			 * caller saved clobber with DST_OP_NO_MARK for which we
2160 			 * don't care the register def because they are anyway
2161 			 * marked as NOT_INIT already.
2162 			 */
2163 			if (insn->src_reg == BPF_PSEUDO_CALL)
2164 				return false;
2165 			/* Helper call will reach here because of arg type
2166 			 * check, conservatively return TRUE.
2167 			 */
2168 			if (t == SRC_OP)
2169 				return true;
2170 
2171 			return false;
2172 		}
2173 	}
2174 
2175 	if (class == BPF_ALU64 || class == BPF_JMP ||
2176 	    /* BPF_END always use BPF_ALU class. */
2177 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2178 		return true;
2179 
2180 	if (class == BPF_ALU || class == BPF_JMP32)
2181 		return false;
2182 
2183 	if (class == BPF_LDX) {
2184 		if (t != SRC_OP)
2185 			return BPF_SIZE(code) == BPF_DW;
2186 		/* LDX source must be ptr. */
2187 		return true;
2188 	}
2189 
2190 	if (class == BPF_STX) {
2191 		/* BPF_STX (including atomic variants) has multiple source
2192 		 * operands, one of which is a ptr. Check whether the caller is
2193 		 * asking about it.
2194 		 */
2195 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
2196 			return true;
2197 		return BPF_SIZE(code) == BPF_DW;
2198 	}
2199 
2200 	if (class == BPF_LD) {
2201 		u8 mode = BPF_MODE(code);
2202 
2203 		/* LD_IMM64 */
2204 		if (mode == BPF_IMM)
2205 			return true;
2206 
2207 		/* Both LD_IND and LD_ABS return 32-bit data. */
2208 		if (t != SRC_OP)
2209 			return  false;
2210 
2211 		/* Implicit ctx ptr. */
2212 		if (regno == BPF_REG_6)
2213 			return true;
2214 
2215 		/* Explicit source could be any width. */
2216 		return true;
2217 	}
2218 
2219 	if (class == BPF_ST)
2220 		/* The only source register for BPF_ST is a ptr. */
2221 		return true;
2222 
2223 	/* Conservatively return true at default. */
2224 	return true;
2225 }
2226 
2227 /* Return the regno defined by the insn, or -1. */
2228 static int insn_def_regno(const struct bpf_insn *insn)
2229 {
2230 	switch (BPF_CLASS(insn->code)) {
2231 	case BPF_JMP:
2232 	case BPF_JMP32:
2233 	case BPF_ST:
2234 		return -1;
2235 	case BPF_STX:
2236 		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2237 		    (insn->imm & BPF_FETCH)) {
2238 			if (insn->imm == BPF_CMPXCHG)
2239 				return BPF_REG_0;
2240 			else
2241 				return insn->src_reg;
2242 		} else {
2243 			return -1;
2244 		}
2245 	default:
2246 		return insn->dst_reg;
2247 	}
2248 }
2249 
2250 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
2251 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2252 {
2253 	int dst_reg = insn_def_regno(insn);
2254 
2255 	if (dst_reg == -1)
2256 		return false;
2257 
2258 	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2259 }
2260 
2261 static void mark_insn_zext(struct bpf_verifier_env *env,
2262 			   struct bpf_reg_state *reg)
2263 {
2264 	s32 def_idx = reg->subreg_def;
2265 
2266 	if (def_idx == DEF_NOT_SUBREG)
2267 		return;
2268 
2269 	env->insn_aux_data[def_idx - 1].zext_dst = true;
2270 	/* The dst will be zero extended, so won't be sub-register anymore. */
2271 	reg->subreg_def = DEF_NOT_SUBREG;
2272 }
2273 
2274 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2275 			 enum reg_arg_type t)
2276 {
2277 	struct bpf_verifier_state *vstate = env->cur_state;
2278 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2279 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2280 	struct bpf_reg_state *reg, *regs = state->regs;
2281 	bool rw64;
2282 
2283 	if (regno >= MAX_BPF_REG) {
2284 		verbose(env, "R%d is invalid\n", regno);
2285 		return -EINVAL;
2286 	}
2287 
2288 	mark_reg_scratched(env, regno);
2289 
2290 	reg = &regs[regno];
2291 	rw64 = is_reg64(env, insn, regno, reg, t);
2292 	if (t == SRC_OP) {
2293 		/* check whether register used as source operand can be read */
2294 		if (reg->type == NOT_INIT) {
2295 			verbose(env, "R%d !read_ok\n", regno);
2296 			return -EACCES;
2297 		}
2298 		/* We don't need to worry about FP liveness because it's read-only */
2299 		if (regno == BPF_REG_FP)
2300 			return 0;
2301 
2302 		if (rw64)
2303 			mark_insn_zext(env, reg);
2304 
2305 		return mark_reg_read(env, reg, reg->parent,
2306 				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2307 	} else {
2308 		/* check whether register used as dest operand can be written to */
2309 		if (regno == BPF_REG_FP) {
2310 			verbose(env, "frame pointer is read only\n");
2311 			return -EACCES;
2312 		}
2313 		reg->live |= REG_LIVE_WRITTEN;
2314 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2315 		if (t == DST_OP)
2316 			mark_reg_unknown(env, regs, regno);
2317 	}
2318 	return 0;
2319 }
2320 
2321 /* for any branch, call, exit record the history of jmps in the given state */
2322 static int push_jmp_history(struct bpf_verifier_env *env,
2323 			    struct bpf_verifier_state *cur)
2324 {
2325 	u32 cnt = cur->jmp_history_cnt;
2326 	struct bpf_idx_pair *p;
2327 
2328 	cnt++;
2329 	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
2330 	if (!p)
2331 		return -ENOMEM;
2332 	p[cnt - 1].idx = env->insn_idx;
2333 	p[cnt - 1].prev_idx = env->prev_insn_idx;
2334 	cur->jmp_history = p;
2335 	cur->jmp_history_cnt = cnt;
2336 	return 0;
2337 }
2338 
2339 /* Backtrack one insn at a time. If idx is not at the top of recorded
2340  * history then previous instruction came from straight line execution.
2341  */
2342 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2343 			     u32 *history)
2344 {
2345 	u32 cnt = *history;
2346 
2347 	if (cnt && st->jmp_history[cnt - 1].idx == i) {
2348 		i = st->jmp_history[cnt - 1].prev_idx;
2349 		(*history)--;
2350 	} else {
2351 		i--;
2352 	}
2353 	return i;
2354 }
2355 
2356 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2357 {
2358 	const struct btf_type *func;
2359 	struct btf *desc_btf;
2360 
2361 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2362 		return NULL;
2363 
2364 	desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
2365 	if (IS_ERR(desc_btf))
2366 		return "<error>";
2367 
2368 	func = btf_type_by_id(desc_btf, insn->imm);
2369 	return btf_name_by_offset(desc_btf, func->name_off);
2370 }
2371 
2372 /* For given verifier state backtrack_insn() is called from the last insn to
2373  * the first insn. Its purpose is to compute a bitmask of registers and
2374  * stack slots that needs precision in the parent verifier state.
2375  */
2376 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2377 			  u32 *reg_mask, u64 *stack_mask)
2378 {
2379 	const struct bpf_insn_cbs cbs = {
2380 		.cb_call	= disasm_kfunc_name,
2381 		.cb_print	= verbose,
2382 		.private_data	= env,
2383 	};
2384 	struct bpf_insn *insn = env->prog->insnsi + idx;
2385 	u8 class = BPF_CLASS(insn->code);
2386 	u8 opcode = BPF_OP(insn->code);
2387 	u8 mode = BPF_MODE(insn->code);
2388 	u32 dreg = 1u << insn->dst_reg;
2389 	u32 sreg = 1u << insn->src_reg;
2390 	u32 spi;
2391 
2392 	if (insn->code == 0)
2393 		return 0;
2394 	if (env->log.level & BPF_LOG_LEVEL2) {
2395 		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2396 		verbose(env, "%d: ", idx);
2397 		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2398 	}
2399 
2400 	if (class == BPF_ALU || class == BPF_ALU64) {
2401 		if (!(*reg_mask & dreg))
2402 			return 0;
2403 		if (opcode == BPF_MOV) {
2404 			if (BPF_SRC(insn->code) == BPF_X) {
2405 				/* dreg = sreg
2406 				 * dreg needs precision after this insn
2407 				 * sreg needs precision before this insn
2408 				 */
2409 				*reg_mask &= ~dreg;
2410 				*reg_mask |= sreg;
2411 			} else {
2412 				/* dreg = K
2413 				 * dreg needs precision after this insn.
2414 				 * Corresponding register is already marked
2415 				 * as precise=true in this verifier state.
2416 				 * No further markings in parent are necessary
2417 				 */
2418 				*reg_mask &= ~dreg;
2419 			}
2420 		} else {
2421 			if (BPF_SRC(insn->code) == BPF_X) {
2422 				/* dreg += sreg
2423 				 * both dreg and sreg need precision
2424 				 * before this insn
2425 				 */
2426 				*reg_mask |= sreg;
2427 			} /* else dreg += K
2428 			   * dreg still needs precision before this insn
2429 			   */
2430 		}
2431 	} else if (class == BPF_LDX) {
2432 		if (!(*reg_mask & dreg))
2433 			return 0;
2434 		*reg_mask &= ~dreg;
2435 
2436 		/* scalars can only be spilled into stack w/o losing precision.
2437 		 * Load from any other memory can be zero extended.
2438 		 * The desire to keep that precision is already indicated
2439 		 * by 'precise' mark in corresponding register of this state.
2440 		 * No further tracking necessary.
2441 		 */
2442 		if (insn->src_reg != BPF_REG_FP)
2443 			return 0;
2444 
2445 		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
2446 		 * that [fp - off] slot contains scalar that needs to be
2447 		 * tracked with precision
2448 		 */
2449 		spi = (-insn->off - 1) / BPF_REG_SIZE;
2450 		if (spi >= 64) {
2451 			verbose(env, "BUG spi %d\n", spi);
2452 			WARN_ONCE(1, "verifier backtracking bug");
2453 			return -EFAULT;
2454 		}
2455 		*stack_mask |= 1ull << spi;
2456 	} else if (class == BPF_STX || class == BPF_ST) {
2457 		if (*reg_mask & dreg)
2458 			/* stx & st shouldn't be using _scalar_ dst_reg
2459 			 * to access memory. It means backtracking
2460 			 * encountered a case of pointer subtraction.
2461 			 */
2462 			return -ENOTSUPP;
2463 		/* scalars can only be spilled into stack */
2464 		if (insn->dst_reg != BPF_REG_FP)
2465 			return 0;
2466 		spi = (-insn->off - 1) / BPF_REG_SIZE;
2467 		if (spi >= 64) {
2468 			verbose(env, "BUG spi %d\n", spi);
2469 			WARN_ONCE(1, "verifier backtracking bug");
2470 			return -EFAULT;
2471 		}
2472 		if (!(*stack_mask & (1ull << spi)))
2473 			return 0;
2474 		*stack_mask &= ~(1ull << spi);
2475 		if (class == BPF_STX)
2476 			*reg_mask |= sreg;
2477 	} else if (class == BPF_JMP || class == BPF_JMP32) {
2478 		if (opcode == BPF_CALL) {
2479 			if (insn->src_reg == BPF_PSEUDO_CALL)
2480 				return -ENOTSUPP;
2481 			/* regular helper call sets R0 */
2482 			*reg_mask &= ~1;
2483 			if (*reg_mask & 0x3f) {
2484 				/* if backtracing was looking for registers R1-R5
2485 				 * they should have been found already.
2486 				 */
2487 				verbose(env, "BUG regs %x\n", *reg_mask);
2488 				WARN_ONCE(1, "verifier backtracking bug");
2489 				return -EFAULT;
2490 			}
2491 		} else if (opcode == BPF_EXIT) {
2492 			return -ENOTSUPP;
2493 		}
2494 	} else if (class == BPF_LD) {
2495 		if (!(*reg_mask & dreg))
2496 			return 0;
2497 		*reg_mask &= ~dreg;
2498 		/* It's ld_imm64 or ld_abs or ld_ind.
2499 		 * For ld_imm64 no further tracking of precision
2500 		 * into parent is necessary
2501 		 */
2502 		if (mode == BPF_IND || mode == BPF_ABS)
2503 			/* to be analyzed */
2504 			return -ENOTSUPP;
2505 	}
2506 	return 0;
2507 }
2508 
2509 /* the scalar precision tracking algorithm:
2510  * . at the start all registers have precise=false.
2511  * . scalar ranges are tracked as normal through alu and jmp insns.
2512  * . once precise value of the scalar register is used in:
2513  *   .  ptr + scalar alu
2514  *   . if (scalar cond K|scalar)
2515  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
2516  *   backtrack through the verifier states and mark all registers and
2517  *   stack slots with spilled constants that these scalar regisers
2518  *   should be precise.
2519  * . during state pruning two registers (or spilled stack slots)
2520  *   are equivalent if both are not precise.
2521  *
2522  * Note the verifier cannot simply walk register parentage chain,
2523  * since many different registers and stack slots could have been
2524  * used to compute single precise scalar.
2525  *
2526  * The approach of starting with precise=true for all registers and then
2527  * backtrack to mark a register as not precise when the verifier detects
2528  * that program doesn't care about specific value (e.g., when helper
2529  * takes register as ARG_ANYTHING parameter) is not safe.
2530  *
2531  * It's ok to walk single parentage chain of the verifier states.
2532  * It's possible that this backtracking will go all the way till 1st insn.
2533  * All other branches will be explored for needing precision later.
2534  *
2535  * The backtracking needs to deal with cases like:
2536  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2537  * r9 -= r8
2538  * r5 = r9
2539  * if r5 > 0x79f goto pc+7
2540  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2541  * r5 += 1
2542  * ...
2543  * call bpf_perf_event_output#25
2544  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2545  *
2546  * and this case:
2547  * r6 = 1
2548  * call foo // uses callee's r6 inside to compute r0
2549  * r0 += r6
2550  * if r0 == 0 goto
2551  *
2552  * to track above reg_mask/stack_mask needs to be independent for each frame.
2553  *
2554  * Also if parent's curframe > frame where backtracking started,
2555  * the verifier need to mark registers in both frames, otherwise callees
2556  * may incorrectly prune callers. This is similar to
2557  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2558  *
2559  * For now backtracking falls back into conservative marking.
2560  */
2561 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2562 				     struct bpf_verifier_state *st)
2563 {
2564 	struct bpf_func_state *func;
2565 	struct bpf_reg_state *reg;
2566 	int i, j;
2567 
2568 	/* big hammer: mark all scalars precise in this path.
2569 	 * pop_stack may still get !precise scalars.
2570 	 */
2571 	for (; st; st = st->parent)
2572 		for (i = 0; i <= st->curframe; i++) {
2573 			func = st->frame[i];
2574 			for (j = 0; j < BPF_REG_FP; j++) {
2575 				reg = &func->regs[j];
2576 				if (reg->type != SCALAR_VALUE)
2577 					continue;
2578 				reg->precise = true;
2579 			}
2580 			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2581 				if (!is_spilled_reg(&func->stack[j]))
2582 					continue;
2583 				reg = &func->stack[j].spilled_ptr;
2584 				if (reg->type != SCALAR_VALUE)
2585 					continue;
2586 				reg->precise = true;
2587 			}
2588 		}
2589 }
2590 
2591 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2592 				  int spi)
2593 {
2594 	struct bpf_verifier_state *st = env->cur_state;
2595 	int first_idx = st->first_insn_idx;
2596 	int last_idx = env->insn_idx;
2597 	struct bpf_func_state *func;
2598 	struct bpf_reg_state *reg;
2599 	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2600 	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2601 	bool skip_first = true;
2602 	bool new_marks = false;
2603 	int i, err;
2604 
2605 	if (!env->bpf_capable)
2606 		return 0;
2607 
2608 	func = st->frame[st->curframe];
2609 	if (regno >= 0) {
2610 		reg = &func->regs[regno];
2611 		if (reg->type != SCALAR_VALUE) {
2612 			WARN_ONCE(1, "backtracing misuse");
2613 			return -EFAULT;
2614 		}
2615 		if (!reg->precise)
2616 			new_marks = true;
2617 		else
2618 			reg_mask = 0;
2619 		reg->precise = true;
2620 	}
2621 
2622 	while (spi >= 0) {
2623 		if (!is_spilled_reg(&func->stack[spi])) {
2624 			stack_mask = 0;
2625 			break;
2626 		}
2627 		reg = &func->stack[spi].spilled_ptr;
2628 		if (reg->type != SCALAR_VALUE) {
2629 			stack_mask = 0;
2630 			break;
2631 		}
2632 		if (!reg->precise)
2633 			new_marks = true;
2634 		else
2635 			stack_mask = 0;
2636 		reg->precise = true;
2637 		break;
2638 	}
2639 
2640 	if (!new_marks)
2641 		return 0;
2642 	if (!reg_mask && !stack_mask)
2643 		return 0;
2644 	for (;;) {
2645 		DECLARE_BITMAP(mask, 64);
2646 		u32 history = st->jmp_history_cnt;
2647 
2648 		if (env->log.level & BPF_LOG_LEVEL2)
2649 			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2650 		for (i = last_idx;;) {
2651 			if (skip_first) {
2652 				err = 0;
2653 				skip_first = false;
2654 			} else {
2655 				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2656 			}
2657 			if (err == -ENOTSUPP) {
2658 				mark_all_scalars_precise(env, st);
2659 				return 0;
2660 			} else if (err) {
2661 				return err;
2662 			}
2663 			if (!reg_mask && !stack_mask)
2664 				/* Found assignment(s) into tracked register in this state.
2665 				 * Since this state is already marked, just return.
2666 				 * Nothing to be tracked further in the parent state.
2667 				 */
2668 				return 0;
2669 			if (i == first_idx)
2670 				break;
2671 			i = get_prev_insn_idx(st, i, &history);
2672 			if (i >= env->prog->len) {
2673 				/* This can happen if backtracking reached insn 0
2674 				 * and there are still reg_mask or stack_mask
2675 				 * to backtrack.
2676 				 * It means the backtracking missed the spot where
2677 				 * particular register was initialized with a constant.
2678 				 */
2679 				verbose(env, "BUG backtracking idx %d\n", i);
2680 				WARN_ONCE(1, "verifier backtracking bug");
2681 				return -EFAULT;
2682 			}
2683 		}
2684 		st = st->parent;
2685 		if (!st)
2686 			break;
2687 
2688 		new_marks = false;
2689 		func = st->frame[st->curframe];
2690 		bitmap_from_u64(mask, reg_mask);
2691 		for_each_set_bit(i, mask, 32) {
2692 			reg = &func->regs[i];
2693 			if (reg->type != SCALAR_VALUE) {
2694 				reg_mask &= ~(1u << i);
2695 				continue;
2696 			}
2697 			if (!reg->precise)
2698 				new_marks = true;
2699 			reg->precise = true;
2700 		}
2701 
2702 		bitmap_from_u64(mask, stack_mask);
2703 		for_each_set_bit(i, mask, 64) {
2704 			if (i >= func->allocated_stack / BPF_REG_SIZE) {
2705 				/* the sequence of instructions:
2706 				 * 2: (bf) r3 = r10
2707 				 * 3: (7b) *(u64 *)(r3 -8) = r0
2708 				 * 4: (79) r4 = *(u64 *)(r10 -8)
2709 				 * doesn't contain jmps. It's backtracked
2710 				 * as a single block.
2711 				 * During backtracking insn 3 is not recognized as
2712 				 * stack access, so at the end of backtracking
2713 				 * stack slot fp-8 is still marked in stack_mask.
2714 				 * However the parent state may not have accessed
2715 				 * fp-8 and it's "unallocated" stack space.
2716 				 * In such case fallback to conservative.
2717 				 */
2718 				mark_all_scalars_precise(env, st);
2719 				return 0;
2720 			}
2721 
2722 			if (!is_spilled_reg(&func->stack[i])) {
2723 				stack_mask &= ~(1ull << i);
2724 				continue;
2725 			}
2726 			reg = &func->stack[i].spilled_ptr;
2727 			if (reg->type != SCALAR_VALUE) {
2728 				stack_mask &= ~(1ull << i);
2729 				continue;
2730 			}
2731 			if (!reg->precise)
2732 				new_marks = true;
2733 			reg->precise = true;
2734 		}
2735 		if (env->log.level & BPF_LOG_LEVEL2) {
2736 			verbose(env, "parent %s regs=%x stack=%llx marks:",
2737 				new_marks ? "didn't have" : "already had",
2738 				reg_mask, stack_mask);
2739 			print_verifier_state(env, func, true);
2740 		}
2741 
2742 		if (!reg_mask && !stack_mask)
2743 			break;
2744 		if (!new_marks)
2745 			break;
2746 
2747 		last_idx = st->last_insn_idx;
2748 		first_idx = st->first_insn_idx;
2749 	}
2750 	return 0;
2751 }
2752 
2753 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2754 {
2755 	return __mark_chain_precision(env, regno, -1);
2756 }
2757 
2758 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2759 {
2760 	return __mark_chain_precision(env, -1, spi);
2761 }
2762 
2763 static bool is_spillable_regtype(enum bpf_reg_type type)
2764 {
2765 	switch (base_type(type)) {
2766 	case PTR_TO_MAP_VALUE:
2767 	case PTR_TO_STACK:
2768 	case PTR_TO_CTX:
2769 	case PTR_TO_PACKET:
2770 	case PTR_TO_PACKET_META:
2771 	case PTR_TO_PACKET_END:
2772 	case PTR_TO_FLOW_KEYS:
2773 	case CONST_PTR_TO_MAP:
2774 	case PTR_TO_SOCKET:
2775 	case PTR_TO_SOCK_COMMON:
2776 	case PTR_TO_TCP_SOCK:
2777 	case PTR_TO_XDP_SOCK:
2778 	case PTR_TO_BTF_ID:
2779 	case PTR_TO_BUF:
2780 	case PTR_TO_PERCPU_BTF_ID:
2781 	case PTR_TO_MEM:
2782 	case PTR_TO_FUNC:
2783 	case PTR_TO_MAP_KEY:
2784 		return true;
2785 	default:
2786 		return false;
2787 	}
2788 }
2789 
2790 /* Does this register contain a constant zero? */
2791 static bool register_is_null(struct bpf_reg_state *reg)
2792 {
2793 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2794 }
2795 
2796 static bool register_is_const(struct bpf_reg_state *reg)
2797 {
2798 	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2799 }
2800 
2801 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2802 {
2803 	return tnum_is_unknown(reg->var_off) &&
2804 	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2805 	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2806 	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2807 	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2808 }
2809 
2810 static bool register_is_bounded(struct bpf_reg_state *reg)
2811 {
2812 	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2813 }
2814 
2815 static bool __is_pointer_value(bool allow_ptr_leaks,
2816 			       const struct bpf_reg_state *reg)
2817 {
2818 	if (allow_ptr_leaks)
2819 		return false;
2820 
2821 	return reg->type != SCALAR_VALUE;
2822 }
2823 
2824 static void save_register_state(struct bpf_func_state *state,
2825 				int spi, struct bpf_reg_state *reg,
2826 				int size)
2827 {
2828 	int i;
2829 
2830 	state->stack[spi].spilled_ptr = *reg;
2831 	if (size == BPF_REG_SIZE)
2832 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2833 
2834 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
2835 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
2836 
2837 	/* size < 8 bytes spill */
2838 	for (; i; i--)
2839 		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
2840 }
2841 
2842 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2843  * stack boundary and alignment are checked in check_mem_access()
2844  */
2845 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2846 				       /* stack frame we're writing to */
2847 				       struct bpf_func_state *state,
2848 				       int off, int size, int value_regno,
2849 				       int insn_idx)
2850 {
2851 	struct bpf_func_state *cur; /* state of the current function */
2852 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2853 	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2854 	struct bpf_reg_state *reg = NULL;
2855 
2856 	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
2857 	if (err)
2858 		return err;
2859 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2860 	 * so it's aligned access and [off, off + size) are within stack limits
2861 	 */
2862 	if (!env->allow_ptr_leaks &&
2863 	    state->stack[spi].slot_type[0] == STACK_SPILL &&
2864 	    size != BPF_REG_SIZE) {
2865 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
2866 		return -EACCES;
2867 	}
2868 
2869 	cur = env->cur_state->frame[env->cur_state->curframe];
2870 	if (value_regno >= 0)
2871 		reg = &cur->regs[value_regno];
2872 	if (!env->bypass_spec_v4) {
2873 		bool sanitize = reg && is_spillable_regtype(reg->type);
2874 
2875 		for (i = 0; i < size; i++) {
2876 			if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2877 				sanitize = true;
2878 				break;
2879 			}
2880 		}
2881 
2882 		if (sanitize)
2883 			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2884 	}
2885 
2886 	mark_stack_slot_scratched(env, spi);
2887 	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
2888 	    !register_is_null(reg) && env->bpf_capable) {
2889 		if (dst_reg != BPF_REG_FP) {
2890 			/* The backtracking logic can only recognize explicit
2891 			 * stack slot address like [fp - 8]. Other spill of
2892 			 * scalar via different register has to be conservative.
2893 			 * Backtrack from here and mark all registers as precise
2894 			 * that contributed into 'reg' being a constant.
2895 			 */
2896 			err = mark_chain_precision(env, value_regno);
2897 			if (err)
2898 				return err;
2899 		}
2900 		save_register_state(state, spi, reg, size);
2901 	} else if (reg && is_spillable_regtype(reg->type)) {
2902 		/* register containing pointer is being spilled into stack */
2903 		if (size != BPF_REG_SIZE) {
2904 			verbose_linfo(env, insn_idx, "; ");
2905 			verbose(env, "invalid size of register spill\n");
2906 			return -EACCES;
2907 		}
2908 		if (state != cur && reg->type == PTR_TO_STACK) {
2909 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2910 			return -EINVAL;
2911 		}
2912 		save_register_state(state, spi, reg, size);
2913 	} else {
2914 		u8 type = STACK_MISC;
2915 
2916 		/* regular write of data into stack destroys any spilled ptr */
2917 		state->stack[spi].spilled_ptr.type = NOT_INIT;
2918 		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2919 		if (is_spilled_reg(&state->stack[spi]))
2920 			for (i = 0; i < BPF_REG_SIZE; i++)
2921 				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
2922 
2923 		/* only mark the slot as written if all 8 bytes were written
2924 		 * otherwise read propagation may incorrectly stop too soon
2925 		 * when stack slots are partially written.
2926 		 * This heuristic means that read propagation will be
2927 		 * conservative, since it will add reg_live_read marks
2928 		 * to stack slots all the way to first state when programs
2929 		 * writes+reads less than 8 bytes
2930 		 */
2931 		if (size == BPF_REG_SIZE)
2932 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2933 
2934 		/* when we zero initialize stack slots mark them as such */
2935 		if (reg && register_is_null(reg)) {
2936 			/* backtracking doesn't work for STACK_ZERO yet. */
2937 			err = mark_chain_precision(env, value_regno);
2938 			if (err)
2939 				return err;
2940 			type = STACK_ZERO;
2941 		}
2942 
2943 		/* Mark slots affected by this stack write. */
2944 		for (i = 0; i < size; i++)
2945 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2946 				type;
2947 	}
2948 	return 0;
2949 }
2950 
2951 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2952  * known to contain a variable offset.
2953  * This function checks whether the write is permitted and conservatively
2954  * tracks the effects of the write, considering that each stack slot in the
2955  * dynamic range is potentially written to.
2956  *
2957  * 'off' includes 'regno->off'.
2958  * 'value_regno' can be -1, meaning that an unknown value is being written to
2959  * the stack.
2960  *
2961  * Spilled pointers in range are not marked as written because we don't know
2962  * what's going to be actually written. This means that read propagation for
2963  * future reads cannot be terminated by this write.
2964  *
2965  * For privileged programs, uninitialized stack slots are considered
2966  * initialized by this write (even though we don't know exactly what offsets
2967  * are going to be written to). The idea is that we don't want the verifier to
2968  * reject future reads that access slots written to through variable offsets.
2969  */
2970 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2971 				     /* func where register points to */
2972 				     struct bpf_func_state *state,
2973 				     int ptr_regno, int off, int size,
2974 				     int value_regno, int insn_idx)
2975 {
2976 	struct bpf_func_state *cur; /* state of the current function */
2977 	int min_off, max_off;
2978 	int i, err;
2979 	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2980 	bool writing_zero = false;
2981 	/* set if the fact that we're writing a zero is used to let any
2982 	 * stack slots remain STACK_ZERO
2983 	 */
2984 	bool zero_used = false;
2985 
2986 	cur = env->cur_state->frame[env->cur_state->curframe];
2987 	ptr_reg = &cur->regs[ptr_regno];
2988 	min_off = ptr_reg->smin_value + off;
2989 	max_off = ptr_reg->smax_value + off + size;
2990 	if (value_regno >= 0)
2991 		value_reg = &cur->regs[value_regno];
2992 	if (value_reg && register_is_null(value_reg))
2993 		writing_zero = true;
2994 
2995 	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
2996 	if (err)
2997 		return err;
2998 
2999 
3000 	/* Variable offset writes destroy any spilled pointers in range. */
3001 	for (i = min_off; i < max_off; i++) {
3002 		u8 new_type, *stype;
3003 		int slot, spi;
3004 
3005 		slot = -i - 1;
3006 		spi = slot / BPF_REG_SIZE;
3007 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3008 		mark_stack_slot_scratched(env, spi);
3009 
3010 		if (!env->allow_ptr_leaks
3011 				&& *stype != NOT_INIT
3012 				&& *stype != SCALAR_VALUE) {
3013 			/* Reject the write if there's are spilled pointers in
3014 			 * range. If we didn't reject here, the ptr status
3015 			 * would be erased below (even though not all slots are
3016 			 * actually overwritten), possibly opening the door to
3017 			 * leaks.
3018 			 */
3019 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3020 				insn_idx, i);
3021 			return -EINVAL;
3022 		}
3023 
3024 		/* Erase all spilled pointers. */
3025 		state->stack[spi].spilled_ptr.type = NOT_INIT;
3026 
3027 		/* Update the slot type. */
3028 		new_type = STACK_MISC;
3029 		if (writing_zero && *stype == STACK_ZERO) {
3030 			new_type = STACK_ZERO;
3031 			zero_used = true;
3032 		}
3033 		/* If the slot is STACK_INVALID, we check whether it's OK to
3034 		 * pretend that it will be initialized by this write. The slot
3035 		 * might not actually be written to, and so if we mark it as
3036 		 * initialized future reads might leak uninitialized memory.
3037 		 * For privileged programs, we will accept such reads to slots
3038 		 * that may or may not be written because, if we're reject
3039 		 * them, the error would be too confusing.
3040 		 */
3041 		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3042 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3043 					insn_idx, i);
3044 			return -EINVAL;
3045 		}
3046 		*stype = new_type;
3047 	}
3048 	if (zero_used) {
3049 		/* backtracking doesn't work for STACK_ZERO yet. */
3050 		err = mark_chain_precision(env, value_regno);
3051 		if (err)
3052 			return err;
3053 	}
3054 	return 0;
3055 }
3056 
3057 /* When register 'dst_regno' is assigned some values from stack[min_off,
3058  * max_off), we set the register's type according to the types of the
3059  * respective stack slots. If all the stack values are known to be zeros, then
3060  * so is the destination reg. Otherwise, the register is considered to be
3061  * SCALAR. This function does not deal with register filling; the caller must
3062  * ensure that all spilled registers in the stack range have been marked as
3063  * read.
3064  */
3065 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3066 				/* func where src register points to */
3067 				struct bpf_func_state *ptr_state,
3068 				int min_off, int max_off, int dst_regno)
3069 {
3070 	struct bpf_verifier_state *vstate = env->cur_state;
3071 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3072 	int i, slot, spi;
3073 	u8 *stype;
3074 	int zeros = 0;
3075 
3076 	for (i = min_off; i < max_off; i++) {
3077 		slot = -i - 1;
3078 		spi = slot / BPF_REG_SIZE;
3079 		stype = ptr_state->stack[spi].slot_type;
3080 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3081 			break;
3082 		zeros++;
3083 	}
3084 	if (zeros == max_off - min_off) {
3085 		/* any access_size read into register is zero extended,
3086 		 * so the whole register == const_zero
3087 		 */
3088 		__mark_reg_const_zero(&state->regs[dst_regno]);
3089 		/* backtracking doesn't support STACK_ZERO yet,
3090 		 * so mark it precise here, so that later
3091 		 * backtracking can stop here.
3092 		 * Backtracking may not need this if this register
3093 		 * doesn't participate in pointer adjustment.
3094 		 * Forward propagation of precise flag is not
3095 		 * necessary either. This mark is only to stop
3096 		 * backtracking. Any register that contributed
3097 		 * to const 0 was marked precise before spill.
3098 		 */
3099 		state->regs[dst_regno].precise = true;
3100 	} else {
3101 		/* have read misc data from the stack */
3102 		mark_reg_unknown(env, state->regs, dst_regno);
3103 	}
3104 	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3105 }
3106 
3107 /* Read the stack at 'off' and put the results into the register indicated by
3108  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3109  * spilled reg.
3110  *
3111  * 'dst_regno' can be -1, meaning that the read value is not going to a
3112  * register.
3113  *
3114  * The access is assumed to be within the current stack bounds.
3115  */
3116 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3117 				      /* func where src register points to */
3118 				      struct bpf_func_state *reg_state,
3119 				      int off, int size, int dst_regno)
3120 {
3121 	struct bpf_verifier_state *vstate = env->cur_state;
3122 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3123 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3124 	struct bpf_reg_state *reg;
3125 	u8 *stype, type;
3126 
3127 	stype = reg_state->stack[spi].slot_type;
3128 	reg = &reg_state->stack[spi].spilled_ptr;
3129 
3130 	if (is_spilled_reg(&reg_state->stack[spi])) {
3131 		u8 spill_size = 1;
3132 
3133 		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3134 			spill_size++;
3135 
3136 		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3137 			if (reg->type != SCALAR_VALUE) {
3138 				verbose_linfo(env, env->insn_idx, "; ");
3139 				verbose(env, "invalid size of register fill\n");
3140 				return -EACCES;
3141 			}
3142 
3143 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3144 			if (dst_regno < 0)
3145 				return 0;
3146 
3147 			if (!(off % BPF_REG_SIZE) && size == spill_size) {
3148 				/* The earlier check_reg_arg() has decided the
3149 				 * subreg_def for this insn.  Save it first.
3150 				 */
3151 				s32 subreg_def = state->regs[dst_regno].subreg_def;
3152 
3153 				state->regs[dst_regno] = *reg;
3154 				state->regs[dst_regno].subreg_def = subreg_def;
3155 			} else {
3156 				for (i = 0; i < size; i++) {
3157 					type = stype[(slot - i) % BPF_REG_SIZE];
3158 					if (type == STACK_SPILL)
3159 						continue;
3160 					if (type == STACK_MISC)
3161 						continue;
3162 					verbose(env, "invalid read from stack off %d+%d size %d\n",
3163 						off, i, size);
3164 					return -EACCES;
3165 				}
3166 				mark_reg_unknown(env, state->regs, dst_regno);
3167 			}
3168 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3169 			return 0;
3170 		}
3171 
3172 		if (dst_regno >= 0) {
3173 			/* restore register state from stack */
3174 			state->regs[dst_regno] = *reg;
3175 			/* mark reg as written since spilled pointer state likely
3176 			 * has its liveness marks cleared by is_state_visited()
3177 			 * which resets stack/reg liveness for state transitions
3178 			 */
3179 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3180 		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3181 			/* If dst_regno==-1, the caller is asking us whether
3182 			 * it is acceptable to use this value as a SCALAR_VALUE
3183 			 * (e.g. for XADD).
3184 			 * We must not allow unprivileged callers to do that
3185 			 * with spilled pointers.
3186 			 */
3187 			verbose(env, "leaking pointer from stack off %d\n",
3188 				off);
3189 			return -EACCES;
3190 		}
3191 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3192 	} else {
3193 		for (i = 0; i < size; i++) {
3194 			type = stype[(slot - i) % BPF_REG_SIZE];
3195 			if (type == STACK_MISC)
3196 				continue;
3197 			if (type == STACK_ZERO)
3198 				continue;
3199 			verbose(env, "invalid read from stack off %d+%d size %d\n",
3200 				off, i, size);
3201 			return -EACCES;
3202 		}
3203 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3204 		if (dst_regno >= 0)
3205 			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3206 	}
3207 	return 0;
3208 }
3209 
3210 enum stack_access_src {
3211 	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
3212 	ACCESS_HELPER = 2,  /* the access is performed by a helper */
3213 };
3214 
3215 static int check_stack_range_initialized(struct bpf_verifier_env *env,
3216 					 int regno, int off, int access_size,
3217 					 bool zero_size_allowed,
3218 					 enum stack_access_src type,
3219 					 struct bpf_call_arg_meta *meta);
3220 
3221 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3222 {
3223 	return cur_regs(env) + regno;
3224 }
3225 
3226 /* Read the stack at 'ptr_regno + off' and put the result into the register
3227  * 'dst_regno'.
3228  * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
3229  * but not its variable offset.
3230  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3231  *
3232  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3233  * filling registers (i.e. reads of spilled register cannot be detected when
3234  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3235  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3236  * offset; for a fixed offset check_stack_read_fixed_off should be used
3237  * instead.
3238  */
3239 static int check_stack_read_var_off(struct bpf_verifier_env *env,
3240 				    int ptr_regno, int off, int size, int dst_regno)
3241 {
3242 	/* The state of the source register. */
3243 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3244 	struct bpf_func_state *ptr_state = func(env, reg);
3245 	int err;
3246 	int min_off, max_off;
3247 
3248 	/* Note that we pass a NULL meta, so raw access will not be permitted.
3249 	 */
3250 	err = check_stack_range_initialized(env, ptr_regno, off, size,
3251 					    false, ACCESS_DIRECT, NULL);
3252 	if (err)
3253 		return err;
3254 
3255 	min_off = reg->smin_value + off;
3256 	max_off = reg->smax_value + off;
3257 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3258 	return 0;
3259 }
3260 
3261 /* check_stack_read dispatches to check_stack_read_fixed_off or
3262  * check_stack_read_var_off.
3263  *
3264  * The caller must ensure that the offset falls within the allocated stack
3265  * bounds.
3266  *
3267  * 'dst_regno' is a register which will receive the value from the stack. It
3268  * can be -1, meaning that the read value is not going to a register.
3269  */
3270 static int check_stack_read(struct bpf_verifier_env *env,
3271 			    int ptr_regno, int off, int size,
3272 			    int dst_regno)
3273 {
3274 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3275 	struct bpf_func_state *state = func(env, reg);
3276 	int err;
3277 	/* Some accesses are only permitted with a static offset. */
3278 	bool var_off = !tnum_is_const(reg->var_off);
3279 
3280 	/* The offset is required to be static when reads don't go to a
3281 	 * register, in order to not leak pointers (see
3282 	 * check_stack_read_fixed_off).
3283 	 */
3284 	if (dst_regno < 0 && var_off) {
3285 		char tn_buf[48];
3286 
3287 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3288 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3289 			tn_buf, off, size);
3290 		return -EACCES;
3291 	}
3292 	/* Variable offset is prohibited for unprivileged mode for simplicity
3293 	 * since it requires corresponding support in Spectre masking for stack
3294 	 * ALU. See also retrieve_ptr_limit().
3295 	 */
3296 	if (!env->bypass_spec_v1 && var_off) {
3297 		char tn_buf[48];
3298 
3299 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3300 		verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3301 				ptr_regno, tn_buf);
3302 		return -EACCES;
3303 	}
3304 
3305 	if (!var_off) {
3306 		off += reg->var_off.value;
3307 		err = check_stack_read_fixed_off(env, state, off, size,
3308 						 dst_regno);
3309 	} else {
3310 		/* Variable offset stack reads need more conservative handling
3311 		 * than fixed offset ones. Note that dst_regno >= 0 on this
3312 		 * branch.
3313 		 */
3314 		err = check_stack_read_var_off(env, ptr_regno, off, size,
3315 					       dst_regno);
3316 	}
3317 	return err;
3318 }
3319 
3320 
3321 /* check_stack_write dispatches to check_stack_write_fixed_off or
3322  * check_stack_write_var_off.
3323  *
3324  * 'ptr_regno' is the register used as a pointer into the stack.
3325  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3326  * 'value_regno' is the register whose value we're writing to the stack. It can
3327  * be -1, meaning that we're not writing from a register.
3328  *
3329  * The caller must ensure that the offset falls within the maximum stack size.
3330  */
3331 static int check_stack_write(struct bpf_verifier_env *env,
3332 			     int ptr_regno, int off, int size,
3333 			     int value_regno, int insn_idx)
3334 {
3335 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3336 	struct bpf_func_state *state = func(env, reg);
3337 	int err;
3338 
3339 	if (tnum_is_const(reg->var_off)) {
3340 		off += reg->var_off.value;
3341 		err = check_stack_write_fixed_off(env, state, off, size,
3342 						  value_regno, insn_idx);
3343 	} else {
3344 		/* Variable offset stack reads need more conservative handling
3345 		 * than fixed offset ones.
3346 		 */
3347 		err = check_stack_write_var_off(env, state,
3348 						ptr_regno, off, size,
3349 						value_regno, insn_idx);
3350 	}
3351 	return err;
3352 }
3353 
3354 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3355 				 int off, int size, enum bpf_access_type type)
3356 {
3357 	struct bpf_reg_state *regs = cur_regs(env);
3358 	struct bpf_map *map = regs[regno].map_ptr;
3359 	u32 cap = bpf_map_flags_to_cap(map);
3360 
3361 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3362 		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3363 			map->value_size, off, size);
3364 		return -EACCES;
3365 	}
3366 
3367 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3368 		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3369 			map->value_size, off, size);
3370 		return -EACCES;
3371 	}
3372 
3373 	return 0;
3374 }
3375 
3376 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3377 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3378 			      int off, int size, u32 mem_size,
3379 			      bool zero_size_allowed)
3380 {
3381 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3382 	struct bpf_reg_state *reg;
3383 
3384 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3385 		return 0;
3386 
3387 	reg = &cur_regs(env)[regno];
3388 	switch (reg->type) {
3389 	case PTR_TO_MAP_KEY:
3390 		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3391 			mem_size, off, size);
3392 		break;
3393 	case PTR_TO_MAP_VALUE:
3394 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
3395 			mem_size, off, size);
3396 		break;
3397 	case PTR_TO_PACKET:
3398 	case PTR_TO_PACKET_META:
3399 	case PTR_TO_PACKET_END:
3400 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3401 			off, size, regno, reg->id, off, mem_size);
3402 		break;
3403 	case PTR_TO_MEM:
3404 	default:
3405 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3406 			mem_size, off, size);
3407 	}
3408 
3409 	return -EACCES;
3410 }
3411 
3412 /* check read/write into a memory region with possible variable offset */
3413 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3414 				   int off, int size, u32 mem_size,
3415 				   bool zero_size_allowed)
3416 {
3417 	struct bpf_verifier_state *vstate = env->cur_state;
3418 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3419 	struct bpf_reg_state *reg = &state->regs[regno];
3420 	int err;
3421 
3422 	/* We may have adjusted the register pointing to memory region, so we
3423 	 * need to try adding each of min_value and max_value to off
3424 	 * to make sure our theoretical access will be safe.
3425 	 *
3426 	 * The minimum value is only important with signed
3427 	 * comparisons where we can't assume the floor of a
3428 	 * value is 0.  If we are using signed variables for our
3429 	 * index'es we need to make sure that whatever we use
3430 	 * will have a set floor within our range.
3431 	 */
3432 	if (reg->smin_value < 0 &&
3433 	    (reg->smin_value == S64_MIN ||
3434 	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3435 	      reg->smin_value + off < 0)) {
3436 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3437 			regno);
3438 		return -EACCES;
3439 	}
3440 	err = __check_mem_access(env, regno, reg->smin_value + off, size,
3441 				 mem_size, zero_size_allowed);
3442 	if (err) {
3443 		verbose(env, "R%d min value is outside of the allowed memory range\n",
3444 			regno);
3445 		return err;
3446 	}
3447 
3448 	/* If we haven't set a max value then we need to bail since we can't be
3449 	 * sure we won't do bad things.
3450 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
3451 	 */
3452 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
3453 		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
3454 			regno);
3455 		return -EACCES;
3456 	}
3457 	err = __check_mem_access(env, regno, reg->umax_value + off, size,
3458 				 mem_size, zero_size_allowed);
3459 	if (err) {
3460 		verbose(env, "R%d max value is outside of the allowed memory range\n",
3461 			regno);
3462 		return err;
3463 	}
3464 
3465 	return 0;
3466 }
3467 
3468 /* check read/write into a map element with possible variable offset */
3469 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
3470 			    int off, int size, bool zero_size_allowed)
3471 {
3472 	struct bpf_verifier_state *vstate = env->cur_state;
3473 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3474 	struct bpf_reg_state *reg = &state->regs[regno];
3475 	struct bpf_map *map = reg->map_ptr;
3476 	int err;
3477 
3478 	err = check_mem_region_access(env, regno, off, size, map->value_size,
3479 				      zero_size_allowed);
3480 	if (err)
3481 		return err;
3482 
3483 	if (map_value_has_spin_lock(map)) {
3484 		u32 lock = map->spin_lock_off;
3485 
3486 		/* if any part of struct bpf_spin_lock can be touched by
3487 		 * load/store reject this program.
3488 		 * To check that [x1, x2) overlaps with [y1, y2)
3489 		 * it is sufficient to check x1 < y2 && y1 < x2.
3490 		 */
3491 		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
3492 		     lock < reg->umax_value + off + size) {
3493 			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
3494 			return -EACCES;
3495 		}
3496 	}
3497 	if (map_value_has_timer(map)) {
3498 		u32 t = map->timer_off;
3499 
3500 		if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
3501 		     t < reg->umax_value + off + size) {
3502 			verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3503 			return -EACCES;
3504 		}
3505 	}
3506 	return err;
3507 }
3508 
3509 #define MAX_PACKET_OFF 0xffff
3510 
3511 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3512 				       const struct bpf_call_arg_meta *meta,
3513 				       enum bpf_access_type t)
3514 {
3515 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
3516 
3517 	switch (prog_type) {
3518 	/* Program types only with direct read access go here! */
3519 	case BPF_PROG_TYPE_LWT_IN:
3520 	case BPF_PROG_TYPE_LWT_OUT:
3521 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
3522 	case BPF_PROG_TYPE_SK_REUSEPORT:
3523 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
3524 	case BPF_PROG_TYPE_CGROUP_SKB:
3525 		if (t == BPF_WRITE)
3526 			return false;
3527 		fallthrough;
3528 
3529 	/* Program types with direct read + write access go here! */
3530 	case BPF_PROG_TYPE_SCHED_CLS:
3531 	case BPF_PROG_TYPE_SCHED_ACT:
3532 	case BPF_PROG_TYPE_XDP:
3533 	case BPF_PROG_TYPE_LWT_XMIT:
3534 	case BPF_PROG_TYPE_SK_SKB:
3535 	case BPF_PROG_TYPE_SK_MSG:
3536 		if (meta)
3537 			return meta->pkt_access;
3538 
3539 		env->seen_direct_write = true;
3540 		return true;
3541 
3542 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3543 		if (t == BPF_WRITE)
3544 			env->seen_direct_write = true;
3545 
3546 		return true;
3547 
3548 	default:
3549 		return false;
3550 	}
3551 }
3552 
3553 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
3554 			       int size, bool zero_size_allowed)
3555 {
3556 	struct bpf_reg_state *regs = cur_regs(env);
3557 	struct bpf_reg_state *reg = &regs[regno];
3558 	int err;
3559 
3560 	/* We may have added a variable offset to the packet pointer; but any
3561 	 * reg->range we have comes after that.  We are only checking the fixed
3562 	 * offset.
3563 	 */
3564 
3565 	/* We don't allow negative numbers, because we aren't tracking enough
3566 	 * detail to prove they're safe.
3567 	 */
3568 	if (reg->smin_value < 0) {
3569 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3570 			regno);
3571 		return -EACCES;
3572 	}
3573 
3574 	err = reg->range < 0 ? -EINVAL :
3575 	      __check_mem_access(env, regno, off, size, reg->range,
3576 				 zero_size_allowed);
3577 	if (err) {
3578 		verbose(env, "R%d offset is outside of the packet\n", regno);
3579 		return err;
3580 	}
3581 
3582 	/* __check_mem_access has made sure "off + size - 1" is within u16.
3583 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
3584 	 * otherwise find_good_pkt_pointers would have refused to set range info
3585 	 * that __check_mem_access would have rejected this pkt access.
3586 	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
3587 	 */
3588 	env->prog->aux->max_pkt_offset =
3589 		max_t(u32, env->prog->aux->max_pkt_offset,
3590 		      off + reg->umax_value + size - 1);
3591 
3592 	return err;
3593 }
3594 
3595 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
3596 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
3597 			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
3598 			    struct btf **btf, u32 *btf_id)
3599 {
3600 	struct bpf_insn_access_aux info = {
3601 		.reg_type = *reg_type,
3602 		.log = &env->log,
3603 	};
3604 
3605 	if (env->ops->is_valid_access &&
3606 	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3607 		/* A non zero info.ctx_field_size indicates that this field is a
3608 		 * candidate for later verifier transformation to load the whole
3609 		 * field and then apply a mask when accessed with a narrower
3610 		 * access than actual ctx access size. A zero info.ctx_field_size
3611 		 * will only allow for whole field access and rejects any other
3612 		 * type of narrower access.
3613 		 */
3614 		*reg_type = info.reg_type;
3615 
3616 		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
3617 			*btf = info.btf;
3618 			*btf_id = info.btf_id;
3619 		} else {
3620 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3621 		}
3622 		/* remember the offset of last byte accessed in ctx */
3623 		if (env->prog->aux->max_ctx_offset < off + size)
3624 			env->prog->aux->max_ctx_offset = off + size;
3625 		return 0;
3626 	}
3627 
3628 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3629 	return -EACCES;
3630 }
3631 
3632 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3633 				  int size)
3634 {
3635 	if (size < 0 || off < 0 ||
3636 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
3637 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
3638 			off, size);
3639 		return -EACCES;
3640 	}
3641 	return 0;
3642 }
3643 
3644 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3645 			     u32 regno, int off, int size,
3646 			     enum bpf_access_type t)
3647 {
3648 	struct bpf_reg_state *regs = cur_regs(env);
3649 	struct bpf_reg_state *reg = &regs[regno];
3650 	struct bpf_insn_access_aux info = {};
3651 	bool valid;
3652 
3653 	if (reg->smin_value < 0) {
3654 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3655 			regno);
3656 		return -EACCES;
3657 	}
3658 
3659 	switch (reg->type) {
3660 	case PTR_TO_SOCK_COMMON:
3661 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3662 		break;
3663 	case PTR_TO_SOCKET:
3664 		valid = bpf_sock_is_valid_access(off, size, t, &info);
3665 		break;
3666 	case PTR_TO_TCP_SOCK:
3667 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3668 		break;
3669 	case PTR_TO_XDP_SOCK:
3670 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3671 		break;
3672 	default:
3673 		valid = false;
3674 	}
3675 
3676 
3677 	if (valid) {
3678 		env->insn_aux_data[insn_idx].ctx_field_size =
3679 			info.ctx_field_size;
3680 		return 0;
3681 	}
3682 
3683 	verbose(env, "R%d invalid %s access off=%d size=%d\n",
3684 		regno, reg_type_str(env, reg->type), off, size);
3685 
3686 	return -EACCES;
3687 }
3688 
3689 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3690 {
3691 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3692 }
3693 
3694 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3695 {
3696 	const struct bpf_reg_state *reg = reg_state(env, regno);
3697 
3698 	return reg->type == PTR_TO_CTX;
3699 }
3700 
3701 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3702 {
3703 	const struct bpf_reg_state *reg = reg_state(env, regno);
3704 
3705 	return type_is_sk_pointer(reg->type);
3706 }
3707 
3708 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3709 {
3710 	const struct bpf_reg_state *reg = reg_state(env, regno);
3711 
3712 	return type_is_pkt_pointer(reg->type);
3713 }
3714 
3715 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3716 {
3717 	const struct bpf_reg_state *reg = reg_state(env, regno);
3718 
3719 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3720 	return reg->type == PTR_TO_FLOW_KEYS;
3721 }
3722 
3723 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3724 				   const struct bpf_reg_state *reg,
3725 				   int off, int size, bool strict)
3726 {
3727 	struct tnum reg_off;
3728 	int ip_align;
3729 
3730 	/* Byte size accesses are always allowed. */
3731 	if (!strict || size == 1)
3732 		return 0;
3733 
3734 	/* For platforms that do not have a Kconfig enabling
3735 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3736 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
3737 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3738 	 * to this code only in strict mode where we want to emulate
3739 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
3740 	 * unconditional IP align value of '2'.
3741 	 */
3742 	ip_align = 2;
3743 
3744 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3745 	if (!tnum_is_aligned(reg_off, size)) {
3746 		char tn_buf[48];
3747 
3748 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3749 		verbose(env,
3750 			"misaligned packet access off %d+%s+%d+%d size %d\n",
3751 			ip_align, tn_buf, reg->off, off, size);
3752 		return -EACCES;
3753 	}
3754 
3755 	return 0;
3756 }
3757 
3758 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3759 				       const struct bpf_reg_state *reg,
3760 				       const char *pointer_desc,
3761 				       int off, int size, bool strict)
3762 {
3763 	struct tnum reg_off;
3764 
3765 	/* Byte size accesses are always allowed. */
3766 	if (!strict || size == 1)
3767 		return 0;
3768 
3769 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3770 	if (!tnum_is_aligned(reg_off, size)) {
3771 		char tn_buf[48];
3772 
3773 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3774 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
3775 			pointer_desc, tn_buf, reg->off, off, size);
3776 		return -EACCES;
3777 	}
3778 
3779 	return 0;
3780 }
3781 
3782 static int check_ptr_alignment(struct bpf_verifier_env *env,
3783 			       const struct bpf_reg_state *reg, int off,
3784 			       int size, bool strict_alignment_once)
3785 {
3786 	bool strict = env->strict_alignment || strict_alignment_once;
3787 	const char *pointer_desc = "";
3788 
3789 	switch (reg->type) {
3790 	case PTR_TO_PACKET:
3791 	case PTR_TO_PACKET_META:
3792 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
3793 		 * right in front, treat it the very same way.
3794 		 */
3795 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
3796 	case PTR_TO_FLOW_KEYS:
3797 		pointer_desc = "flow keys ";
3798 		break;
3799 	case PTR_TO_MAP_KEY:
3800 		pointer_desc = "key ";
3801 		break;
3802 	case PTR_TO_MAP_VALUE:
3803 		pointer_desc = "value ";
3804 		break;
3805 	case PTR_TO_CTX:
3806 		pointer_desc = "context ";
3807 		break;
3808 	case PTR_TO_STACK:
3809 		pointer_desc = "stack ";
3810 		/* The stack spill tracking logic in check_stack_write_fixed_off()
3811 		 * and check_stack_read_fixed_off() relies on stack accesses being
3812 		 * aligned.
3813 		 */
3814 		strict = true;
3815 		break;
3816 	case PTR_TO_SOCKET:
3817 		pointer_desc = "sock ";
3818 		break;
3819 	case PTR_TO_SOCK_COMMON:
3820 		pointer_desc = "sock_common ";
3821 		break;
3822 	case PTR_TO_TCP_SOCK:
3823 		pointer_desc = "tcp_sock ";
3824 		break;
3825 	case PTR_TO_XDP_SOCK:
3826 		pointer_desc = "xdp_sock ";
3827 		break;
3828 	default:
3829 		break;
3830 	}
3831 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
3832 					   strict);
3833 }
3834 
3835 static int update_stack_depth(struct bpf_verifier_env *env,
3836 			      const struct bpf_func_state *func,
3837 			      int off)
3838 {
3839 	u16 stack = env->subprog_info[func->subprogno].stack_depth;
3840 
3841 	if (stack >= -off)
3842 		return 0;
3843 
3844 	/* update known max for given subprogram */
3845 	env->subprog_info[func->subprogno].stack_depth = -off;
3846 	return 0;
3847 }
3848 
3849 /* starting from main bpf function walk all instructions of the function
3850  * and recursively walk all callees that given function can call.
3851  * Ignore jump and exit insns.
3852  * Since recursion is prevented by check_cfg() this algorithm
3853  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3854  */
3855 static int check_max_stack_depth(struct bpf_verifier_env *env)
3856 {
3857 	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3858 	struct bpf_subprog_info *subprog = env->subprog_info;
3859 	struct bpf_insn *insn = env->prog->insnsi;
3860 	bool tail_call_reachable = false;
3861 	int ret_insn[MAX_CALL_FRAMES];
3862 	int ret_prog[MAX_CALL_FRAMES];
3863 	int j;
3864 
3865 process_func:
3866 	/* protect against potential stack overflow that might happen when
3867 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3868 	 * depth for such case down to 256 so that the worst case scenario
3869 	 * would result in 8k stack size (32 which is tailcall limit * 256 =
3870 	 * 8k).
3871 	 *
3872 	 * To get the idea what might happen, see an example:
3873 	 * func1 -> sub rsp, 128
3874 	 *  subfunc1 -> sub rsp, 256
3875 	 *  tailcall1 -> add rsp, 256
3876 	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3877 	 *   subfunc2 -> sub rsp, 64
3878 	 *   subfunc22 -> sub rsp, 128
3879 	 *   tailcall2 -> add rsp, 128
3880 	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3881 	 *
3882 	 * tailcall will unwind the current stack frame but it will not get rid
3883 	 * of caller's stack as shown on the example above.
3884 	 */
3885 	if (idx && subprog[idx].has_tail_call && depth >= 256) {
3886 		verbose(env,
3887 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3888 			depth);
3889 		return -EACCES;
3890 	}
3891 	/* round up to 32-bytes, since this is granularity
3892 	 * of interpreter stack size
3893 	 */
3894 	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3895 	if (depth > MAX_BPF_STACK) {
3896 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
3897 			frame + 1, depth);
3898 		return -EACCES;
3899 	}
3900 continue_func:
3901 	subprog_end = subprog[idx + 1].start;
3902 	for (; i < subprog_end; i++) {
3903 		int next_insn;
3904 
3905 		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
3906 			continue;
3907 		/* remember insn and function to return to */
3908 		ret_insn[frame] = i + 1;
3909 		ret_prog[frame] = idx;
3910 
3911 		/* find the callee */
3912 		next_insn = i + insn[i].imm + 1;
3913 		idx = find_subprog(env, next_insn);
3914 		if (idx < 0) {
3915 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3916 				  next_insn);
3917 			return -EFAULT;
3918 		}
3919 		if (subprog[idx].is_async_cb) {
3920 			if (subprog[idx].has_tail_call) {
3921 				verbose(env, "verifier bug. subprog has tail_call and async cb\n");
3922 				return -EFAULT;
3923 			}
3924 			 /* async callbacks don't increase bpf prog stack size */
3925 			continue;
3926 		}
3927 		i = next_insn;
3928 
3929 		if (subprog[idx].has_tail_call)
3930 			tail_call_reachable = true;
3931 
3932 		frame++;
3933 		if (frame >= MAX_CALL_FRAMES) {
3934 			verbose(env, "the call stack of %d frames is too deep !\n",
3935 				frame);
3936 			return -E2BIG;
3937 		}
3938 		goto process_func;
3939 	}
3940 	/* if tail call got detected across bpf2bpf calls then mark each of the
3941 	 * currently present subprog frames as tail call reachable subprogs;
3942 	 * this info will be utilized by JIT so that we will be preserving the
3943 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
3944 	 */
3945 	if (tail_call_reachable)
3946 		for (j = 0; j < frame; j++)
3947 			subprog[ret_prog[j]].tail_call_reachable = true;
3948 	if (subprog[0].tail_call_reachable)
3949 		env->prog->aux->tail_call_reachable = true;
3950 
3951 	/* end of for() loop means the last insn of the 'subprog'
3952 	 * was reached. Doesn't matter whether it was JA or EXIT
3953 	 */
3954 	if (frame == 0)
3955 		return 0;
3956 	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3957 	frame--;
3958 	i = ret_insn[frame];
3959 	idx = ret_prog[frame];
3960 	goto continue_func;
3961 }
3962 
3963 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3964 static int get_callee_stack_depth(struct bpf_verifier_env *env,
3965 				  const struct bpf_insn *insn, int idx)
3966 {
3967 	int start = idx + insn->imm + 1, subprog;
3968 
3969 	subprog = find_subprog(env, start);
3970 	if (subprog < 0) {
3971 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3972 			  start);
3973 		return -EFAULT;
3974 	}
3975 	return env->subprog_info[subprog].stack_depth;
3976 }
3977 #endif
3978 
3979 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
3980 			       const struct bpf_reg_state *reg, int regno,
3981 			       bool fixed_off_ok)
3982 {
3983 	/* Access to this pointer-typed register or passing it to a helper
3984 	 * is only allowed in its original, unmodified form.
3985 	 */
3986 
3987 	if (!fixed_off_ok && reg->off) {
3988 		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
3989 			reg_type_str(env, reg->type), regno, reg->off);
3990 		return -EACCES;
3991 	}
3992 
3993 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3994 		char tn_buf[48];
3995 
3996 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3997 		verbose(env, "variable %s access var_off=%s disallowed\n",
3998 			reg_type_str(env, reg->type), tn_buf);
3999 		return -EACCES;
4000 	}
4001 
4002 	return 0;
4003 }
4004 
4005 int check_ptr_off_reg(struct bpf_verifier_env *env,
4006 		      const struct bpf_reg_state *reg, int regno)
4007 {
4008 	return __check_ptr_off_reg(env, reg, regno, false);
4009 }
4010 
4011 static int __check_buffer_access(struct bpf_verifier_env *env,
4012 				 const char *buf_info,
4013 				 const struct bpf_reg_state *reg,
4014 				 int regno, int off, int size)
4015 {
4016 	if (off < 0) {
4017 		verbose(env,
4018 			"R%d invalid %s buffer access: off=%d, size=%d\n",
4019 			regno, buf_info, off, size);
4020 		return -EACCES;
4021 	}
4022 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4023 		char tn_buf[48];
4024 
4025 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4026 		verbose(env,
4027 			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
4028 			regno, off, tn_buf);
4029 		return -EACCES;
4030 	}
4031 
4032 	return 0;
4033 }
4034 
4035 static int check_tp_buffer_access(struct bpf_verifier_env *env,
4036 				  const struct bpf_reg_state *reg,
4037 				  int regno, int off, int size)
4038 {
4039 	int err;
4040 
4041 	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
4042 	if (err)
4043 		return err;
4044 
4045 	if (off + size > env->prog->aux->max_tp_access)
4046 		env->prog->aux->max_tp_access = off + size;
4047 
4048 	return 0;
4049 }
4050 
4051 static int check_buffer_access(struct bpf_verifier_env *env,
4052 			       const struct bpf_reg_state *reg,
4053 			       int regno, int off, int size,
4054 			       bool zero_size_allowed,
4055 			       const char *buf_info,
4056 			       u32 *max_access)
4057 {
4058 	int err;
4059 
4060 	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4061 	if (err)
4062 		return err;
4063 
4064 	if (off + size > *max_access)
4065 		*max_access = off + size;
4066 
4067 	return 0;
4068 }
4069 
4070 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
4071 static void zext_32_to_64(struct bpf_reg_state *reg)
4072 {
4073 	reg->var_off = tnum_subreg(reg->var_off);
4074 	__reg_assign_32_into_64(reg);
4075 }
4076 
4077 /* truncate register to smaller size (in bytes)
4078  * must be called with size < BPF_REG_SIZE
4079  */
4080 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4081 {
4082 	u64 mask;
4083 
4084 	/* clear high bits in bit representation */
4085 	reg->var_off = tnum_cast(reg->var_off, size);
4086 
4087 	/* fix arithmetic bounds */
4088 	mask = ((u64)1 << (size * 8)) - 1;
4089 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4090 		reg->umin_value &= mask;
4091 		reg->umax_value &= mask;
4092 	} else {
4093 		reg->umin_value = 0;
4094 		reg->umax_value = mask;
4095 	}
4096 	reg->smin_value = reg->umin_value;
4097 	reg->smax_value = reg->umax_value;
4098 
4099 	/* If size is smaller than 32bit register the 32bit register
4100 	 * values are also truncated so we push 64-bit bounds into
4101 	 * 32-bit bounds. Above were truncated < 32-bits already.
4102 	 */
4103 	if (size >= 4)
4104 		return;
4105 	__reg_combine_64_into_32(reg);
4106 }
4107 
4108 static bool bpf_map_is_rdonly(const struct bpf_map *map)
4109 {
4110 	/* A map is considered read-only if the following condition are true:
4111 	 *
4112 	 * 1) BPF program side cannot change any of the map content. The
4113 	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
4114 	 *    and was set at map creation time.
4115 	 * 2) The map value(s) have been initialized from user space by a
4116 	 *    loader and then "frozen", such that no new map update/delete
4117 	 *    operations from syscall side are possible for the rest of
4118 	 *    the map's lifetime from that point onwards.
4119 	 * 3) Any parallel/pending map update/delete operations from syscall
4120 	 *    side have been completed. Only after that point, it's safe to
4121 	 *    assume that map value(s) are immutable.
4122 	 */
4123 	return (map->map_flags & BPF_F_RDONLY_PROG) &&
4124 	       READ_ONCE(map->frozen) &&
4125 	       !bpf_map_write_active(map);
4126 }
4127 
4128 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4129 {
4130 	void *ptr;
4131 	u64 addr;
4132 	int err;
4133 
4134 	err = map->ops->map_direct_value_addr(map, &addr, off);
4135 	if (err)
4136 		return err;
4137 	ptr = (void *)(long)addr + off;
4138 
4139 	switch (size) {
4140 	case sizeof(u8):
4141 		*val = (u64)*(u8 *)ptr;
4142 		break;
4143 	case sizeof(u16):
4144 		*val = (u64)*(u16 *)ptr;
4145 		break;
4146 	case sizeof(u32):
4147 		*val = (u64)*(u32 *)ptr;
4148 		break;
4149 	case sizeof(u64):
4150 		*val = *(u64 *)ptr;
4151 		break;
4152 	default:
4153 		return -EINVAL;
4154 	}
4155 	return 0;
4156 }
4157 
4158 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4159 				   struct bpf_reg_state *regs,
4160 				   int regno, int off, int size,
4161 				   enum bpf_access_type atype,
4162 				   int value_regno)
4163 {
4164 	struct bpf_reg_state *reg = regs + regno;
4165 	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4166 	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
4167 	enum bpf_type_flag flag = 0;
4168 	u32 btf_id;
4169 	int ret;
4170 
4171 	if (off < 0) {
4172 		verbose(env,
4173 			"R%d is ptr_%s invalid negative access: off=%d\n",
4174 			regno, tname, off);
4175 		return -EACCES;
4176 	}
4177 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4178 		char tn_buf[48];
4179 
4180 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4181 		verbose(env,
4182 			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4183 			regno, tname, off, tn_buf);
4184 		return -EACCES;
4185 	}
4186 
4187 	if (reg->type & MEM_USER) {
4188 		verbose(env,
4189 			"R%d is ptr_%s access user memory: off=%d\n",
4190 			regno, tname, off);
4191 		return -EACCES;
4192 	}
4193 
4194 	if (env->ops->btf_struct_access) {
4195 		ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
4196 						  off, size, atype, &btf_id, &flag);
4197 	} else {
4198 		if (atype != BPF_READ) {
4199 			verbose(env, "only read is supported\n");
4200 			return -EACCES;
4201 		}
4202 
4203 		ret = btf_struct_access(&env->log, reg->btf, t, off, size,
4204 					atype, &btf_id, &flag);
4205 	}
4206 
4207 	if (ret < 0)
4208 		return ret;
4209 
4210 	if (atype == BPF_READ && value_regno >= 0)
4211 		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
4212 
4213 	return 0;
4214 }
4215 
4216 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4217 				   struct bpf_reg_state *regs,
4218 				   int regno, int off, int size,
4219 				   enum bpf_access_type atype,
4220 				   int value_regno)
4221 {
4222 	struct bpf_reg_state *reg = regs + regno;
4223 	struct bpf_map *map = reg->map_ptr;
4224 	enum bpf_type_flag flag = 0;
4225 	const struct btf_type *t;
4226 	const char *tname;
4227 	u32 btf_id;
4228 	int ret;
4229 
4230 	if (!btf_vmlinux) {
4231 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4232 		return -ENOTSUPP;
4233 	}
4234 
4235 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4236 		verbose(env, "map_ptr access not supported for map type %d\n",
4237 			map->map_type);
4238 		return -ENOTSUPP;
4239 	}
4240 
4241 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4242 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4243 
4244 	if (!env->allow_ptr_to_map_access) {
4245 		verbose(env,
4246 			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4247 			tname);
4248 		return -EPERM;
4249 	}
4250 
4251 	if (off < 0) {
4252 		verbose(env, "R%d is %s invalid negative access: off=%d\n",
4253 			regno, tname, off);
4254 		return -EACCES;
4255 	}
4256 
4257 	if (atype != BPF_READ) {
4258 		verbose(env, "only read from %s is supported\n", tname);
4259 		return -EACCES;
4260 	}
4261 
4262 	ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
4263 	if (ret < 0)
4264 		return ret;
4265 
4266 	if (value_regno >= 0)
4267 		mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
4268 
4269 	return 0;
4270 }
4271 
4272 /* Check that the stack access at the given offset is within bounds. The
4273  * maximum valid offset is -1.
4274  *
4275  * The minimum valid offset is -MAX_BPF_STACK for writes, and
4276  * -state->allocated_stack for reads.
4277  */
4278 static int check_stack_slot_within_bounds(int off,
4279 					  struct bpf_func_state *state,
4280 					  enum bpf_access_type t)
4281 {
4282 	int min_valid_off;
4283 
4284 	if (t == BPF_WRITE)
4285 		min_valid_off = -MAX_BPF_STACK;
4286 	else
4287 		min_valid_off = -state->allocated_stack;
4288 
4289 	if (off < min_valid_off || off > -1)
4290 		return -EACCES;
4291 	return 0;
4292 }
4293 
4294 /* Check that the stack access at 'regno + off' falls within the maximum stack
4295  * bounds.
4296  *
4297  * 'off' includes `regno->offset`, but not its dynamic part (if any).
4298  */
4299 static int check_stack_access_within_bounds(
4300 		struct bpf_verifier_env *env,
4301 		int regno, int off, int access_size,
4302 		enum stack_access_src src, enum bpf_access_type type)
4303 {
4304 	struct bpf_reg_state *regs = cur_regs(env);
4305 	struct bpf_reg_state *reg = regs + regno;
4306 	struct bpf_func_state *state = func(env, reg);
4307 	int min_off, max_off;
4308 	int err;
4309 	char *err_extra;
4310 
4311 	if (src == ACCESS_HELPER)
4312 		/* We don't know if helpers are reading or writing (or both). */
4313 		err_extra = " indirect access to";
4314 	else if (type == BPF_READ)
4315 		err_extra = " read from";
4316 	else
4317 		err_extra = " write to";
4318 
4319 	if (tnum_is_const(reg->var_off)) {
4320 		min_off = reg->var_off.value + off;
4321 		if (access_size > 0)
4322 			max_off = min_off + access_size - 1;
4323 		else
4324 			max_off = min_off;
4325 	} else {
4326 		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4327 		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
4328 			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4329 				err_extra, regno);
4330 			return -EACCES;
4331 		}
4332 		min_off = reg->smin_value + off;
4333 		if (access_size > 0)
4334 			max_off = reg->smax_value + off + access_size - 1;
4335 		else
4336 			max_off = min_off;
4337 	}
4338 
4339 	err = check_stack_slot_within_bounds(min_off, state, type);
4340 	if (!err)
4341 		err = check_stack_slot_within_bounds(max_off, state, type);
4342 
4343 	if (err) {
4344 		if (tnum_is_const(reg->var_off)) {
4345 			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
4346 				err_extra, regno, off, access_size);
4347 		} else {
4348 			char tn_buf[48];
4349 
4350 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4351 			verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
4352 				err_extra, regno, tn_buf, access_size);
4353 		}
4354 	}
4355 	return err;
4356 }
4357 
4358 /* check whether memory at (regno + off) is accessible for t = (read | write)
4359  * if t==write, value_regno is a register which value is stored into memory
4360  * if t==read, value_regno is a register which will receive the value from memory
4361  * if t==write && value_regno==-1, some unknown value is stored into memory
4362  * if t==read && value_regno==-1, don't care what we read from memory
4363  */
4364 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
4365 			    int off, int bpf_size, enum bpf_access_type t,
4366 			    int value_regno, bool strict_alignment_once)
4367 {
4368 	struct bpf_reg_state *regs = cur_regs(env);
4369 	struct bpf_reg_state *reg = regs + regno;
4370 	struct bpf_func_state *state;
4371 	int size, err = 0;
4372 
4373 	size = bpf_size_to_bytes(bpf_size);
4374 	if (size < 0)
4375 		return size;
4376 
4377 	/* alignment checks will add in reg->off themselves */
4378 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
4379 	if (err)
4380 		return err;
4381 
4382 	/* for access checks, reg->off is just part of off */
4383 	off += reg->off;
4384 
4385 	if (reg->type == PTR_TO_MAP_KEY) {
4386 		if (t == BPF_WRITE) {
4387 			verbose(env, "write to change key R%d not allowed\n", regno);
4388 			return -EACCES;
4389 		}
4390 
4391 		err = check_mem_region_access(env, regno, off, size,
4392 					      reg->map_ptr->key_size, false);
4393 		if (err)
4394 			return err;
4395 		if (value_regno >= 0)
4396 			mark_reg_unknown(env, regs, value_regno);
4397 	} else if (reg->type == PTR_TO_MAP_VALUE) {
4398 		if (t == BPF_WRITE && value_regno >= 0 &&
4399 		    is_pointer_value(env, value_regno)) {
4400 			verbose(env, "R%d leaks addr into map\n", value_regno);
4401 			return -EACCES;
4402 		}
4403 		err = check_map_access_type(env, regno, off, size, t);
4404 		if (err)
4405 			return err;
4406 		err = check_map_access(env, regno, off, size, false);
4407 		if (!err && t == BPF_READ && value_regno >= 0) {
4408 			struct bpf_map *map = reg->map_ptr;
4409 
4410 			/* if map is read-only, track its contents as scalars */
4411 			if (tnum_is_const(reg->var_off) &&
4412 			    bpf_map_is_rdonly(map) &&
4413 			    map->ops->map_direct_value_addr) {
4414 				int map_off = off + reg->var_off.value;
4415 				u64 val = 0;
4416 
4417 				err = bpf_map_direct_read(map, map_off, size,
4418 							  &val);
4419 				if (err)
4420 					return err;
4421 
4422 				regs[value_regno].type = SCALAR_VALUE;
4423 				__mark_reg_known(&regs[value_regno], val);
4424 			} else {
4425 				mark_reg_unknown(env, regs, value_regno);
4426 			}
4427 		}
4428 	} else if (base_type(reg->type) == PTR_TO_MEM) {
4429 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
4430 
4431 		if (type_may_be_null(reg->type)) {
4432 			verbose(env, "R%d invalid mem access '%s'\n", regno,
4433 				reg_type_str(env, reg->type));
4434 			return -EACCES;
4435 		}
4436 
4437 		if (t == BPF_WRITE && rdonly_mem) {
4438 			verbose(env, "R%d cannot write into %s\n",
4439 				regno, reg_type_str(env, reg->type));
4440 			return -EACCES;
4441 		}
4442 
4443 		if (t == BPF_WRITE && value_regno >= 0 &&
4444 		    is_pointer_value(env, value_regno)) {
4445 			verbose(env, "R%d leaks addr into mem\n", value_regno);
4446 			return -EACCES;
4447 		}
4448 
4449 		err = check_mem_region_access(env, regno, off, size,
4450 					      reg->mem_size, false);
4451 		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
4452 			mark_reg_unknown(env, regs, value_regno);
4453 	} else if (reg->type == PTR_TO_CTX) {
4454 		enum bpf_reg_type reg_type = SCALAR_VALUE;
4455 		struct btf *btf = NULL;
4456 		u32 btf_id = 0;
4457 
4458 		if (t == BPF_WRITE && value_regno >= 0 &&
4459 		    is_pointer_value(env, value_regno)) {
4460 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
4461 			return -EACCES;
4462 		}
4463 
4464 		err = check_ptr_off_reg(env, reg, regno);
4465 		if (err < 0)
4466 			return err;
4467 
4468 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
4469 				       &btf_id);
4470 		if (err)
4471 			verbose_linfo(env, insn_idx, "; ");
4472 		if (!err && t == BPF_READ && value_regno >= 0) {
4473 			/* ctx access returns either a scalar, or a
4474 			 * PTR_TO_PACKET[_META,_END]. In the latter
4475 			 * case, we know the offset is zero.
4476 			 */
4477 			if (reg_type == SCALAR_VALUE) {
4478 				mark_reg_unknown(env, regs, value_regno);
4479 			} else {
4480 				mark_reg_known_zero(env, regs,
4481 						    value_regno);
4482 				if (type_may_be_null(reg_type))
4483 					regs[value_regno].id = ++env->id_gen;
4484 				/* A load of ctx field could have different
4485 				 * actual load size with the one encoded in the
4486 				 * insn. When the dst is PTR, it is for sure not
4487 				 * a sub-register.
4488 				 */
4489 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
4490 				if (base_type(reg_type) == PTR_TO_BTF_ID) {
4491 					regs[value_regno].btf = btf;
4492 					regs[value_regno].btf_id = btf_id;
4493 				}
4494 			}
4495 			regs[value_regno].type = reg_type;
4496 		}
4497 
4498 	} else if (reg->type == PTR_TO_STACK) {
4499 		/* Basic bounds checks. */
4500 		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
4501 		if (err)
4502 			return err;
4503 
4504 		state = func(env, reg);
4505 		err = update_stack_depth(env, state, off);
4506 		if (err)
4507 			return err;
4508 
4509 		if (t == BPF_READ)
4510 			err = check_stack_read(env, regno, off, size,
4511 					       value_regno);
4512 		else
4513 			err = check_stack_write(env, regno, off, size,
4514 						value_regno, insn_idx);
4515 	} else if (reg_is_pkt_pointer(reg)) {
4516 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
4517 			verbose(env, "cannot write into packet\n");
4518 			return -EACCES;
4519 		}
4520 		if (t == BPF_WRITE && value_regno >= 0 &&
4521 		    is_pointer_value(env, value_regno)) {
4522 			verbose(env, "R%d leaks addr into packet\n",
4523 				value_regno);
4524 			return -EACCES;
4525 		}
4526 		err = check_packet_access(env, regno, off, size, false);
4527 		if (!err && t == BPF_READ && value_regno >= 0)
4528 			mark_reg_unknown(env, regs, value_regno);
4529 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
4530 		if (t == BPF_WRITE && value_regno >= 0 &&
4531 		    is_pointer_value(env, value_regno)) {
4532 			verbose(env, "R%d leaks addr into flow keys\n",
4533 				value_regno);
4534 			return -EACCES;
4535 		}
4536 
4537 		err = check_flow_keys_access(env, off, size);
4538 		if (!err && t == BPF_READ && value_regno >= 0)
4539 			mark_reg_unknown(env, regs, value_regno);
4540 	} else if (type_is_sk_pointer(reg->type)) {
4541 		if (t == BPF_WRITE) {
4542 			verbose(env, "R%d cannot write into %s\n",
4543 				regno, reg_type_str(env, reg->type));
4544 			return -EACCES;
4545 		}
4546 		err = check_sock_access(env, insn_idx, regno, off, size, t);
4547 		if (!err && value_regno >= 0)
4548 			mark_reg_unknown(env, regs, value_regno);
4549 	} else if (reg->type == PTR_TO_TP_BUFFER) {
4550 		err = check_tp_buffer_access(env, reg, regno, off, size);
4551 		if (!err && t == BPF_READ && value_regno >= 0)
4552 			mark_reg_unknown(env, regs, value_regno);
4553 	} else if (reg->type == PTR_TO_BTF_ID) {
4554 		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
4555 					      value_regno);
4556 	} else if (reg->type == CONST_PTR_TO_MAP) {
4557 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
4558 					      value_regno);
4559 	} else if (base_type(reg->type) == PTR_TO_BUF) {
4560 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
4561 		const char *buf_info;
4562 		u32 *max_access;
4563 
4564 		if (rdonly_mem) {
4565 			if (t == BPF_WRITE) {
4566 				verbose(env, "R%d cannot write into %s\n",
4567 					regno, reg_type_str(env, reg->type));
4568 				return -EACCES;
4569 			}
4570 			buf_info = "rdonly";
4571 			max_access = &env->prog->aux->max_rdonly_access;
4572 		} else {
4573 			buf_info = "rdwr";
4574 			max_access = &env->prog->aux->max_rdwr_access;
4575 		}
4576 
4577 		err = check_buffer_access(env, reg, regno, off, size, false,
4578 					  buf_info, max_access);
4579 
4580 		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
4581 			mark_reg_unknown(env, regs, value_regno);
4582 	} else {
4583 		verbose(env, "R%d invalid mem access '%s'\n", regno,
4584 			reg_type_str(env, reg->type));
4585 		return -EACCES;
4586 	}
4587 
4588 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
4589 	    regs[value_regno].type == SCALAR_VALUE) {
4590 		/* b/h/w load zero-extends, mark upper bits as known 0 */
4591 		coerce_reg_to_size(&regs[value_regno], size);
4592 	}
4593 	return err;
4594 }
4595 
4596 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
4597 {
4598 	int load_reg;
4599 	int err;
4600 
4601 	switch (insn->imm) {
4602 	case BPF_ADD:
4603 	case BPF_ADD | BPF_FETCH:
4604 	case BPF_AND:
4605 	case BPF_AND | BPF_FETCH:
4606 	case BPF_OR:
4607 	case BPF_OR | BPF_FETCH:
4608 	case BPF_XOR:
4609 	case BPF_XOR | BPF_FETCH:
4610 	case BPF_XCHG:
4611 	case BPF_CMPXCHG:
4612 		break;
4613 	default:
4614 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
4615 		return -EINVAL;
4616 	}
4617 
4618 	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
4619 		verbose(env, "invalid atomic operand size\n");
4620 		return -EINVAL;
4621 	}
4622 
4623 	/* check src1 operand */
4624 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
4625 	if (err)
4626 		return err;
4627 
4628 	/* check src2 operand */
4629 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4630 	if (err)
4631 		return err;
4632 
4633 	if (insn->imm == BPF_CMPXCHG) {
4634 		/* Check comparison of R0 with memory location */
4635 		const u32 aux_reg = BPF_REG_0;
4636 
4637 		err = check_reg_arg(env, aux_reg, SRC_OP);
4638 		if (err)
4639 			return err;
4640 
4641 		if (is_pointer_value(env, aux_reg)) {
4642 			verbose(env, "R%d leaks addr into mem\n", aux_reg);
4643 			return -EACCES;
4644 		}
4645 	}
4646 
4647 	if (is_pointer_value(env, insn->src_reg)) {
4648 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
4649 		return -EACCES;
4650 	}
4651 
4652 	if (is_ctx_reg(env, insn->dst_reg) ||
4653 	    is_pkt_reg(env, insn->dst_reg) ||
4654 	    is_flow_key_reg(env, insn->dst_reg) ||
4655 	    is_sk_reg(env, insn->dst_reg)) {
4656 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
4657 			insn->dst_reg,
4658 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
4659 		return -EACCES;
4660 	}
4661 
4662 	if (insn->imm & BPF_FETCH) {
4663 		if (insn->imm == BPF_CMPXCHG)
4664 			load_reg = BPF_REG_0;
4665 		else
4666 			load_reg = insn->src_reg;
4667 
4668 		/* check and record load of old value */
4669 		err = check_reg_arg(env, load_reg, DST_OP);
4670 		if (err)
4671 			return err;
4672 	} else {
4673 		/* This instruction accesses a memory location but doesn't
4674 		 * actually load it into a register.
4675 		 */
4676 		load_reg = -1;
4677 	}
4678 
4679 	/* Check whether we can read the memory, with second call for fetch
4680 	 * case to simulate the register fill.
4681 	 */
4682 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4683 			       BPF_SIZE(insn->code), BPF_READ, -1, true);
4684 	if (!err && load_reg >= 0)
4685 		err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4686 				       BPF_SIZE(insn->code), BPF_READ, load_reg,
4687 				       true);
4688 	if (err)
4689 		return err;
4690 
4691 	/* Check whether we can write into the same memory. */
4692 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4693 			       BPF_SIZE(insn->code), BPF_WRITE, -1, true);
4694 	if (err)
4695 		return err;
4696 
4697 	return 0;
4698 }
4699 
4700 /* When register 'regno' is used to read the stack (either directly or through
4701  * a helper function) make sure that it's within stack boundary and, depending
4702  * on the access type, that all elements of the stack are initialized.
4703  *
4704  * 'off' includes 'regno->off', but not its dynamic part (if any).
4705  *
4706  * All registers that have been spilled on the stack in the slots within the
4707  * read offsets are marked as read.
4708  */
4709 static int check_stack_range_initialized(
4710 		struct bpf_verifier_env *env, int regno, int off,
4711 		int access_size, bool zero_size_allowed,
4712 		enum stack_access_src type, struct bpf_call_arg_meta *meta)
4713 {
4714 	struct bpf_reg_state *reg = reg_state(env, regno);
4715 	struct bpf_func_state *state = func(env, reg);
4716 	int err, min_off, max_off, i, j, slot, spi;
4717 	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
4718 	enum bpf_access_type bounds_check_type;
4719 	/* Some accesses can write anything into the stack, others are
4720 	 * read-only.
4721 	 */
4722 	bool clobber = false;
4723 
4724 	if (access_size == 0 && !zero_size_allowed) {
4725 		verbose(env, "invalid zero-sized read\n");
4726 		return -EACCES;
4727 	}
4728 
4729 	if (type == ACCESS_HELPER) {
4730 		/* The bounds checks for writes are more permissive than for
4731 		 * reads. However, if raw_mode is not set, we'll do extra
4732 		 * checks below.
4733 		 */
4734 		bounds_check_type = BPF_WRITE;
4735 		clobber = true;
4736 	} else {
4737 		bounds_check_type = BPF_READ;
4738 	}
4739 	err = check_stack_access_within_bounds(env, regno, off, access_size,
4740 					       type, bounds_check_type);
4741 	if (err)
4742 		return err;
4743 
4744 
4745 	if (tnum_is_const(reg->var_off)) {
4746 		min_off = max_off = reg->var_off.value + off;
4747 	} else {
4748 		/* Variable offset is prohibited for unprivileged mode for
4749 		 * simplicity since it requires corresponding support in
4750 		 * Spectre masking for stack ALU.
4751 		 * See also retrieve_ptr_limit().
4752 		 */
4753 		if (!env->bypass_spec_v1) {
4754 			char tn_buf[48];
4755 
4756 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4757 			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4758 				regno, err_extra, tn_buf);
4759 			return -EACCES;
4760 		}
4761 		/* Only initialized buffer on stack is allowed to be accessed
4762 		 * with variable offset. With uninitialized buffer it's hard to
4763 		 * guarantee that whole memory is marked as initialized on
4764 		 * helper return since specific bounds are unknown what may
4765 		 * cause uninitialized stack leaking.
4766 		 */
4767 		if (meta && meta->raw_mode)
4768 			meta = NULL;
4769 
4770 		min_off = reg->smin_value + off;
4771 		max_off = reg->smax_value + off;
4772 	}
4773 
4774 	if (meta && meta->raw_mode) {
4775 		meta->access_size = access_size;
4776 		meta->regno = regno;
4777 		return 0;
4778 	}
4779 
4780 	for (i = min_off; i < max_off + access_size; i++) {
4781 		u8 *stype;
4782 
4783 		slot = -i - 1;
4784 		spi = slot / BPF_REG_SIZE;
4785 		if (state->allocated_stack <= slot)
4786 			goto err;
4787 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4788 		if (*stype == STACK_MISC)
4789 			goto mark;
4790 		if (*stype == STACK_ZERO) {
4791 			if (clobber) {
4792 				/* helper can write anything into the stack */
4793 				*stype = STACK_MISC;
4794 			}
4795 			goto mark;
4796 		}
4797 
4798 		if (is_spilled_reg(&state->stack[spi]) &&
4799 		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4800 			goto mark;
4801 
4802 		if (is_spilled_reg(&state->stack[spi]) &&
4803 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4804 		     env->allow_ptr_leaks)) {
4805 			if (clobber) {
4806 				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4807 				for (j = 0; j < BPF_REG_SIZE; j++)
4808 					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
4809 			}
4810 			goto mark;
4811 		}
4812 
4813 err:
4814 		if (tnum_is_const(reg->var_off)) {
4815 			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4816 				err_extra, regno, min_off, i - min_off, access_size);
4817 		} else {
4818 			char tn_buf[48];
4819 
4820 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4821 			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4822 				err_extra, regno, tn_buf, i - min_off, access_size);
4823 		}
4824 		return -EACCES;
4825 mark:
4826 		/* reading any byte out of 8-byte 'spill_slot' will cause
4827 		 * the whole slot to be marked as 'read'
4828 		 */
4829 		mark_reg_read(env, &state->stack[spi].spilled_ptr,
4830 			      state->stack[spi].spilled_ptr.parent,
4831 			      REG_LIVE_READ64);
4832 	}
4833 	return update_stack_depth(env, state, min_off);
4834 }
4835 
4836 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
4837 				   int access_size, bool zero_size_allowed,
4838 				   struct bpf_call_arg_meta *meta)
4839 {
4840 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4841 	const char *buf_info;
4842 	u32 *max_access;
4843 
4844 	switch (base_type(reg->type)) {
4845 	case PTR_TO_PACKET:
4846 	case PTR_TO_PACKET_META:
4847 		return check_packet_access(env, regno, reg->off, access_size,
4848 					   zero_size_allowed);
4849 	case PTR_TO_MAP_KEY:
4850 		return check_mem_region_access(env, regno, reg->off, access_size,
4851 					       reg->map_ptr->key_size, false);
4852 	case PTR_TO_MAP_VALUE:
4853 		if (check_map_access_type(env, regno, reg->off, access_size,
4854 					  meta && meta->raw_mode ? BPF_WRITE :
4855 					  BPF_READ))
4856 			return -EACCES;
4857 		return check_map_access(env, regno, reg->off, access_size,
4858 					zero_size_allowed);
4859 	case PTR_TO_MEM:
4860 		return check_mem_region_access(env, regno, reg->off,
4861 					       access_size, reg->mem_size,
4862 					       zero_size_allowed);
4863 	case PTR_TO_BUF:
4864 		if (type_is_rdonly_mem(reg->type)) {
4865 			if (meta && meta->raw_mode)
4866 				return -EACCES;
4867 
4868 			buf_info = "rdonly";
4869 			max_access = &env->prog->aux->max_rdonly_access;
4870 		} else {
4871 			buf_info = "rdwr";
4872 			max_access = &env->prog->aux->max_rdwr_access;
4873 		}
4874 		return check_buffer_access(env, reg, regno, reg->off,
4875 					   access_size, zero_size_allowed,
4876 					   buf_info, max_access);
4877 	case PTR_TO_STACK:
4878 		return check_stack_range_initialized(
4879 				env,
4880 				regno, reg->off, access_size,
4881 				zero_size_allowed, ACCESS_HELPER, meta);
4882 	default: /* scalar_value or invalid ptr */
4883 		/* Allow zero-byte read from NULL, regardless of pointer type */
4884 		if (zero_size_allowed && access_size == 0 &&
4885 		    register_is_null(reg))
4886 			return 0;
4887 
4888 		verbose(env, "R%d type=%s ", regno,
4889 			reg_type_str(env, reg->type));
4890 		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
4891 		return -EACCES;
4892 	}
4893 }
4894 
4895 static int check_mem_size_reg(struct bpf_verifier_env *env,
4896 			      struct bpf_reg_state *reg, u32 regno,
4897 			      bool zero_size_allowed,
4898 			      struct bpf_call_arg_meta *meta)
4899 {
4900 	int err;
4901 
4902 	/* This is used to refine r0 return value bounds for helpers
4903 	 * that enforce this value as an upper bound on return values.
4904 	 * See do_refine_retval_range() for helpers that can refine
4905 	 * the return value. C type of helper is u32 so we pull register
4906 	 * bound from umax_value however, if negative verifier errors
4907 	 * out. Only upper bounds can be learned because retval is an
4908 	 * int type and negative retvals are allowed.
4909 	 */
4910 	if (meta)
4911 		meta->msize_max_value = reg->umax_value;
4912 
4913 	/* The register is SCALAR_VALUE; the access check
4914 	 * happens using its boundaries.
4915 	 */
4916 	if (!tnum_is_const(reg->var_off))
4917 		/* For unprivileged variable accesses, disable raw
4918 		 * mode so that the program is required to
4919 		 * initialize all the memory that the helper could
4920 		 * just partially fill up.
4921 		 */
4922 		meta = NULL;
4923 
4924 	if (reg->smin_value < 0) {
4925 		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
4926 			regno);
4927 		return -EACCES;
4928 	}
4929 
4930 	if (reg->umin_value == 0) {
4931 		err = check_helper_mem_access(env, regno - 1, 0,
4932 					      zero_size_allowed,
4933 					      meta);
4934 		if (err)
4935 			return err;
4936 	}
4937 
4938 	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
4939 		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
4940 			regno);
4941 		return -EACCES;
4942 	}
4943 	err = check_helper_mem_access(env, regno - 1,
4944 				      reg->umax_value,
4945 				      zero_size_allowed, meta);
4946 	if (!err)
4947 		err = mark_chain_precision(env, regno);
4948 	return err;
4949 }
4950 
4951 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4952 		   u32 regno, u32 mem_size)
4953 {
4954 	if (register_is_null(reg))
4955 		return 0;
4956 
4957 	if (type_may_be_null(reg->type)) {
4958 		/* Assuming that the register contains a value check if the memory
4959 		 * access is safe. Temporarily save and restore the register's state as
4960 		 * the conversion shouldn't be visible to a caller.
4961 		 */
4962 		const struct bpf_reg_state saved_reg = *reg;
4963 		int rv;
4964 
4965 		mark_ptr_not_null_reg(reg);
4966 		rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
4967 		*reg = saved_reg;
4968 		return rv;
4969 	}
4970 
4971 	return check_helper_mem_access(env, regno, mem_size, true, NULL);
4972 }
4973 
4974 int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4975 			     u32 regno)
4976 {
4977 	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
4978 	bool may_be_null = type_may_be_null(mem_reg->type);
4979 	struct bpf_reg_state saved_reg;
4980 	int err;
4981 
4982 	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
4983 
4984 	if (may_be_null) {
4985 		saved_reg = *mem_reg;
4986 		mark_ptr_not_null_reg(mem_reg);
4987 	}
4988 
4989 	err = check_mem_size_reg(env, reg, regno, true, NULL);
4990 
4991 	if (may_be_null)
4992 		*mem_reg = saved_reg;
4993 	return err;
4994 }
4995 
4996 /* Implementation details:
4997  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4998  * Two bpf_map_lookups (even with the same key) will have different reg->id.
4999  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
5000  * value_or_null->value transition, since the verifier only cares about
5001  * the range of access to valid map value pointer and doesn't care about actual
5002  * address of the map element.
5003  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
5004  * reg->id > 0 after value_or_null->value transition. By doing so
5005  * two bpf_map_lookups will be considered two different pointers that
5006  * point to different bpf_spin_locks.
5007  * The verifier allows taking only one bpf_spin_lock at a time to avoid
5008  * dead-locks.
5009  * Since only one bpf_spin_lock is allowed the checks are simpler than
5010  * reg_is_refcounted() logic. The verifier needs to remember only
5011  * one spin_lock instead of array of acquired_refs.
5012  * cur_state->active_spin_lock remembers which map value element got locked
5013  * and clears it after bpf_spin_unlock.
5014  */
5015 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
5016 			     bool is_lock)
5017 {
5018 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5019 	struct bpf_verifier_state *cur = env->cur_state;
5020 	bool is_const = tnum_is_const(reg->var_off);
5021 	struct bpf_map *map = reg->map_ptr;
5022 	u64 val = reg->var_off.value;
5023 
5024 	if (!is_const) {
5025 		verbose(env,
5026 			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
5027 			regno);
5028 		return -EINVAL;
5029 	}
5030 	if (!map->btf) {
5031 		verbose(env,
5032 			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
5033 			map->name);
5034 		return -EINVAL;
5035 	}
5036 	if (!map_value_has_spin_lock(map)) {
5037 		if (map->spin_lock_off == -E2BIG)
5038 			verbose(env,
5039 				"map '%s' has more than one 'struct bpf_spin_lock'\n",
5040 				map->name);
5041 		else if (map->spin_lock_off == -ENOENT)
5042 			verbose(env,
5043 				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
5044 				map->name);
5045 		else
5046 			verbose(env,
5047 				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
5048 				map->name);
5049 		return -EINVAL;
5050 	}
5051 	if (map->spin_lock_off != val + reg->off) {
5052 		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
5053 			val + reg->off);
5054 		return -EINVAL;
5055 	}
5056 	if (is_lock) {
5057 		if (cur->active_spin_lock) {
5058 			verbose(env,
5059 				"Locking two bpf_spin_locks are not allowed\n");
5060 			return -EINVAL;
5061 		}
5062 		cur->active_spin_lock = reg->id;
5063 	} else {
5064 		if (!cur->active_spin_lock) {
5065 			verbose(env, "bpf_spin_unlock without taking a lock\n");
5066 			return -EINVAL;
5067 		}
5068 		if (cur->active_spin_lock != reg->id) {
5069 			verbose(env, "bpf_spin_unlock of different lock\n");
5070 			return -EINVAL;
5071 		}
5072 		cur->active_spin_lock = 0;
5073 	}
5074 	return 0;
5075 }
5076 
5077 static int process_timer_func(struct bpf_verifier_env *env, int regno,
5078 			      struct bpf_call_arg_meta *meta)
5079 {
5080 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5081 	bool is_const = tnum_is_const(reg->var_off);
5082 	struct bpf_map *map = reg->map_ptr;
5083 	u64 val = reg->var_off.value;
5084 
5085 	if (!is_const) {
5086 		verbose(env,
5087 			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
5088 			regno);
5089 		return -EINVAL;
5090 	}
5091 	if (!map->btf) {
5092 		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
5093 			map->name);
5094 		return -EINVAL;
5095 	}
5096 	if (!map_value_has_timer(map)) {
5097 		if (map->timer_off == -E2BIG)
5098 			verbose(env,
5099 				"map '%s' has more than one 'struct bpf_timer'\n",
5100 				map->name);
5101 		else if (map->timer_off == -ENOENT)
5102 			verbose(env,
5103 				"map '%s' doesn't have 'struct bpf_timer'\n",
5104 				map->name);
5105 		else
5106 			verbose(env,
5107 				"map '%s' is not a struct type or bpf_timer is mangled\n",
5108 				map->name);
5109 		return -EINVAL;
5110 	}
5111 	if (map->timer_off != val + reg->off) {
5112 		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
5113 			val + reg->off, map->timer_off);
5114 		return -EINVAL;
5115 	}
5116 	if (meta->map_ptr) {
5117 		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
5118 		return -EFAULT;
5119 	}
5120 	meta->map_uid = reg->map_uid;
5121 	meta->map_ptr = map;
5122 	return 0;
5123 }
5124 
5125 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
5126 {
5127 	return base_type(type) == ARG_PTR_TO_MEM ||
5128 	       base_type(type) == ARG_PTR_TO_UNINIT_MEM;
5129 }
5130 
5131 static bool arg_type_is_mem_size(enum bpf_arg_type type)
5132 {
5133 	return type == ARG_CONST_SIZE ||
5134 	       type == ARG_CONST_SIZE_OR_ZERO;
5135 }
5136 
5137 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
5138 {
5139 	return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
5140 }
5141 
5142 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
5143 {
5144 	return type == ARG_PTR_TO_INT ||
5145 	       type == ARG_PTR_TO_LONG;
5146 }
5147 
5148 static int int_ptr_type_to_size(enum bpf_arg_type type)
5149 {
5150 	if (type == ARG_PTR_TO_INT)
5151 		return sizeof(u32);
5152 	else if (type == ARG_PTR_TO_LONG)
5153 		return sizeof(u64);
5154 
5155 	return -EINVAL;
5156 }
5157 
5158 static int resolve_map_arg_type(struct bpf_verifier_env *env,
5159 				 const struct bpf_call_arg_meta *meta,
5160 				 enum bpf_arg_type *arg_type)
5161 {
5162 	if (!meta->map_ptr) {
5163 		/* kernel subsystem misconfigured verifier */
5164 		verbose(env, "invalid map_ptr to access map->type\n");
5165 		return -EACCES;
5166 	}
5167 
5168 	switch (meta->map_ptr->map_type) {
5169 	case BPF_MAP_TYPE_SOCKMAP:
5170 	case BPF_MAP_TYPE_SOCKHASH:
5171 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
5172 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
5173 		} else {
5174 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
5175 			return -EINVAL;
5176 		}
5177 		break;
5178 	case BPF_MAP_TYPE_BLOOM_FILTER:
5179 		if (meta->func_id == BPF_FUNC_map_peek_elem)
5180 			*arg_type = ARG_PTR_TO_MAP_VALUE;
5181 		break;
5182 	default:
5183 		break;
5184 	}
5185 	return 0;
5186 }
5187 
5188 struct bpf_reg_types {
5189 	const enum bpf_reg_type types[10];
5190 	u32 *btf_id;
5191 };
5192 
5193 static const struct bpf_reg_types map_key_value_types = {
5194 	.types = {
5195 		PTR_TO_STACK,
5196 		PTR_TO_PACKET,
5197 		PTR_TO_PACKET_META,
5198 		PTR_TO_MAP_KEY,
5199 		PTR_TO_MAP_VALUE,
5200 	},
5201 };
5202 
5203 static const struct bpf_reg_types sock_types = {
5204 	.types = {
5205 		PTR_TO_SOCK_COMMON,
5206 		PTR_TO_SOCKET,
5207 		PTR_TO_TCP_SOCK,
5208 		PTR_TO_XDP_SOCK,
5209 	},
5210 };
5211 
5212 #ifdef CONFIG_NET
5213 static const struct bpf_reg_types btf_id_sock_common_types = {
5214 	.types = {
5215 		PTR_TO_SOCK_COMMON,
5216 		PTR_TO_SOCKET,
5217 		PTR_TO_TCP_SOCK,
5218 		PTR_TO_XDP_SOCK,
5219 		PTR_TO_BTF_ID,
5220 	},
5221 	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5222 };
5223 #endif
5224 
5225 static const struct bpf_reg_types mem_types = {
5226 	.types = {
5227 		PTR_TO_STACK,
5228 		PTR_TO_PACKET,
5229 		PTR_TO_PACKET_META,
5230 		PTR_TO_MAP_KEY,
5231 		PTR_TO_MAP_VALUE,
5232 		PTR_TO_MEM,
5233 		PTR_TO_MEM | MEM_ALLOC,
5234 		PTR_TO_BUF,
5235 	},
5236 };
5237 
5238 static const struct bpf_reg_types int_ptr_types = {
5239 	.types = {
5240 		PTR_TO_STACK,
5241 		PTR_TO_PACKET,
5242 		PTR_TO_PACKET_META,
5243 		PTR_TO_MAP_KEY,
5244 		PTR_TO_MAP_VALUE,
5245 	},
5246 };
5247 
5248 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
5249 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
5250 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
5251 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
5252 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
5253 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
5254 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
5255 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
5256 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
5257 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
5258 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
5259 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5260 
5261 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
5262 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
5263 	[ARG_PTR_TO_MAP_VALUE]		= &map_key_value_types,
5264 	[ARG_PTR_TO_UNINIT_MAP_VALUE]	= &map_key_value_types,
5265 	[ARG_CONST_SIZE]		= &scalar_types,
5266 	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
5267 	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
5268 	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
5269 	[ARG_PTR_TO_CTX]		= &context_types,
5270 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
5271 #ifdef CONFIG_NET
5272 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
5273 #endif
5274 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
5275 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
5276 	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
5277 	[ARG_PTR_TO_MEM]		= &mem_types,
5278 	[ARG_PTR_TO_UNINIT_MEM]		= &mem_types,
5279 	[ARG_PTR_TO_ALLOC_MEM]		= &alloc_mem_types,
5280 	[ARG_PTR_TO_INT]		= &int_ptr_types,
5281 	[ARG_PTR_TO_LONG]		= &int_ptr_types,
5282 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
5283 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
5284 	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
5285 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
5286 	[ARG_PTR_TO_TIMER]		= &timer_types,
5287 };
5288 
5289 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
5290 			  enum bpf_arg_type arg_type,
5291 			  const u32 *arg_btf_id)
5292 {
5293 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5294 	enum bpf_reg_type expected, type = reg->type;
5295 	const struct bpf_reg_types *compatible;
5296 	int i, j;
5297 
5298 	compatible = compatible_reg_types[base_type(arg_type)];
5299 	if (!compatible) {
5300 		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
5301 		return -EFAULT;
5302 	}
5303 
5304 	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
5305 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
5306 	 *
5307 	 * Same for MAYBE_NULL:
5308 	 *
5309 	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
5310 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
5311 	 *
5312 	 * Therefore we fold these flags depending on the arg_type before comparison.
5313 	 */
5314 	if (arg_type & MEM_RDONLY)
5315 		type &= ~MEM_RDONLY;
5316 	if (arg_type & PTR_MAYBE_NULL)
5317 		type &= ~PTR_MAYBE_NULL;
5318 
5319 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
5320 		expected = compatible->types[i];
5321 		if (expected == NOT_INIT)
5322 			break;
5323 
5324 		if (type == expected)
5325 			goto found;
5326 	}
5327 
5328 	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
5329 	for (j = 0; j + 1 < i; j++)
5330 		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
5331 	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
5332 	return -EACCES;
5333 
5334 found:
5335 	if (reg->type == PTR_TO_BTF_ID) {
5336 		if (!arg_btf_id) {
5337 			if (!compatible->btf_id) {
5338 				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
5339 				return -EFAULT;
5340 			}
5341 			arg_btf_id = compatible->btf_id;
5342 		}
5343 
5344 		if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5345 					  btf_vmlinux, *arg_btf_id)) {
5346 			verbose(env, "R%d is of type %s but %s is expected\n",
5347 				regno, kernel_type_name(reg->btf, reg->btf_id),
5348 				kernel_type_name(btf_vmlinux, *arg_btf_id));
5349 			return -EACCES;
5350 		}
5351 	}
5352 
5353 	return 0;
5354 }
5355 
5356 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
5357 			  struct bpf_call_arg_meta *meta,
5358 			  const struct bpf_func_proto *fn)
5359 {
5360 	u32 regno = BPF_REG_1 + arg;
5361 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5362 	enum bpf_arg_type arg_type = fn->arg_type[arg];
5363 	enum bpf_reg_type type = reg->type;
5364 	int err = 0;
5365 
5366 	if (arg_type == ARG_DONTCARE)
5367 		return 0;
5368 
5369 	err = check_reg_arg(env, regno, SRC_OP);
5370 	if (err)
5371 		return err;
5372 
5373 	if (arg_type == ARG_ANYTHING) {
5374 		if (is_pointer_value(env, regno)) {
5375 			verbose(env, "R%d leaks addr into helper function\n",
5376 				regno);
5377 			return -EACCES;
5378 		}
5379 		return 0;
5380 	}
5381 
5382 	if (type_is_pkt_pointer(type) &&
5383 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
5384 		verbose(env, "helper access to the packet is not allowed\n");
5385 		return -EACCES;
5386 	}
5387 
5388 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
5389 	    base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
5390 		err = resolve_map_arg_type(env, meta, &arg_type);
5391 		if (err)
5392 			return err;
5393 	}
5394 
5395 	if (register_is_null(reg) && type_may_be_null(arg_type))
5396 		/* A NULL register has a SCALAR_VALUE type, so skip
5397 		 * type checking.
5398 		 */
5399 		goto skip_type_check;
5400 
5401 	err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
5402 	if (err)
5403 		return err;
5404 
5405 	switch ((u32)type) {
5406 	case SCALAR_VALUE:
5407 	/* Pointer types where reg offset is explicitly allowed: */
5408 	case PTR_TO_PACKET:
5409 	case PTR_TO_PACKET_META:
5410 	case PTR_TO_MAP_KEY:
5411 	case PTR_TO_MAP_VALUE:
5412 	case PTR_TO_MEM:
5413 	case PTR_TO_MEM | MEM_RDONLY:
5414 	case PTR_TO_MEM | MEM_ALLOC:
5415 	case PTR_TO_BUF:
5416 	case PTR_TO_BUF | MEM_RDONLY:
5417 	case PTR_TO_STACK:
5418 		/* Some of the argument types nevertheless require a
5419 		 * zero register offset.
5420 		 */
5421 		if (arg_type == ARG_PTR_TO_ALLOC_MEM)
5422 			goto force_off_check;
5423 		break;
5424 	/* All the rest must be rejected: */
5425 	default:
5426 force_off_check:
5427 		err = __check_ptr_off_reg(env, reg, regno,
5428 					  type == PTR_TO_BTF_ID);
5429 		if (err < 0)
5430 			return err;
5431 		break;
5432 	}
5433 
5434 skip_type_check:
5435 	if (reg->ref_obj_id) {
5436 		if (meta->ref_obj_id) {
5437 			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
5438 				regno, reg->ref_obj_id,
5439 				meta->ref_obj_id);
5440 			return -EFAULT;
5441 		}
5442 		meta->ref_obj_id = reg->ref_obj_id;
5443 	}
5444 
5445 	if (arg_type == ARG_CONST_MAP_PTR) {
5446 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
5447 		if (meta->map_ptr) {
5448 			/* Use map_uid (which is unique id of inner map) to reject:
5449 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
5450 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
5451 			 * if (inner_map1 && inner_map2) {
5452 			 *     timer = bpf_map_lookup_elem(inner_map1);
5453 			 *     if (timer)
5454 			 *         // mismatch would have been allowed
5455 			 *         bpf_timer_init(timer, inner_map2);
5456 			 * }
5457 			 *
5458 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
5459 			 */
5460 			if (meta->map_ptr != reg->map_ptr ||
5461 			    meta->map_uid != reg->map_uid) {
5462 				verbose(env,
5463 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
5464 					meta->map_uid, reg->map_uid);
5465 				return -EINVAL;
5466 			}
5467 		}
5468 		meta->map_ptr = reg->map_ptr;
5469 		meta->map_uid = reg->map_uid;
5470 	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
5471 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
5472 		 * check that [key, key + map->key_size) are within
5473 		 * stack limits and initialized
5474 		 */
5475 		if (!meta->map_ptr) {
5476 			/* in function declaration map_ptr must come before
5477 			 * map_key, so that it's verified and known before
5478 			 * we have to check map_key here. Otherwise it means
5479 			 * that kernel subsystem misconfigured verifier
5480 			 */
5481 			verbose(env, "invalid map_ptr to access map->key\n");
5482 			return -EACCES;
5483 		}
5484 		err = check_helper_mem_access(env, regno,
5485 					      meta->map_ptr->key_size, false,
5486 					      NULL);
5487 	} else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
5488 		   base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
5489 		if (type_may_be_null(arg_type) && register_is_null(reg))
5490 			return 0;
5491 
5492 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
5493 		 * check [value, value + map->value_size) validity
5494 		 */
5495 		if (!meta->map_ptr) {
5496 			/* kernel subsystem misconfigured verifier */
5497 			verbose(env, "invalid map_ptr to access map->value\n");
5498 			return -EACCES;
5499 		}
5500 		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
5501 		err = check_helper_mem_access(env, regno,
5502 					      meta->map_ptr->value_size, false,
5503 					      meta);
5504 	} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
5505 		if (!reg->btf_id) {
5506 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
5507 			return -EACCES;
5508 		}
5509 		meta->ret_btf = reg->btf;
5510 		meta->ret_btf_id = reg->btf_id;
5511 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
5512 		if (meta->func_id == BPF_FUNC_spin_lock) {
5513 			if (process_spin_lock(env, regno, true))
5514 				return -EACCES;
5515 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
5516 			if (process_spin_lock(env, regno, false))
5517 				return -EACCES;
5518 		} else {
5519 			verbose(env, "verifier internal error\n");
5520 			return -EFAULT;
5521 		}
5522 	} else if (arg_type == ARG_PTR_TO_TIMER) {
5523 		if (process_timer_func(env, regno, meta))
5524 			return -EACCES;
5525 	} else if (arg_type == ARG_PTR_TO_FUNC) {
5526 		meta->subprogno = reg->subprogno;
5527 	} else if (arg_type_is_mem_ptr(arg_type)) {
5528 		/* The access to this pointer is only checked when we hit the
5529 		 * next is_mem_size argument below.
5530 		 */
5531 		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
5532 	} else if (arg_type_is_mem_size(arg_type)) {
5533 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
5534 
5535 		err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
5536 	} else if (arg_type_is_alloc_size(arg_type)) {
5537 		if (!tnum_is_const(reg->var_off)) {
5538 			verbose(env, "R%d is not a known constant'\n",
5539 				regno);
5540 			return -EACCES;
5541 		}
5542 		meta->mem_size = reg->var_off.value;
5543 	} else if (arg_type_is_int_ptr(arg_type)) {
5544 		int size = int_ptr_type_to_size(arg_type);
5545 
5546 		err = check_helper_mem_access(env, regno, size, false, meta);
5547 		if (err)
5548 			return err;
5549 		err = check_ptr_alignment(env, reg, 0, size, true);
5550 	} else if (arg_type == ARG_PTR_TO_CONST_STR) {
5551 		struct bpf_map *map = reg->map_ptr;
5552 		int map_off;
5553 		u64 map_addr;
5554 		char *str_ptr;
5555 
5556 		if (!bpf_map_is_rdonly(map)) {
5557 			verbose(env, "R%d does not point to a readonly map'\n", regno);
5558 			return -EACCES;
5559 		}
5560 
5561 		if (!tnum_is_const(reg->var_off)) {
5562 			verbose(env, "R%d is not a constant address'\n", regno);
5563 			return -EACCES;
5564 		}
5565 
5566 		if (!map->ops->map_direct_value_addr) {
5567 			verbose(env, "no direct value access support for this map type\n");
5568 			return -EACCES;
5569 		}
5570 
5571 		err = check_map_access(env, regno, reg->off,
5572 				       map->value_size - reg->off, false);
5573 		if (err)
5574 			return err;
5575 
5576 		map_off = reg->off + reg->var_off.value;
5577 		err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
5578 		if (err) {
5579 			verbose(env, "direct value access on string failed\n");
5580 			return err;
5581 		}
5582 
5583 		str_ptr = (char *)(long)(map_addr);
5584 		if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
5585 			verbose(env, "string is not zero-terminated\n");
5586 			return -EINVAL;
5587 		}
5588 	}
5589 
5590 	return err;
5591 }
5592 
5593 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
5594 {
5595 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
5596 	enum bpf_prog_type type = resolve_prog_type(env->prog);
5597 
5598 	if (func_id != BPF_FUNC_map_update_elem)
5599 		return false;
5600 
5601 	/* It's not possible to get access to a locked struct sock in these
5602 	 * contexts, so updating is safe.
5603 	 */
5604 	switch (type) {
5605 	case BPF_PROG_TYPE_TRACING:
5606 		if (eatype == BPF_TRACE_ITER)
5607 			return true;
5608 		break;
5609 	case BPF_PROG_TYPE_SOCKET_FILTER:
5610 	case BPF_PROG_TYPE_SCHED_CLS:
5611 	case BPF_PROG_TYPE_SCHED_ACT:
5612 	case BPF_PROG_TYPE_XDP:
5613 	case BPF_PROG_TYPE_SK_REUSEPORT:
5614 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
5615 	case BPF_PROG_TYPE_SK_LOOKUP:
5616 		return true;
5617 	default:
5618 		break;
5619 	}
5620 
5621 	verbose(env, "cannot update sockmap in this context\n");
5622 	return false;
5623 }
5624 
5625 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
5626 {
5627 	return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
5628 }
5629 
5630 static int check_map_func_compatibility(struct bpf_verifier_env *env,
5631 					struct bpf_map *map, int func_id)
5632 {
5633 	if (!map)
5634 		return 0;
5635 
5636 	/* We need a two way check, first is from map perspective ... */
5637 	switch (map->map_type) {
5638 	case BPF_MAP_TYPE_PROG_ARRAY:
5639 		if (func_id != BPF_FUNC_tail_call)
5640 			goto error;
5641 		break;
5642 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5643 		if (func_id != BPF_FUNC_perf_event_read &&
5644 		    func_id != BPF_FUNC_perf_event_output &&
5645 		    func_id != BPF_FUNC_skb_output &&
5646 		    func_id != BPF_FUNC_perf_event_read_value &&
5647 		    func_id != BPF_FUNC_xdp_output)
5648 			goto error;
5649 		break;
5650 	case BPF_MAP_TYPE_RINGBUF:
5651 		if (func_id != BPF_FUNC_ringbuf_output &&
5652 		    func_id != BPF_FUNC_ringbuf_reserve &&
5653 		    func_id != BPF_FUNC_ringbuf_query)
5654 			goto error;
5655 		break;
5656 	case BPF_MAP_TYPE_STACK_TRACE:
5657 		if (func_id != BPF_FUNC_get_stackid)
5658 			goto error;
5659 		break;
5660 	case BPF_MAP_TYPE_CGROUP_ARRAY:
5661 		if (func_id != BPF_FUNC_skb_under_cgroup &&
5662 		    func_id != BPF_FUNC_current_task_under_cgroup)
5663 			goto error;
5664 		break;
5665 	case BPF_MAP_TYPE_CGROUP_STORAGE:
5666 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
5667 		if (func_id != BPF_FUNC_get_local_storage)
5668 			goto error;
5669 		break;
5670 	case BPF_MAP_TYPE_DEVMAP:
5671 	case BPF_MAP_TYPE_DEVMAP_HASH:
5672 		if (func_id != BPF_FUNC_redirect_map &&
5673 		    func_id != BPF_FUNC_map_lookup_elem)
5674 			goto error;
5675 		break;
5676 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
5677 	 * appear.
5678 	 */
5679 	case BPF_MAP_TYPE_CPUMAP:
5680 		if (func_id != BPF_FUNC_redirect_map)
5681 			goto error;
5682 		break;
5683 	case BPF_MAP_TYPE_XSKMAP:
5684 		if (func_id != BPF_FUNC_redirect_map &&
5685 		    func_id != BPF_FUNC_map_lookup_elem)
5686 			goto error;
5687 		break;
5688 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5689 	case BPF_MAP_TYPE_HASH_OF_MAPS:
5690 		if (func_id != BPF_FUNC_map_lookup_elem)
5691 			goto error;
5692 		break;
5693 	case BPF_MAP_TYPE_SOCKMAP:
5694 		if (func_id != BPF_FUNC_sk_redirect_map &&
5695 		    func_id != BPF_FUNC_sock_map_update &&
5696 		    func_id != BPF_FUNC_map_delete_elem &&
5697 		    func_id != BPF_FUNC_msg_redirect_map &&
5698 		    func_id != BPF_FUNC_sk_select_reuseport &&
5699 		    func_id != BPF_FUNC_map_lookup_elem &&
5700 		    !may_update_sockmap(env, func_id))
5701 			goto error;
5702 		break;
5703 	case BPF_MAP_TYPE_SOCKHASH:
5704 		if (func_id != BPF_FUNC_sk_redirect_hash &&
5705 		    func_id != BPF_FUNC_sock_hash_update &&
5706 		    func_id != BPF_FUNC_map_delete_elem &&
5707 		    func_id != BPF_FUNC_msg_redirect_hash &&
5708 		    func_id != BPF_FUNC_sk_select_reuseport &&
5709 		    func_id != BPF_FUNC_map_lookup_elem &&
5710 		    !may_update_sockmap(env, func_id))
5711 			goto error;
5712 		break;
5713 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
5714 		if (func_id != BPF_FUNC_sk_select_reuseport)
5715 			goto error;
5716 		break;
5717 	case BPF_MAP_TYPE_QUEUE:
5718 	case BPF_MAP_TYPE_STACK:
5719 		if (func_id != BPF_FUNC_map_peek_elem &&
5720 		    func_id != BPF_FUNC_map_pop_elem &&
5721 		    func_id != BPF_FUNC_map_push_elem)
5722 			goto error;
5723 		break;
5724 	case BPF_MAP_TYPE_SK_STORAGE:
5725 		if (func_id != BPF_FUNC_sk_storage_get &&
5726 		    func_id != BPF_FUNC_sk_storage_delete)
5727 			goto error;
5728 		break;
5729 	case BPF_MAP_TYPE_INODE_STORAGE:
5730 		if (func_id != BPF_FUNC_inode_storage_get &&
5731 		    func_id != BPF_FUNC_inode_storage_delete)
5732 			goto error;
5733 		break;
5734 	case BPF_MAP_TYPE_TASK_STORAGE:
5735 		if (func_id != BPF_FUNC_task_storage_get &&
5736 		    func_id != BPF_FUNC_task_storage_delete)
5737 			goto error;
5738 		break;
5739 	case BPF_MAP_TYPE_BLOOM_FILTER:
5740 		if (func_id != BPF_FUNC_map_peek_elem &&
5741 		    func_id != BPF_FUNC_map_push_elem)
5742 			goto error;
5743 		break;
5744 	default:
5745 		break;
5746 	}
5747 
5748 	/* ... and second from the function itself. */
5749 	switch (func_id) {
5750 	case BPF_FUNC_tail_call:
5751 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
5752 			goto error;
5753 		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
5754 			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
5755 			return -EINVAL;
5756 		}
5757 		break;
5758 	case BPF_FUNC_perf_event_read:
5759 	case BPF_FUNC_perf_event_output:
5760 	case BPF_FUNC_perf_event_read_value:
5761 	case BPF_FUNC_skb_output:
5762 	case BPF_FUNC_xdp_output:
5763 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
5764 			goto error;
5765 		break;
5766 	case BPF_FUNC_ringbuf_output:
5767 	case BPF_FUNC_ringbuf_reserve:
5768 	case BPF_FUNC_ringbuf_query:
5769 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
5770 			goto error;
5771 		break;
5772 	case BPF_FUNC_get_stackid:
5773 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
5774 			goto error;
5775 		break;
5776 	case BPF_FUNC_current_task_under_cgroup:
5777 	case BPF_FUNC_skb_under_cgroup:
5778 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
5779 			goto error;
5780 		break;
5781 	case BPF_FUNC_redirect_map:
5782 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
5783 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
5784 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
5785 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
5786 			goto error;
5787 		break;
5788 	case BPF_FUNC_sk_redirect_map:
5789 	case BPF_FUNC_msg_redirect_map:
5790 	case BPF_FUNC_sock_map_update:
5791 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
5792 			goto error;
5793 		break;
5794 	case BPF_FUNC_sk_redirect_hash:
5795 	case BPF_FUNC_msg_redirect_hash:
5796 	case BPF_FUNC_sock_hash_update:
5797 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
5798 			goto error;
5799 		break;
5800 	case BPF_FUNC_get_local_storage:
5801 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
5802 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
5803 			goto error;
5804 		break;
5805 	case BPF_FUNC_sk_select_reuseport:
5806 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
5807 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
5808 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
5809 			goto error;
5810 		break;
5811 	case BPF_FUNC_map_pop_elem:
5812 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5813 		    map->map_type != BPF_MAP_TYPE_STACK)
5814 			goto error;
5815 		break;
5816 	case BPF_FUNC_map_peek_elem:
5817 	case BPF_FUNC_map_push_elem:
5818 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5819 		    map->map_type != BPF_MAP_TYPE_STACK &&
5820 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
5821 			goto error;
5822 		break;
5823 	case BPF_FUNC_sk_storage_get:
5824 	case BPF_FUNC_sk_storage_delete:
5825 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
5826 			goto error;
5827 		break;
5828 	case BPF_FUNC_inode_storage_get:
5829 	case BPF_FUNC_inode_storage_delete:
5830 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
5831 			goto error;
5832 		break;
5833 	case BPF_FUNC_task_storage_get:
5834 	case BPF_FUNC_task_storage_delete:
5835 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
5836 			goto error;
5837 		break;
5838 	default:
5839 		break;
5840 	}
5841 
5842 	return 0;
5843 error:
5844 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
5845 		map->map_type, func_id_name(func_id), func_id);
5846 	return -EINVAL;
5847 }
5848 
5849 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
5850 {
5851 	int count = 0;
5852 
5853 	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
5854 		count++;
5855 	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
5856 		count++;
5857 	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
5858 		count++;
5859 	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
5860 		count++;
5861 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
5862 		count++;
5863 
5864 	/* We only support one arg being in raw mode at the moment,
5865 	 * which is sufficient for the helper functions we have
5866 	 * right now.
5867 	 */
5868 	return count <= 1;
5869 }
5870 
5871 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
5872 				    enum bpf_arg_type arg_next)
5873 {
5874 	return (arg_type_is_mem_ptr(arg_curr) &&
5875 	        !arg_type_is_mem_size(arg_next)) ||
5876 	       (!arg_type_is_mem_ptr(arg_curr) &&
5877 		arg_type_is_mem_size(arg_next));
5878 }
5879 
5880 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
5881 {
5882 	/* bpf_xxx(..., buf, len) call will access 'len'
5883 	 * bytes from memory 'buf'. Both arg types need
5884 	 * to be paired, so make sure there's no buggy
5885 	 * helper function specification.
5886 	 */
5887 	if (arg_type_is_mem_size(fn->arg1_type) ||
5888 	    arg_type_is_mem_ptr(fn->arg5_type)  ||
5889 	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
5890 	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
5891 	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
5892 	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
5893 		return false;
5894 
5895 	return true;
5896 }
5897 
5898 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
5899 {
5900 	int count = 0;
5901 
5902 	if (arg_type_may_be_refcounted(fn->arg1_type))
5903 		count++;
5904 	if (arg_type_may_be_refcounted(fn->arg2_type))
5905 		count++;
5906 	if (arg_type_may_be_refcounted(fn->arg3_type))
5907 		count++;
5908 	if (arg_type_may_be_refcounted(fn->arg4_type))
5909 		count++;
5910 	if (arg_type_may_be_refcounted(fn->arg5_type))
5911 		count++;
5912 
5913 	/* A reference acquiring function cannot acquire
5914 	 * another refcounted ptr.
5915 	 */
5916 	if (may_be_acquire_function(func_id) && count)
5917 		return false;
5918 
5919 	/* We only support one arg being unreferenced at the moment,
5920 	 * which is sufficient for the helper functions we have right now.
5921 	 */
5922 	return count <= 1;
5923 }
5924 
5925 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
5926 {
5927 	int i;
5928 
5929 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
5930 		if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
5931 			return false;
5932 
5933 		if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
5934 			return false;
5935 	}
5936 
5937 	return true;
5938 }
5939 
5940 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
5941 {
5942 	return check_raw_mode_ok(fn) &&
5943 	       check_arg_pair_ok(fn) &&
5944 	       check_btf_id_ok(fn) &&
5945 	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
5946 }
5947 
5948 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
5949  * are now invalid, so turn them into unknown SCALAR_VALUE.
5950  */
5951 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
5952 				     struct bpf_func_state *state)
5953 {
5954 	struct bpf_reg_state *regs = state->regs, *reg;
5955 	int i;
5956 
5957 	for (i = 0; i < MAX_BPF_REG; i++)
5958 		if (reg_is_pkt_pointer_any(&regs[i]))
5959 			mark_reg_unknown(env, regs, i);
5960 
5961 	bpf_for_each_spilled_reg(i, state, reg) {
5962 		if (!reg)
5963 			continue;
5964 		if (reg_is_pkt_pointer_any(reg))
5965 			__mark_reg_unknown(env, reg);
5966 	}
5967 }
5968 
5969 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
5970 {
5971 	struct bpf_verifier_state *vstate = env->cur_state;
5972 	int i;
5973 
5974 	for (i = 0; i <= vstate->curframe; i++)
5975 		__clear_all_pkt_pointers(env, vstate->frame[i]);
5976 }
5977 
5978 enum {
5979 	AT_PKT_END = -1,
5980 	BEYOND_PKT_END = -2,
5981 };
5982 
5983 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
5984 {
5985 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5986 	struct bpf_reg_state *reg = &state->regs[regn];
5987 
5988 	if (reg->type != PTR_TO_PACKET)
5989 		/* PTR_TO_PACKET_META is not supported yet */
5990 		return;
5991 
5992 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
5993 	 * How far beyond pkt_end it goes is unknown.
5994 	 * if (!range_open) it's the case of pkt >= pkt_end
5995 	 * if (range_open) it's the case of pkt > pkt_end
5996 	 * hence this pointer is at least 1 byte bigger than pkt_end
5997 	 */
5998 	if (range_open)
5999 		reg->range = BEYOND_PKT_END;
6000 	else
6001 		reg->range = AT_PKT_END;
6002 }
6003 
6004 static void release_reg_references(struct bpf_verifier_env *env,
6005 				   struct bpf_func_state *state,
6006 				   int ref_obj_id)
6007 {
6008 	struct bpf_reg_state *regs = state->regs, *reg;
6009 	int i;
6010 
6011 	for (i = 0; i < MAX_BPF_REG; i++)
6012 		if (regs[i].ref_obj_id == ref_obj_id)
6013 			mark_reg_unknown(env, regs, i);
6014 
6015 	bpf_for_each_spilled_reg(i, state, reg) {
6016 		if (!reg)
6017 			continue;
6018 		if (reg->ref_obj_id == ref_obj_id)
6019 			__mark_reg_unknown(env, reg);
6020 	}
6021 }
6022 
6023 /* The pointer with the specified id has released its reference to kernel
6024  * resources. Identify all copies of the same pointer and clear the reference.
6025  */
6026 static int release_reference(struct bpf_verifier_env *env,
6027 			     int ref_obj_id)
6028 {
6029 	struct bpf_verifier_state *vstate = env->cur_state;
6030 	int err;
6031 	int i;
6032 
6033 	err = release_reference_state(cur_func(env), ref_obj_id);
6034 	if (err)
6035 		return err;
6036 
6037 	for (i = 0; i <= vstate->curframe; i++)
6038 		release_reg_references(env, vstate->frame[i], ref_obj_id);
6039 
6040 	return 0;
6041 }
6042 
6043 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
6044 				    struct bpf_reg_state *regs)
6045 {
6046 	int i;
6047 
6048 	/* after the call registers r0 - r5 were scratched */
6049 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
6050 		mark_reg_not_init(env, regs, caller_saved[i]);
6051 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6052 	}
6053 }
6054 
6055 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
6056 				   struct bpf_func_state *caller,
6057 				   struct bpf_func_state *callee,
6058 				   int insn_idx);
6059 
6060 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6061 			     int *insn_idx, int subprog,
6062 			     set_callee_state_fn set_callee_state_cb)
6063 {
6064 	struct bpf_verifier_state *state = env->cur_state;
6065 	struct bpf_func_info_aux *func_info_aux;
6066 	struct bpf_func_state *caller, *callee;
6067 	int err;
6068 	bool is_global = false;
6069 
6070 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
6071 		verbose(env, "the call stack of %d frames is too deep\n",
6072 			state->curframe + 2);
6073 		return -E2BIG;
6074 	}
6075 
6076 	caller = state->frame[state->curframe];
6077 	if (state->frame[state->curframe + 1]) {
6078 		verbose(env, "verifier bug. Frame %d already allocated\n",
6079 			state->curframe + 1);
6080 		return -EFAULT;
6081 	}
6082 
6083 	func_info_aux = env->prog->aux->func_info_aux;
6084 	if (func_info_aux)
6085 		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
6086 	err = btf_check_subprog_arg_match(env, subprog, caller->regs);
6087 	if (err == -EFAULT)
6088 		return err;
6089 	if (is_global) {
6090 		if (err) {
6091 			verbose(env, "Caller passes invalid args into func#%d\n",
6092 				subprog);
6093 			return err;
6094 		} else {
6095 			if (env->log.level & BPF_LOG_LEVEL)
6096 				verbose(env,
6097 					"Func#%d is global and valid. Skipping.\n",
6098 					subprog);
6099 			clear_caller_saved_regs(env, caller->regs);
6100 
6101 			/* All global functions return a 64-bit SCALAR_VALUE */
6102 			mark_reg_unknown(env, caller->regs, BPF_REG_0);
6103 			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6104 
6105 			/* continue with next insn after call */
6106 			return 0;
6107 		}
6108 	}
6109 
6110 	if (insn->code == (BPF_JMP | BPF_CALL) &&
6111 	    insn->src_reg == 0 &&
6112 	    insn->imm == BPF_FUNC_timer_set_callback) {
6113 		struct bpf_verifier_state *async_cb;
6114 
6115 		/* there is no real recursion here. timer callbacks are async */
6116 		env->subprog_info[subprog].is_async_cb = true;
6117 		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
6118 					 *insn_idx, subprog);
6119 		if (!async_cb)
6120 			return -EFAULT;
6121 		callee = async_cb->frame[0];
6122 		callee->async_entry_cnt = caller->async_entry_cnt + 1;
6123 
6124 		/* Convert bpf_timer_set_callback() args into timer callback args */
6125 		err = set_callee_state_cb(env, caller, callee, *insn_idx);
6126 		if (err)
6127 			return err;
6128 
6129 		clear_caller_saved_regs(env, caller->regs);
6130 		mark_reg_unknown(env, caller->regs, BPF_REG_0);
6131 		caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6132 		/* continue with next insn after call */
6133 		return 0;
6134 	}
6135 
6136 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
6137 	if (!callee)
6138 		return -ENOMEM;
6139 	state->frame[state->curframe + 1] = callee;
6140 
6141 	/* callee cannot access r0, r6 - r9 for reading and has to write
6142 	 * into its own stack before reading from it.
6143 	 * callee can read/write into caller's stack
6144 	 */
6145 	init_func_state(env, callee,
6146 			/* remember the callsite, it will be used by bpf_exit */
6147 			*insn_idx /* callsite */,
6148 			state->curframe + 1 /* frameno within this callchain */,
6149 			subprog /* subprog number within this prog */);
6150 
6151 	/* Transfer references to the callee */
6152 	err = copy_reference_state(callee, caller);
6153 	if (err)
6154 		return err;
6155 
6156 	err = set_callee_state_cb(env, caller, callee, *insn_idx);
6157 	if (err)
6158 		return err;
6159 
6160 	clear_caller_saved_regs(env, caller->regs);
6161 
6162 	/* only increment it after check_reg_arg() finished */
6163 	state->curframe++;
6164 
6165 	/* and go analyze first insn of the callee */
6166 	*insn_idx = env->subprog_info[subprog].start - 1;
6167 
6168 	if (env->log.level & BPF_LOG_LEVEL) {
6169 		verbose(env, "caller:\n");
6170 		print_verifier_state(env, caller, true);
6171 		verbose(env, "callee:\n");
6172 		print_verifier_state(env, callee, true);
6173 	}
6174 	return 0;
6175 }
6176 
6177 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
6178 				   struct bpf_func_state *caller,
6179 				   struct bpf_func_state *callee)
6180 {
6181 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
6182 	 *      void *callback_ctx, u64 flags);
6183 	 * callback_fn(struct bpf_map *map, void *key, void *value,
6184 	 *      void *callback_ctx);
6185 	 */
6186 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6187 
6188 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6189 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6190 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6191 
6192 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6193 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6194 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6195 
6196 	/* pointer to stack or null */
6197 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
6198 
6199 	/* unused */
6200 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6201 	return 0;
6202 }
6203 
6204 static int set_callee_state(struct bpf_verifier_env *env,
6205 			    struct bpf_func_state *caller,
6206 			    struct bpf_func_state *callee, int insn_idx)
6207 {
6208 	int i;
6209 
6210 	/* copy r1 - r5 args that callee can access.  The copy includes parent
6211 	 * pointers, which connects us up to the liveness chain
6212 	 */
6213 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
6214 		callee->regs[i] = caller->regs[i];
6215 	return 0;
6216 }
6217 
6218 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6219 			   int *insn_idx)
6220 {
6221 	int subprog, target_insn;
6222 
6223 	target_insn = *insn_idx + insn->imm + 1;
6224 	subprog = find_subprog(env, target_insn);
6225 	if (subprog < 0) {
6226 		verbose(env, "verifier bug. No program starts at insn %d\n",
6227 			target_insn);
6228 		return -EFAULT;
6229 	}
6230 
6231 	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
6232 }
6233 
6234 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
6235 				       struct bpf_func_state *caller,
6236 				       struct bpf_func_state *callee,
6237 				       int insn_idx)
6238 {
6239 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
6240 	struct bpf_map *map;
6241 	int err;
6242 
6243 	if (bpf_map_ptr_poisoned(insn_aux)) {
6244 		verbose(env, "tail_call abusing map_ptr\n");
6245 		return -EINVAL;
6246 	}
6247 
6248 	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
6249 	if (!map->ops->map_set_for_each_callback_args ||
6250 	    !map->ops->map_for_each_callback) {
6251 		verbose(env, "callback function not allowed for map\n");
6252 		return -ENOTSUPP;
6253 	}
6254 
6255 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
6256 	if (err)
6257 		return err;
6258 
6259 	callee->in_callback_fn = true;
6260 	return 0;
6261 }
6262 
6263 static int set_loop_callback_state(struct bpf_verifier_env *env,
6264 				   struct bpf_func_state *caller,
6265 				   struct bpf_func_state *callee,
6266 				   int insn_idx)
6267 {
6268 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
6269 	 *	    u64 flags);
6270 	 * callback_fn(u32 index, void *callback_ctx);
6271 	 */
6272 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
6273 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
6274 
6275 	/* unused */
6276 	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
6277 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6278 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6279 
6280 	callee->in_callback_fn = true;
6281 	return 0;
6282 }
6283 
6284 static int set_timer_callback_state(struct bpf_verifier_env *env,
6285 				    struct bpf_func_state *caller,
6286 				    struct bpf_func_state *callee,
6287 				    int insn_idx)
6288 {
6289 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
6290 
6291 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
6292 	 * callback_fn(struct bpf_map *map, void *key, void *value);
6293 	 */
6294 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
6295 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
6296 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
6297 
6298 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6299 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6300 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
6301 
6302 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6303 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6304 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
6305 
6306 	/* unused */
6307 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6308 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6309 	callee->in_async_callback_fn = true;
6310 	return 0;
6311 }
6312 
6313 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
6314 				       struct bpf_func_state *caller,
6315 				       struct bpf_func_state *callee,
6316 				       int insn_idx)
6317 {
6318 	/* bpf_find_vma(struct task_struct *task, u64 addr,
6319 	 *               void *callback_fn, void *callback_ctx, u64 flags)
6320 	 * (callback_fn)(struct task_struct *task,
6321 	 *               struct vm_area_struct *vma, void *callback_ctx);
6322 	 */
6323 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6324 
6325 	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
6326 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6327 	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
6328 	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA],
6329 
6330 	/* pointer to stack or null */
6331 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
6332 
6333 	/* unused */
6334 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6335 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6336 	callee->in_callback_fn = true;
6337 	return 0;
6338 }
6339 
6340 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
6341 {
6342 	struct bpf_verifier_state *state = env->cur_state;
6343 	struct bpf_func_state *caller, *callee;
6344 	struct bpf_reg_state *r0;
6345 	int err;
6346 
6347 	callee = state->frame[state->curframe];
6348 	r0 = &callee->regs[BPF_REG_0];
6349 	if (r0->type == PTR_TO_STACK) {
6350 		/* technically it's ok to return caller's stack pointer
6351 		 * (or caller's caller's pointer) back to the caller,
6352 		 * since these pointers are valid. Only current stack
6353 		 * pointer will be invalid as soon as function exits,
6354 		 * but let's be conservative
6355 		 */
6356 		verbose(env, "cannot return stack pointer to the caller\n");
6357 		return -EINVAL;
6358 	}
6359 
6360 	state->curframe--;
6361 	caller = state->frame[state->curframe];
6362 	if (callee->in_callback_fn) {
6363 		/* enforce R0 return value range [0, 1]. */
6364 		struct tnum range = tnum_range(0, 1);
6365 
6366 		if (r0->type != SCALAR_VALUE) {
6367 			verbose(env, "R0 not a scalar value\n");
6368 			return -EACCES;
6369 		}
6370 		if (!tnum_in(range, r0->var_off)) {
6371 			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
6372 			return -EINVAL;
6373 		}
6374 	} else {
6375 		/* return to the caller whatever r0 had in the callee */
6376 		caller->regs[BPF_REG_0] = *r0;
6377 	}
6378 
6379 	/* Transfer references to the caller */
6380 	err = copy_reference_state(caller, callee);
6381 	if (err)
6382 		return err;
6383 
6384 	*insn_idx = callee->callsite + 1;
6385 	if (env->log.level & BPF_LOG_LEVEL) {
6386 		verbose(env, "returning from callee:\n");
6387 		print_verifier_state(env, callee, true);
6388 		verbose(env, "to caller at %d:\n", *insn_idx);
6389 		print_verifier_state(env, caller, true);
6390 	}
6391 	/* clear everything in the callee */
6392 	free_func_state(callee);
6393 	state->frame[state->curframe + 1] = NULL;
6394 	return 0;
6395 }
6396 
6397 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
6398 				   int func_id,
6399 				   struct bpf_call_arg_meta *meta)
6400 {
6401 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
6402 
6403 	if (ret_type != RET_INTEGER ||
6404 	    (func_id != BPF_FUNC_get_stack &&
6405 	     func_id != BPF_FUNC_get_task_stack &&
6406 	     func_id != BPF_FUNC_probe_read_str &&
6407 	     func_id != BPF_FUNC_probe_read_kernel_str &&
6408 	     func_id != BPF_FUNC_probe_read_user_str))
6409 		return;
6410 
6411 	ret_reg->smax_value = meta->msize_max_value;
6412 	ret_reg->s32_max_value = meta->msize_max_value;
6413 	ret_reg->smin_value = -MAX_ERRNO;
6414 	ret_reg->s32_min_value = -MAX_ERRNO;
6415 	__reg_deduce_bounds(ret_reg);
6416 	__reg_bound_offset(ret_reg);
6417 	__update_reg_bounds(ret_reg);
6418 }
6419 
6420 static int
6421 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6422 		int func_id, int insn_idx)
6423 {
6424 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6425 	struct bpf_map *map = meta->map_ptr;
6426 
6427 	if (func_id != BPF_FUNC_tail_call &&
6428 	    func_id != BPF_FUNC_map_lookup_elem &&
6429 	    func_id != BPF_FUNC_map_update_elem &&
6430 	    func_id != BPF_FUNC_map_delete_elem &&
6431 	    func_id != BPF_FUNC_map_push_elem &&
6432 	    func_id != BPF_FUNC_map_pop_elem &&
6433 	    func_id != BPF_FUNC_map_peek_elem &&
6434 	    func_id != BPF_FUNC_for_each_map_elem &&
6435 	    func_id != BPF_FUNC_redirect_map)
6436 		return 0;
6437 
6438 	if (map == NULL) {
6439 		verbose(env, "kernel subsystem misconfigured verifier\n");
6440 		return -EINVAL;
6441 	}
6442 
6443 	/* In case of read-only, some additional restrictions
6444 	 * need to be applied in order to prevent altering the
6445 	 * state of the map from program side.
6446 	 */
6447 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
6448 	    (func_id == BPF_FUNC_map_delete_elem ||
6449 	     func_id == BPF_FUNC_map_update_elem ||
6450 	     func_id == BPF_FUNC_map_push_elem ||
6451 	     func_id == BPF_FUNC_map_pop_elem)) {
6452 		verbose(env, "write into map forbidden\n");
6453 		return -EACCES;
6454 	}
6455 
6456 	if (!BPF_MAP_PTR(aux->map_ptr_state))
6457 		bpf_map_ptr_store(aux, meta->map_ptr,
6458 				  !meta->map_ptr->bypass_spec_v1);
6459 	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
6460 		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
6461 				  !meta->map_ptr->bypass_spec_v1);
6462 	return 0;
6463 }
6464 
6465 static int
6466 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6467 		int func_id, int insn_idx)
6468 {
6469 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6470 	struct bpf_reg_state *regs = cur_regs(env), *reg;
6471 	struct bpf_map *map = meta->map_ptr;
6472 	struct tnum range;
6473 	u64 val;
6474 	int err;
6475 
6476 	if (func_id != BPF_FUNC_tail_call)
6477 		return 0;
6478 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
6479 		verbose(env, "kernel subsystem misconfigured verifier\n");
6480 		return -EINVAL;
6481 	}
6482 
6483 	range = tnum_range(0, map->max_entries - 1);
6484 	reg = &regs[BPF_REG_3];
6485 
6486 	if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
6487 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6488 		return 0;
6489 	}
6490 
6491 	err = mark_chain_precision(env, BPF_REG_3);
6492 	if (err)
6493 		return err;
6494 
6495 	val = reg->var_off.value;
6496 	if (bpf_map_key_unseen(aux))
6497 		bpf_map_key_store(aux, val);
6498 	else if (!bpf_map_key_poisoned(aux) &&
6499 		  bpf_map_key_immediate(aux) != val)
6500 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6501 	return 0;
6502 }
6503 
6504 static int check_reference_leak(struct bpf_verifier_env *env)
6505 {
6506 	struct bpf_func_state *state = cur_func(env);
6507 	int i;
6508 
6509 	for (i = 0; i < state->acquired_refs; i++) {
6510 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
6511 			state->refs[i].id, state->refs[i].insn_idx);
6512 	}
6513 	return state->acquired_refs ? -EINVAL : 0;
6514 }
6515 
6516 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
6517 				   struct bpf_reg_state *regs)
6518 {
6519 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
6520 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
6521 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
6522 	int err, fmt_map_off, num_args;
6523 	u64 fmt_addr;
6524 	char *fmt;
6525 
6526 	/* data must be an array of u64 */
6527 	if (data_len_reg->var_off.value % 8)
6528 		return -EINVAL;
6529 	num_args = data_len_reg->var_off.value / 8;
6530 
6531 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
6532 	 * and map_direct_value_addr is set.
6533 	 */
6534 	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
6535 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
6536 						  fmt_map_off);
6537 	if (err) {
6538 		verbose(env, "verifier bug\n");
6539 		return -EFAULT;
6540 	}
6541 	fmt = (char *)(long)fmt_addr + fmt_map_off;
6542 
6543 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
6544 	 * can focus on validating the format specifiers.
6545 	 */
6546 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
6547 	if (err < 0)
6548 		verbose(env, "Invalid format string\n");
6549 
6550 	return err;
6551 }
6552 
6553 static int check_get_func_ip(struct bpf_verifier_env *env)
6554 {
6555 	enum bpf_prog_type type = resolve_prog_type(env->prog);
6556 	int func_id = BPF_FUNC_get_func_ip;
6557 
6558 	if (type == BPF_PROG_TYPE_TRACING) {
6559 		if (!bpf_prog_has_trampoline(env->prog)) {
6560 			verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
6561 				func_id_name(func_id), func_id);
6562 			return -ENOTSUPP;
6563 		}
6564 		return 0;
6565 	} else if (type == BPF_PROG_TYPE_KPROBE) {
6566 		return 0;
6567 	}
6568 
6569 	verbose(env, "func %s#%d not supported for program type %d\n",
6570 		func_id_name(func_id), func_id, type);
6571 	return -ENOTSUPP;
6572 }
6573 
6574 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6575 			     int *insn_idx_p)
6576 {
6577 	const struct bpf_func_proto *fn = NULL;
6578 	enum bpf_return_type ret_type;
6579 	enum bpf_type_flag ret_flag;
6580 	struct bpf_reg_state *regs;
6581 	struct bpf_call_arg_meta meta;
6582 	int insn_idx = *insn_idx_p;
6583 	bool changes_data;
6584 	int i, err, func_id;
6585 
6586 	/* find function prototype */
6587 	func_id = insn->imm;
6588 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
6589 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
6590 			func_id);
6591 		return -EINVAL;
6592 	}
6593 
6594 	if (env->ops->get_func_proto)
6595 		fn = env->ops->get_func_proto(func_id, env->prog);
6596 	if (!fn) {
6597 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
6598 			func_id);
6599 		return -EINVAL;
6600 	}
6601 
6602 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
6603 	if (!env->prog->gpl_compatible && fn->gpl_only) {
6604 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
6605 		return -EINVAL;
6606 	}
6607 
6608 	if (fn->allowed && !fn->allowed(env->prog)) {
6609 		verbose(env, "helper call is not allowed in probe\n");
6610 		return -EINVAL;
6611 	}
6612 
6613 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
6614 	changes_data = bpf_helper_changes_pkt_data(fn->func);
6615 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
6616 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
6617 			func_id_name(func_id), func_id);
6618 		return -EINVAL;
6619 	}
6620 
6621 	memset(&meta, 0, sizeof(meta));
6622 	meta.pkt_access = fn->pkt_access;
6623 
6624 	err = check_func_proto(fn, func_id);
6625 	if (err) {
6626 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
6627 			func_id_name(func_id), func_id);
6628 		return err;
6629 	}
6630 
6631 	meta.func_id = func_id;
6632 	/* check args */
6633 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
6634 		err = check_func_arg(env, i, &meta, fn);
6635 		if (err)
6636 			return err;
6637 	}
6638 
6639 	err = record_func_map(env, &meta, func_id, insn_idx);
6640 	if (err)
6641 		return err;
6642 
6643 	err = record_func_key(env, &meta, func_id, insn_idx);
6644 	if (err)
6645 		return err;
6646 
6647 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
6648 	 * is inferred from register state.
6649 	 */
6650 	for (i = 0; i < meta.access_size; i++) {
6651 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
6652 				       BPF_WRITE, -1, false);
6653 		if (err)
6654 			return err;
6655 	}
6656 
6657 	if (is_release_function(func_id)) {
6658 		err = release_reference(env, meta.ref_obj_id);
6659 		if (err) {
6660 			verbose(env, "func %s#%d reference has not been acquired before\n",
6661 				func_id_name(func_id), func_id);
6662 			return err;
6663 		}
6664 	}
6665 
6666 	regs = cur_regs(env);
6667 
6668 	switch (func_id) {
6669 	case BPF_FUNC_tail_call:
6670 		err = check_reference_leak(env);
6671 		if (err) {
6672 			verbose(env, "tail_call would lead to reference leak\n");
6673 			return err;
6674 		}
6675 		break;
6676 	case BPF_FUNC_get_local_storage:
6677 		/* check that flags argument in get_local_storage(map, flags) is 0,
6678 		 * this is required because get_local_storage() can't return an error.
6679 		 */
6680 		if (!register_is_null(&regs[BPF_REG_2])) {
6681 			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
6682 			return -EINVAL;
6683 		}
6684 		break;
6685 	case BPF_FUNC_for_each_map_elem:
6686 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6687 					set_map_elem_callback_state);
6688 		break;
6689 	case BPF_FUNC_timer_set_callback:
6690 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6691 					set_timer_callback_state);
6692 		break;
6693 	case BPF_FUNC_find_vma:
6694 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6695 					set_find_vma_callback_state);
6696 		break;
6697 	case BPF_FUNC_snprintf:
6698 		err = check_bpf_snprintf_call(env, regs);
6699 		break;
6700 	case BPF_FUNC_loop:
6701 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6702 					set_loop_callback_state);
6703 		break;
6704 	}
6705 
6706 	if (err)
6707 		return err;
6708 
6709 	/* reset caller saved regs */
6710 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
6711 		mark_reg_not_init(env, regs, caller_saved[i]);
6712 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6713 	}
6714 
6715 	/* helper call returns 64-bit value. */
6716 	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6717 
6718 	/* update return register (already marked as written above) */
6719 	ret_type = fn->ret_type;
6720 	ret_flag = type_flag(fn->ret_type);
6721 	if (ret_type == RET_INTEGER) {
6722 		/* sets type to SCALAR_VALUE */
6723 		mark_reg_unknown(env, regs, BPF_REG_0);
6724 	} else if (ret_type == RET_VOID) {
6725 		regs[BPF_REG_0].type = NOT_INIT;
6726 	} else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
6727 		/* There is no offset yet applied, variable or fixed */
6728 		mark_reg_known_zero(env, regs, BPF_REG_0);
6729 		/* remember map_ptr, so that check_map_access()
6730 		 * can check 'value_size' boundary of memory access
6731 		 * to map element returned from bpf_map_lookup_elem()
6732 		 */
6733 		if (meta.map_ptr == NULL) {
6734 			verbose(env,
6735 				"kernel subsystem misconfigured verifier\n");
6736 			return -EINVAL;
6737 		}
6738 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
6739 		regs[BPF_REG_0].map_uid = meta.map_uid;
6740 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
6741 		if (!type_may_be_null(ret_type) &&
6742 		    map_value_has_spin_lock(meta.map_ptr)) {
6743 			regs[BPF_REG_0].id = ++env->id_gen;
6744 		}
6745 	} else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
6746 		mark_reg_known_zero(env, regs, BPF_REG_0);
6747 		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
6748 	} else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
6749 		mark_reg_known_zero(env, regs, BPF_REG_0);
6750 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
6751 	} else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
6752 		mark_reg_known_zero(env, regs, BPF_REG_0);
6753 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
6754 	} else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
6755 		mark_reg_known_zero(env, regs, BPF_REG_0);
6756 		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
6757 		regs[BPF_REG_0].mem_size = meta.mem_size;
6758 	} else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
6759 		const struct btf_type *t;
6760 
6761 		mark_reg_known_zero(env, regs, BPF_REG_0);
6762 		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
6763 		if (!btf_type_is_struct(t)) {
6764 			u32 tsize;
6765 			const struct btf_type *ret;
6766 			const char *tname;
6767 
6768 			/* resolve the type size of ksym. */
6769 			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
6770 			if (IS_ERR(ret)) {
6771 				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
6772 				verbose(env, "unable to resolve the size of type '%s': %ld\n",
6773 					tname, PTR_ERR(ret));
6774 				return -EINVAL;
6775 			}
6776 			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
6777 			regs[BPF_REG_0].mem_size = tsize;
6778 		} else {
6779 			/* MEM_RDONLY may be carried from ret_flag, but it
6780 			 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
6781 			 * it will confuse the check of PTR_TO_BTF_ID in
6782 			 * check_mem_access().
6783 			 */
6784 			ret_flag &= ~MEM_RDONLY;
6785 
6786 			regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
6787 			regs[BPF_REG_0].btf = meta.ret_btf;
6788 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
6789 		}
6790 	} else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
6791 		int ret_btf_id;
6792 
6793 		mark_reg_known_zero(env, regs, BPF_REG_0);
6794 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
6795 		ret_btf_id = *fn->ret_btf_id;
6796 		if (ret_btf_id == 0) {
6797 			verbose(env, "invalid return type %u of func %s#%d\n",
6798 				base_type(ret_type), func_id_name(func_id),
6799 				func_id);
6800 			return -EINVAL;
6801 		}
6802 		/* current BPF helper definitions are only coming from
6803 		 * built-in code with type IDs from  vmlinux BTF
6804 		 */
6805 		regs[BPF_REG_0].btf = btf_vmlinux;
6806 		regs[BPF_REG_0].btf_id = ret_btf_id;
6807 	} else {
6808 		verbose(env, "unknown return type %u of func %s#%d\n",
6809 			base_type(ret_type), func_id_name(func_id), func_id);
6810 		return -EINVAL;
6811 	}
6812 
6813 	if (type_may_be_null(regs[BPF_REG_0].type))
6814 		regs[BPF_REG_0].id = ++env->id_gen;
6815 
6816 	if (is_ptr_cast_function(func_id)) {
6817 		/* For release_reference() */
6818 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
6819 	} else if (is_acquire_function(func_id, meta.map_ptr)) {
6820 		int id = acquire_reference_state(env, insn_idx);
6821 
6822 		if (id < 0)
6823 			return id;
6824 		/* For mark_ptr_or_null_reg() */
6825 		regs[BPF_REG_0].id = id;
6826 		/* For release_reference() */
6827 		regs[BPF_REG_0].ref_obj_id = id;
6828 	}
6829 
6830 	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
6831 
6832 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
6833 	if (err)
6834 		return err;
6835 
6836 	if ((func_id == BPF_FUNC_get_stack ||
6837 	     func_id == BPF_FUNC_get_task_stack) &&
6838 	    !env->prog->has_callchain_buf) {
6839 		const char *err_str;
6840 
6841 #ifdef CONFIG_PERF_EVENTS
6842 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
6843 		err_str = "cannot get callchain buffer for func %s#%d\n";
6844 #else
6845 		err = -ENOTSUPP;
6846 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
6847 #endif
6848 		if (err) {
6849 			verbose(env, err_str, func_id_name(func_id), func_id);
6850 			return err;
6851 		}
6852 
6853 		env->prog->has_callchain_buf = true;
6854 	}
6855 
6856 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
6857 		env->prog->call_get_stack = true;
6858 
6859 	if (func_id == BPF_FUNC_get_func_ip) {
6860 		if (check_get_func_ip(env))
6861 			return -ENOTSUPP;
6862 		env->prog->call_get_func_ip = true;
6863 	}
6864 
6865 	if (changes_data)
6866 		clear_all_pkt_pointers(env);
6867 	return 0;
6868 }
6869 
6870 /* mark_btf_func_reg_size() is used when the reg size is determined by
6871  * the BTF func_proto's return value size and argument.
6872  */
6873 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
6874 				   size_t reg_size)
6875 {
6876 	struct bpf_reg_state *reg = &cur_regs(env)[regno];
6877 
6878 	if (regno == BPF_REG_0) {
6879 		/* Function return value */
6880 		reg->live |= REG_LIVE_WRITTEN;
6881 		reg->subreg_def = reg_size == sizeof(u64) ?
6882 			DEF_NOT_SUBREG : env->insn_idx + 1;
6883 	} else {
6884 		/* Function argument */
6885 		if (reg_size == sizeof(u64)) {
6886 			mark_insn_zext(env, reg);
6887 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
6888 		} else {
6889 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
6890 		}
6891 	}
6892 }
6893 
6894 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6895 			    int *insn_idx_p)
6896 {
6897 	const struct btf_type *t, *func, *func_proto, *ptr_type;
6898 	struct bpf_reg_state *regs = cur_regs(env);
6899 	const char *func_name, *ptr_type_name;
6900 	u32 i, nargs, func_id, ptr_type_id;
6901 	int err, insn_idx = *insn_idx_p;
6902 	const struct btf_param *args;
6903 	struct btf *desc_btf;
6904 	bool acq;
6905 
6906 	/* skip for now, but return error when we find this in fixup_kfunc_call */
6907 	if (!insn->imm)
6908 		return 0;
6909 
6910 	desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
6911 	if (IS_ERR(desc_btf))
6912 		return PTR_ERR(desc_btf);
6913 
6914 	func_id = insn->imm;
6915 	func = btf_type_by_id(desc_btf, func_id);
6916 	func_name = btf_name_by_offset(desc_btf, func->name_off);
6917 	func_proto = btf_type_by_id(desc_btf, func->type);
6918 
6919 	if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
6920 				      BTF_KFUNC_TYPE_CHECK, func_id)) {
6921 		verbose(env, "calling kernel function %s is not allowed\n",
6922 			func_name);
6923 		return -EACCES;
6924 	}
6925 
6926 	acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
6927 					BTF_KFUNC_TYPE_ACQUIRE, func_id);
6928 
6929 	/* Check the arguments */
6930 	err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
6931 	if (err < 0)
6932 		return err;
6933 	/* In case of release function, we get register number of refcounted
6934 	 * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
6935 	 */
6936 	if (err) {
6937 		err = release_reference(env, regs[err].ref_obj_id);
6938 		if (err) {
6939 			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
6940 				func_name, func_id);
6941 			return err;
6942 		}
6943 	}
6944 
6945 	for (i = 0; i < CALLER_SAVED_REGS; i++)
6946 		mark_reg_not_init(env, regs, caller_saved[i]);
6947 
6948 	/* Check return type */
6949 	t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
6950 
6951 	if (acq && !btf_type_is_ptr(t)) {
6952 		verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
6953 		return -EINVAL;
6954 	}
6955 
6956 	if (btf_type_is_scalar(t)) {
6957 		mark_reg_unknown(env, regs, BPF_REG_0);
6958 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
6959 	} else if (btf_type_is_ptr(t)) {
6960 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
6961 						   &ptr_type_id);
6962 		if (!btf_type_is_struct(ptr_type)) {
6963 			ptr_type_name = btf_name_by_offset(desc_btf,
6964 							   ptr_type->name_off);
6965 			verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
6966 				func_name, btf_type_str(ptr_type),
6967 				ptr_type_name);
6968 			return -EINVAL;
6969 		}
6970 		mark_reg_known_zero(env, regs, BPF_REG_0);
6971 		regs[BPF_REG_0].btf = desc_btf;
6972 		regs[BPF_REG_0].type = PTR_TO_BTF_ID;
6973 		regs[BPF_REG_0].btf_id = ptr_type_id;
6974 		if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
6975 					      BTF_KFUNC_TYPE_RET_NULL, func_id)) {
6976 			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
6977 			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
6978 			regs[BPF_REG_0].id = ++env->id_gen;
6979 		}
6980 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
6981 		if (acq) {
6982 			int id = acquire_reference_state(env, insn_idx);
6983 
6984 			if (id < 0)
6985 				return id;
6986 			regs[BPF_REG_0].id = id;
6987 			regs[BPF_REG_0].ref_obj_id = id;
6988 		}
6989 	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
6990 
6991 	nargs = btf_type_vlen(func_proto);
6992 	args = (const struct btf_param *)(func_proto + 1);
6993 	for (i = 0; i < nargs; i++) {
6994 		u32 regno = i + 1;
6995 
6996 		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
6997 		if (btf_type_is_ptr(t))
6998 			mark_btf_func_reg_size(env, regno, sizeof(void *));
6999 		else
7000 			/* scalar. ensured by btf_check_kfunc_arg_match() */
7001 			mark_btf_func_reg_size(env, regno, t->size);
7002 	}
7003 
7004 	return 0;
7005 }
7006 
7007 static bool signed_add_overflows(s64 a, s64 b)
7008 {
7009 	/* Do the add in u64, where overflow is well-defined */
7010 	s64 res = (s64)((u64)a + (u64)b);
7011 
7012 	if (b < 0)
7013 		return res > a;
7014 	return res < a;
7015 }
7016 
7017 static bool signed_add32_overflows(s32 a, s32 b)
7018 {
7019 	/* Do the add in u32, where overflow is well-defined */
7020 	s32 res = (s32)((u32)a + (u32)b);
7021 
7022 	if (b < 0)
7023 		return res > a;
7024 	return res < a;
7025 }
7026 
7027 static bool signed_sub_overflows(s64 a, s64 b)
7028 {
7029 	/* Do the sub in u64, where overflow is well-defined */
7030 	s64 res = (s64)((u64)a - (u64)b);
7031 
7032 	if (b < 0)
7033 		return res < a;
7034 	return res > a;
7035 }
7036 
7037 static bool signed_sub32_overflows(s32 a, s32 b)
7038 {
7039 	/* Do the sub in u32, where overflow is well-defined */
7040 	s32 res = (s32)((u32)a - (u32)b);
7041 
7042 	if (b < 0)
7043 		return res < a;
7044 	return res > a;
7045 }
7046 
7047 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
7048 				  const struct bpf_reg_state *reg,
7049 				  enum bpf_reg_type type)
7050 {
7051 	bool known = tnum_is_const(reg->var_off);
7052 	s64 val = reg->var_off.value;
7053 	s64 smin = reg->smin_value;
7054 
7055 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
7056 		verbose(env, "math between %s pointer and %lld is not allowed\n",
7057 			reg_type_str(env, type), val);
7058 		return false;
7059 	}
7060 
7061 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
7062 		verbose(env, "%s pointer offset %d is not allowed\n",
7063 			reg_type_str(env, type), reg->off);
7064 		return false;
7065 	}
7066 
7067 	if (smin == S64_MIN) {
7068 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
7069 			reg_type_str(env, type));
7070 		return false;
7071 	}
7072 
7073 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
7074 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
7075 			smin, reg_type_str(env, type));
7076 		return false;
7077 	}
7078 
7079 	return true;
7080 }
7081 
7082 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
7083 {
7084 	return &env->insn_aux_data[env->insn_idx];
7085 }
7086 
7087 enum {
7088 	REASON_BOUNDS	= -1,
7089 	REASON_TYPE	= -2,
7090 	REASON_PATHS	= -3,
7091 	REASON_LIMIT	= -4,
7092 	REASON_STACK	= -5,
7093 };
7094 
7095 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
7096 			      u32 *alu_limit, bool mask_to_left)
7097 {
7098 	u32 max = 0, ptr_limit = 0;
7099 
7100 	switch (ptr_reg->type) {
7101 	case PTR_TO_STACK:
7102 		/* Offset 0 is out-of-bounds, but acceptable start for the
7103 		 * left direction, see BPF_REG_FP. Also, unknown scalar
7104 		 * offset where we would need to deal with min/max bounds is
7105 		 * currently prohibited for unprivileged.
7106 		 */
7107 		max = MAX_BPF_STACK + mask_to_left;
7108 		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
7109 		break;
7110 	case PTR_TO_MAP_VALUE:
7111 		max = ptr_reg->map_ptr->value_size;
7112 		ptr_limit = (mask_to_left ?
7113 			     ptr_reg->smin_value :
7114 			     ptr_reg->umax_value) + ptr_reg->off;
7115 		break;
7116 	default:
7117 		return REASON_TYPE;
7118 	}
7119 
7120 	if (ptr_limit >= max)
7121 		return REASON_LIMIT;
7122 	*alu_limit = ptr_limit;
7123 	return 0;
7124 }
7125 
7126 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
7127 				    const struct bpf_insn *insn)
7128 {
7129 	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
7130 }
7131 
7132 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
7133 				       u32 alu_state, u32 alu_limit)
7134 {
7135 	/* If we arrived here from different branches with different
7136 	 * state or limits to sanitize, then this won't work.
7137 	 */
7138 	if (aux->alu_state &&
7139 	    (aux->alu_state != alu_state ||
7140 	     aux->alu_limit != alu_limit))
7141 		return REASON_PATHS;
7142 
7143 	/* Corresponding fixup done in do_misc_fixups(). */
7144 	aux->alu_state = alu_state;
7145 	aux->alu_limit = alu_limit;
7146 	return 0;
7147 }
7148 
7149 static int sanitize_val_alu(struct bpf_verifier_env *env,
7150 			    struct bpf_insn *insn)
7151 {
7152 	struct bpf_insn_aux_data *aux = cur_aux(env);
7153 
7154 	if (can_skip_alu_sanitation(env, insn))
7155 		return 0;
7156 
7157 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
7158 }
7159 
7160 static bool sanitize_needed(u8 opcode)
7161 {
7162 	return opcode == BPF_ADD || opcode == BPF_SUB;
7163 }
7164 
7165 struct bpf_sanitize_info {
7166 	struct bpf_insn_aux_data aux;
7167 	bool mask_to_left;
7168 };
7169 
7170 static struct bpf_verifier_state *
7171 sanitize_speculative_path(struct bpf_verifier_env *env,
7172 			  const struct bpf_insn *insn,
7173 			  u32 next_idx, u32 curr_idx)
7174 {
7175 	struct bpf_verifier_state *branch;
7176 	struct bpf_reg_state *regs;
7177 
7178 	branch = push_stack(env, next_idx, curr_idx, true);
7179 	if (branch && insn) {
7180 		regs = branch->frame[branch->curframe]->regs;
7181 		if (BPF_SRC(insn->code) == BPF_K) {
7182 			mark_reg_unknown(env, regs, insn->dst_reg);
7183 		} else if (BPF_SRC(insn->code) == BPF_X) {
7184 			mark_reg_unknown(env, regs, insn->dst_reg);
7185 			mark_reg_unknown(env, regs, insn->src_reg);
7186 		}
7187 	}
7188 	return branch;
7189 }
7190 
7191 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
7192 			    struct bpf_insn *insn,
7193 			    const struct bpf_reg_state *ptr_reg,
7194 			    const struct bpf_reg_state *off_reg,
7195 			    struct bpf_reg_state *dst_reg,
7196 			    struct bpf_sanitize_info *info,
7197 			    const bool commit_window)
7198 {
7199 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
7200 	struct bpf_verifier_state *vstate = env->cur_state;
7201 	bool off_is_imm = tnum_is_const(off_reg->var_off);
7202 	bool off_is_neg = off_reg->smin_value < 0;
7203 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
7204 	u8 opcode = BPF_OP(insn->code);
7205 	u32 alu_state, alu_limit;
7206 	struct bpf_reg_state tmp;
7207 	bool ret;
7208 	int err;
7209 
7210 	if (can_skip_alu_sanitation(env, insn))
7211 		return 0;
7212 
7213 	/* We already marked aux for masking from non-speculative
7214 	 * paths, thus we got here in the first place. We only care
7215 	 * to explore bad access from here.
7216 	 */
7217 	if (vstate->speculative)
7218 		goto do_sim;
7219 
7220 	if (!commit_window) {
7221 		if (!tnum_is_const(off_reg->var_off) &&
7222 		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
7223 			return REASON_BOUNDS;
7224 
7225 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
7226 				     (opcode == BPF_SUB && !off_is_neg);
7227 	}
7228 
7229 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
7230 	if (err < 0)
7231 		return err;
7232 
7233 	if (commit_window) {
7234 		/* In commit phase we narrow the masking window based on
7235 		 * the observed pointer move after the simulated operation.
7236 		 */
7237 		alu_state = info->aux.alu_state;
7238 		alu_limit = abs(info->aux.alu_limit - alu_limit);
7239 	} else {
7240 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
7241 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
7242 		alu_state |= ptr_is_dst_reg ?
7243 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
7244 
7245 		/* Limit pruning on unknown scalars to enable deep search for
7246 		 * potential masking differences from other program paths.
7247 		 */
7248 		if (!off_is_imm)
7249 			env->explore_alu_limits = true;
7250 	}
7251 
7252 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
7253 	if (err < 0)
7254 		return err;
7255 do_sim:
7256 	/* If we're in commit phase, we're done here given we already
7257 	 * pushed the truncated dst_reg into the speculative verification
7258 	 * stack.
7259 	 *
7260 	 * Also, when register is a known constant, we rewrite register-based
7261 	 * operation to immediate-based, and thus do not need masking (and as
7262 	 * a consequence, do not need to simulate the zero-truncation either).
7263 	 */
7264 	if (commit_window || off_is_imm)
7265 		return 0;
7266 
7267 	/* Simulate and find potential out-of-bounds access under
7268 	 * speculative execution from truncation as a result of
7269 	 * masking when off was not within expected range. If off
7270 	 * sits in dst, then we temporarily need to move ptr there
7271 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
7272 	 * for cases where we use K-based arithmetic in one direction
7273 	 * and truncated reg-based in the other in order to explore
7274 	 * bad access.
7275 	 */
7276 	if (!ptr_is_dst_reg) {
7277 		tmp = *dst_reg;
7278 		*dst_reg = *ptr_reg;
7279 	}
7280 	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
7281 					env->insn_idx);
7282 	if (!ptr_is_dst_reg && ret)
7283 		*dst_reg = tmp;
7284 	return !ret ? REASON_STACK : 0;
7285 }
7286 
7287 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
7288 {
7289 	struct bpf_verifier_state *vstate = env->cur_state;
7290 
7291 	/* If we simulate paths under speculation, we don't update the
7292 	 * insn as 'seen' such that when we verify unreachable paths in
7293 	 * the non-speculative domain, sanitize_dead_code() can still
7294 	 * rewrite/sanitize them.
7295 	 */
7296 	if (!vstate->speculative)
7297 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
7298 }
7299 
7300 static int sanitize_err(struct bpf_verifier_env *env,
7301 			const struct bpf_insn *insn, int reason,
7302 			const struct bpf_reg_state *off_reg,
7303 			const struct bpf_reg_state *dst_reg)
7304 {
7305 	static const char *err = "pointer arithmetic with it prohibited for !root";
7306 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
7307 	u32 dst = insn->dst_reg, src = insn->src_reg;
7308 
7309 	switch (reason) {
7310 	case REASON_BOUNDS:
7311 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
7312 			off_reg == dst_reg ? dst : src, err);
7313 		break;
7314 	case REASON_TYPE:
7315 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
7316 			off_reg == dst_reg ? src : dst, err);
7317 		break;
7318 	case REASON_PATHS:
7319 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
7320 			dst, op, err);
7321 		break;
7322 	case REASON_LIMIT:
7323 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
7324 			dst, op, err);
7325 		break;
7326 	case REASON_STACK:
7327 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
7328 			dst, err);
7329 		break;
7330 	default:
7331 		verbose(env, "verifier internal error: unknown reason (%d)\n",
7332 			reason);
7333 		break;
7334 	}
7335 
7336 	return -EACCES;
7337 }
7338 
7339 /* check that stack access falls within stack limits and that 'reg' doesn't
7340  * have a variable offset.
7341  *
7342  * Variable offset is prohibited for unprivileged mode for simplicity since it
7343  * requires corresponding support in Spectre masking for stack ALU.  See also
7344  * retrieve_ptr_limit().
7345  *
7346  *
7347  * 'off' includes 'reg->off'.
7348  */
7349 static int check_stack_access_for_ptr_arithmetic(
7350 				struct bpf_verifier_env *env,
7351 				int regno,
7352 				const struct bpf_reg_state *reg,
7353 				int off)
7354 {
7355 	if (!tnum_is_const(reg->var_off)) {
7356 		char tn_buf[48];
7357 
7358 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7359 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
7360 			regno, tn_buf, off);
7361 		return -EACCES;
7362 	}
7363 
7364 	if (off >= 0 || off < -MAX_BPF_STACK) {
7365 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
7366 			"prohibited for !root; off=%d\n", regno, off);
7367 		return -EACCES;
7368 	}
7369 
7370 	return 0;
7371 }
7372 
7373 static int sanitize_check_bounds(struct bpf_verifier_env *env,
7374 				 const struct bpf_insn *insn,
7375 				 const struct bpf_reg_state *dst_reg)
7376 {
7377 	u32 dst = insn->dst_reg;
7378 
7379 	/* For unprivileged we require that resulting offset must be in bounds
7380 	 * in order to be able to sanitize access later on.
7381 	 */
7382 	if (env->bypass_spec_v1)
7383 		return 0;
7384 
7385 	switch (dst_reg->type) {
7386 	case PTR_TO_STACK:
7387 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
7388 					dst_reg->off + dst_reg->var_off.value))
7389 			return -EACCES;
7390 		break;
7391 	case PTR_TO_MAP_VALUE:
7392 		if (check_map_access(env, dst, dst_reg->off, 1, false)) {
7393 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
7394 				"prohibited for !root\n", dst);
7395 			return -EACCES;
7396 		}
7397 		break;
7398 	default:
7399 		break;
7400 	}
7401 
7402 	return 0;
7403 }
7404 
7405 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
7406  * Caller should also handle BPF_MOV case separately.
7407  * If we return -EACCES, caller may want to try again treating pointer as a
7408  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
7409  */
7410 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
7411 				   struct bpf_insn *insn,
7412 				   const struct bpf_reg_state *ptr_reg,
7413 				   const struct bpf_reg_state *off_reg)
7414 {
7415 	struct bpf_verifier_state *vstate = env->cur_state;
7416 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
7417 	struct bpf_reg_state *regs = state->regs, *dst_reg;
7418 	bool known = tnum_is_const(off_reg->var_off);
7419 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
7420 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
7421 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
7422 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
7423 	struct bpf_sanitize_info info = {};
7424 	u8 opcode = BPF_OP(insn->code);
7425 	u32 dst = insn->dst_reg;
7426 	int ret;
7427 
7428 	dst_reg = &regs[dst];
7429 
7430 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
7431 	    smin_val > smax_val || umin_val > umax_val) {
7432 		/* Taint dst register if offset had invalid bounds derived from
7433 		 * e.g. dead branches.
7434 		 */
7435 		__mark_reg_unknown(env, dst_reg);
7436 		return 0;
7437 	}
7438 
7439 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
7440 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
7441 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
7442 			__mark_reg_unknown(env, dst_reg);
7443 			return 0;
7444 		}
7445 
7446 		verbose(env,
7447 			"R%d 32-bit pointer arithmetic prohibited\n",
7448 			dst);
7449 		return -EACCES;
7450 	}
7451 
7452 	if (ptr_reg->type & PTR_MAYBE_NULL) {
7453 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
7454 			dst, reg_type_str(env, ptr_reg->type));
7455 		return -EACCES;
7456 	}
7457 
7458 	switch (base_type(ptr_reg->type)) {
7459 	case CONST_PTR_TO_MAP:
7460 		/* smin_val represents the known value */
7461 		if (known && smin_val == 0 && opcode == BPF_ADD)
7462 			break;
7463 		fallthrough;
7464 	case PTR_TO_PACKET_END:
7465 	case PTR_TO_SOCKET:
7466 	case PTR_TO_SOCK_COMMON:
7467 	case PTR_TO_TCP_SOCK:
7468 	case PTR_TO_XDP_SOCK:
7469 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
7470 			dst, reg_type_str(env, ptr_reg->type));
7471 		return -EACCES;
7472 	default:
7473 		break;
7474 	}
7475 
7476 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
7477 	 * The id may be overwritten later if we create a new variable offset.
7478 	 */
7479 	dst_reg->type = ptr_reg->type;
7480 	dst_reg->id = ptr_reg->id;
7481 
7482 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
7483 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
7484 		return -EINVAL;
7485 
7486 	/* pointer types do not carry 32-bit bounds at the moment. */
7487 	__mark_reg32_unbounded(dst_reg);
7488 
7489 	if (sanitize_needed(opcode)) {
7490 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
7491 				       &info, false);
7492 		if (ret < 0)
7493 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
7494 	}
7495 
7496 	switch (opcode) {
7497 	case BPF_ADD:
7498 		/* We can take a fixed offset as long as it doesn't overflow
7499 		 * the s32 'off' field
7500 		 */
7501 		if (known && (ptr_reg->off + smin_val ==
7502 			      (s64)(s32)(ptr_reg->off + smin_val))) {
7503 			/* pointer += K.  Accumulate it into fixed offset */
7504 			dst_reg->smin_value = smin_ptr;
7505 			dst_reg->smax_value = smax_ptr;
7506 			dst_reg->umin_value = umin_ptr;
7507 			dst_reg->umax_value = umax_ptr;
7508 			dst_reg->var_off = ptr_reg->var_off;
7509 			dst_reg->off = ptr_reg->off + smin_val;
7510 			dst_reg->raw = ptr_reg->raw;
7511 			break;
7512 		}
7513 		/* A new variable offset is created.  Note that off_reg->off
7514 		 * == 0, since it's a scalar.
7515 		 * dst_reg gets the pointer type and since some positive
7516 		 * integer value was added to the pointer, give it a new 'id'
7517 		 * if it's a PTR_TO_PACKET.
7518 		 * this creates a new 'base' pointer, off_reg (variable) gets
7519 		 * added into the variable offset, and we copy the fixed offset
7520 		 * from ptr_reg.
7521 		 */
7522 		if (signed_add_overflows(smin_ptr, smin_val) ||
7523 		    signed_add_overflows(smax_ptr, smax_val)) {
7524 			dst_reg->smin_value = S64_MIN;
7525 			dst_reg->smax_value = S64_MAX;
7526 		} else {
7527 			dst_reg->smin_value = smin_ptr + smin_val;
7528 			dst_reg->smax_value = smax_ptr + smax_val;
7529 		}
7530 		if (umin_ptr + umin_val < umin_ptr ||
7531 		    umax_ptr + umax_val < umax_ptr) {
7532 			dst_reg->umin_value = 0;
7533 			dst_reg->umax_value = U64_MAX;
7534 		} else {
7535 			dst_reg->umin_value = umin_ptr + umin_val;
7536 			dst_reg->umax_value = umax_ptr + umax_val;
7537 		}
7538 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
7539 		dst_reg->off = ptr_reg->off;
7540 		dst_reg->raw = ptr_reg->raw;
7541 		if (reg_is_pkt_pointer(ptr_reg)) {
7542 			dst_reg->id = ++env->id_gen;
7543 			/* something was added to pkt_ptr, set range to zero */
7544 			memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7545 		}
7546 		break;
7547 	case BPF_SUB:
7548 		if (dst_reg == off_reg) {
7549 			/* scalar -= pointer.  Creates an unknown scalar */
7550 			verbose(env, "R%d tried to subtract pointer from scalar\n",
7551 				dst);
7552 			return -EACCES;
7553 		}
7554 		/* We don't allow subtraction from FP, because (according to
7555 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
7556 		 * be able to deal with it.
7557 		 */
7558 		if (ptr_reg->type == PTR_TO_STACK) {
7559 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
7560 				dst);
7561 			return -EACCES;
7562 		}
7563 		if (known && (ptr_reg->off - smin_val ==
7564 			      (s64)(s32)(ptr_reg->off - smin_val))) {
7565 			/* pointer -= K.  Subtract it from fixed offset */
7566 			dst_reg->smin_value = smin_ptr;
7567 			dst_reg->smax_value = smax_ptr;
7568 			dst_reg->umin_value = umin_ptr;
7569 			dst_reg->umax_value = umax_ptr;
7570 			dst_reg->var_off = ptr_reg->var_off;
7571 			dst_reg->id = ptr_reg->id;
7572 			dst_reg->off = ptr_reg->off - smin_val;
7573 			dst_reg->raw = ptr_reg->raw;
7574 			break;
7575 		}
7576 		/* A new variable offset is created.  If the subtrahend is known
7577 		 * nonnegative, then any reg->range we had before is still good.
7578 		 */
7579 		if (signed_sub_overflows(smin_ptr, smax_val) ||
7580 		    signed_sub_overflows(smax_ptr, smin_val)) {
7581 			/* Overflow possible, we know nothing */
7582 			dst_reg->smin_value = S64_MIN;
7583 			dst_reg->smax_value = S64_MAX;
7584 		} else {
7585 			dst_reg->smin_value = smin_ptr - smax_val;
7586 			dst_reg->smax_value = smax_ptr - smin_val;
7587 		}
7588 		if (umin_ptr < umax_val) {
7589 			/* Overflow possible, we know nothing */
7590 			dst_reg->umin_value = 0;
7591 			dst_reg->umax_value = U64_MAX;
7592 		} else {
7593 			/* Cannot overflow (as long as bounds are consistent) */
7594 			dst_reg->umin_value = umin_ptr - umax_val;
7595 			dst_reg->umax_value = umax_ptr - umin_val;
7596 		}
7597 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
7598 		dst_reg->off = ptr_reg->off;
7599 		dst_reg->raw = ptr_reg->raw;
7600 		if (reg_is_pkt_pointer(ptr_reg)) {
7601 			dst_reg->id = ++env->id_gen;
7602 			/* something was added to pkt_ptr, set range to zero */
7603 			if (smin_val < 0)
7604 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7605 		}
7606 		break;
7607 	case BPF_AND:
7608 	case BPF_OR:
7609 	case BPF_XOR:
7610 		/* bitwise ops on pointers are troublesome, prohibit. */
7611 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
7612 			dst, bpf_alu_string[opcode >> 4]);
7613 		return -EACCES;
7614 	default:
7615 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
7616 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
7617 			dst, bpf_alu_string[opcode >> 4]);
7618 		return -EACCES;
7619 	}
7620 
7621 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
7622 		return -EINVAL;
7623 
7624 	__update_reg_bounds(dst_reg);
7625 	__reg_deduce_bounds(dst_reg);
7626 	__reg_bound_offset(dst_reg);
7627 
7628 	if (sanitize_check_bounds(env, insn, dst_reg) < 0)
7629 		return -EACCES;
7630 	if (sanitize_needed(opcode)) {
7631 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
7632 				       &info, true);
7633 		if (ret < 0)
7634 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
7635 	}
7636 
7637 	return 0;
7638 }
7639 
7640 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
7641 				 struct bpf_reg_state *src_reg)
7642 {
7643 	s32 smin_val = src_reg->s32_min_value;
7644 	s32 smax_val = src_reg->s32_max_value;
7645 	u32 umin_val = src_reg->u32_min_value;
7646 	u32 umax_val = src_reg->u32_max_value;
7647 
7648 	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
7649 	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
7650 		dst_reg->s32_min_value = S32_MIN;
7651 		dst_reg->s32_max_value = S32_MAX;
7652 	} else {
7653 		dst_reg->s32_min_value += smin_val;
7654 		dst_reg->s32_max_value += smax_val;
7655 	}
7656 	if (dst_reg->u32_min_value + umin_val < umin_val ||
7657 	    dst_reg->u32_max_value + umax_val < umax_val) {
7658 		dst_reg->u32_min_value = 0;
7659 		dst_reg->u32_max_value = U32_MAX;
7660 	} else {
7661 		dst_reg->u32_min_value += umin_val;
7662 		dst_reg->u32_max_value += umax_val;
7663 	}
7664 }
7665 
7666 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
7667 			       struct bpf_reg_state *src_reg)
7668 {
7669 	s64 smin_val = src_reg->smin_value;
7670 	s64 smax_val = src_reg->smax_value;
7671 	u64 umin_val = src_reg->umin_value;
7672 	u64 umax_val = src_reg->umax_value;
7673 
7674 	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
7675 	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
7676 		dst_reg->smin_value = S64_MIN;
7677 		dst_reg->smax_value = S64_MAX;
7678 	} else {
7679 		dst_reg->smin_value += smin_val;
7680 		dst_reg->smax_value += smax_val;
7681 	}
7682 	if (dst_reg->umin_value + umin_val < umin_val ||
7683 	    dst_reg->umax_value + umax_val < umax_val) {
7684 		dst_reg->umin_value = 0;
7685 		dst_reg->umax_value = U64_MAX;
7686 	} else {
7687 		dst_reg->umin_value += umin_val;
7688 		dst_reg->umax_value += umax_val;
7689 	}
7690 }
7691 
7692 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
7693 				 struct bpf_reg_state *src_reg)
7694 {
7695 	s32 smin_val = src_reg->s32_min_value;
7696 	s32 smax_val = src_reg->s32_max_value;
7697 	u32 umin_val = src_reg->u32_min_value;
7698 	u32 umax_val = src_reg->u32_max_value;
7699 
7700 	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
7701 	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
7702 		/* Overflow possible, we know nothing */
7703 		dst_reg->s32_min_value = S32_MIN;
7704 		dst_reg->s32_max_value = S32_MAX;
7705 	} else {
7706 		dst_reg->s32_min_value -= smax_val;
7707 		dst_reg->s32_max_value -= smin_val;
7708 	}
7709 	if (dst_reg->u32_min_value < umax_val) {
7710 		/* Overflow possible, we know nothing */
7711 		dst_reg->u32_min_value = 0;
7712 		dst_reg->u32_max_value = U32_MAX;
7713 	} else {
7714 		/* Cannot overflow (as long as bounds are consistent) */
7715 		dst_reg->u32_min_value -= umax_val;
7716 		dst_reg->u32_max_value -= umin_val;
7717 	}
7718 }
7719 
7720 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
7721 			       struct bpf_reg_state *src_reg)
7722 {
7723 	s64 smin_val = src_reg->smin_value;
7724 	s64 smax_val = src_reg->smax_value;
7725 	u64 umin_val = src_reg->umin_value;
7726 	u64 umax_val = src_reg->umax_value;
7727 
7728 	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
7729 	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
7730 		/* Overflow possible, we know nothing */
7731 		dst_reg->smin_value = S64_MIN;
7732 		dst_reg->smax_value = S64_MAX;
7733 	} else {
7734 		dst_reg->smin_value -= smax_val;
7735 		dst_reg->smax_value -= smin_val;
7736 	}
7737 	if (dst_reg->umin_value < umax_val) {
7738 		/* Overflow possible, we know nothing */
7739 		dst_reg->umin_value = 0;
7740 		dst_reg->umax_value = U64_MAX;
7741 	} else {
7742 		/* Cannot overflow (as long as bounds are consistent) */
7743 		dst_reg->umin_value -= umax_val;
7744 		dst_reg->umax_value -= umin_val;
7745 	}
7746 }
7747 
7748 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
7749 				 struct bpf_reg_state *src_reg)
7750 {
7751 	s32 smin_val = src_reg->s32_min_value;
7752 	u32 umin_val = src_reg->u32_min_value;
7753 	u32 umax_val = src_reg->u32_max_value;
7754 
7755 	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
7756 		/* Ain't nobody got time to multiply that sign */
7757 		__mark_reg32_unbounded(dst_reg);
7758 		return;
7759 	}
7760 	/* Both values are positive, so we can work with unsigned and
7761 	 * copy the result to signed (unless it exceeds S32_MAX).
7762 	 */
7763 	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
7764 		/* Potential overflow, we know nothing */
7765 		__mark_reg32_unbounded(dst_reg);
7766 		return;
7767 	}
7768 	dst_reg->u32_min_value *= umin_val;
7769 	dst_reg->u32_max_value *= umax_val;
7770 	if (dst_reg->u32_max_value > S32_MAX) {
7771 		/* Overflow possible, we know nothing */
7772 		dst_reg->s32_min_value = S32_MIN;
7773 		dst_reg->s32_max_value = S32_MAX;
7774 	} else {
7775 		dst_reg->s32_min_value = dst_reg->u32_min_value;
7776 		dst_reg->s32_max_value = dst_reg->u32_max_value;
7777 	}
7778 }
7779 
7780 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
7781 			       struct bpf_reg_state *src_reg)
7782 {
7783 	s64 smin_val = src_reg->smin_value;
7784 	u64 umin_val = src_reg->umin_value;
7785 	u64 umax_val = src_reg->umax_value;
7786 
7787 	if (smin_val < 0 || dst_reg->smin_value < 0) {
7788 		/* Ain't nobody got time to multiply that sign */
7789 		__mark_reg64_unbounded(dst_reg);
7790 		return;
7791 	}
7792 	/* Both values are positive, so we can work with unsigned and
7793 	 * copy the result to signed (unless it exceeds S64_MAX).
7794 	 */
7795 	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
7796 		/* Potential overflow, we know nothing */
7797 		__mark_reg64_unbounded(dst_reg);
7798 		return;
7799 	}
7800 	dst_reg->umin_value *= umin_val;
7801 	dst_reg->umax_value *= umax_val;
7802 	if (dst_reg->umax_value > S64_MAX) {
7803 		/* Overflow possible, we know nothing */
7804 		dst_reg->smin_value = S64_MIN;
7805 		dst_reg->smax_value = S64_MAX;
7806 	} else {
7807 		dst_reg->smin_value = dst_reg->umin_value;
7808 		dst_reg->smax_value = dst_reg->umax_value;
7809 	}
7810 }
7811 
7812 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
7813 				 struct bpf_reg_state *src_reg)
7814 {
7815 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
7816 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7817 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7818 	s32 smin_val = src_reg->s32_min_value;
7819 	u32 umax_val = src_reg->u32_max_value;
7820 
7821 	if (src_known && dst_known) {
7822 		__mark_reg32_known(dst_reg, var32_off.value);
7823 		return;
7824 	}
7825 
7826 	/* We get our minimum from the var_off, since that's inherently
7827 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
7828 	 */
7829 	dst_reg->u32_min_value = var32_off.value;
7830 	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
7831 	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7832 		/* Lose signed bounds when ANDing negative numbers,
7833 		 * ain't nobody got time for that.
7834 		 */
7835 		dst_reg->s32_min_value = S32_MIN;
7836 		dst_reg->s32_max_value = S32_MAX;
7837 	} else {
7838 		/* ANDing two positives gives a positive, so safe to
7839 		 * cast result into s64.
7840 		 */
7841 		dst_reg->s32_min_value = dst_reg->u32_min_value;
7842 		dst_reg->s32_max_value = dst_reg->u32_max_value;
7843 	}
7844 }
7845 
7846 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
7847 			       struct bpf_reg_state *src_reg)
7848 {
7849 	bool src_known = tnum_is_const(src_reg->var_off);
7850 	bool dst_known = tnum_is_const(dst_reg->var_off);
7851 	s64 smin_val = src_reg->smin_value;
7852 	u64 umax_val = src_reg->umax_value;
7853 
7854 	if (src_known && dst_known) {
7855 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
7856 		return;
7857 	}
7858 
7859 	/* We get our minimum from the var_off, since that's inherently
7860 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
7861 	 */
7862 	dst_reg->umin_value = dst_reg->var_off.value;
7863 	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
7864 	if (dst_reg->smin_value < 0 || smin_val < 0) {
7865 		/* Lose signed bounds when ANDing negative numbers,
7866 		 * ain't nobody got time for that.
7867 		 */
7868 		dst_reg->smin_value = S64_MIN;
7869 		dst_reg->smax_value = S64_MAX;
7870 	} else {
7871 		/* ANDing two positives gives a positive, so safe to
7872 		 * cast result into s64.
7873 		 */
7874 		dst_reg->smin_value = dst_reg->umin_value;
7875 		dst_reg->smax_value = dst_reg->umax_value;
7876 	}
7877 	/* We may learn something more from the var_off */
7878 	__update_reg_bounds(dst_reg);
7879 }
7880 
7881 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
7882 				struct bpf_reg_state *src_reg)
7883 {
7884 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
7885 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7886 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7887 	s32 smin_val = src_reg->s32_min_value;
7888 	u32 umin_val = src_reg->u32_min_value;
7889 
7890 	if (src_known && dst_known) {
7891 		__mark_reg32_known(dst_reg, var32_off.value);
7892 		return;
7893 	}
7894 
7895 	/* We get our maximum from the var_off, and our minimum is the
7896 	 * maximum of the operands' minima
7897 	 */
7898 	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
7899 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7900 	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7901 		/* Lose signed bounds when ORing negative numbers,
7902 		 * ain't nobody got time for that.
7903 		 */
7904 		dst_reg->s32_min_value = S32_MIN;
7905 		dst_reg->s32_max_value = S32_MAX;
7906 	} else {
7907 		/* ORing two positives gives a positive, so safe to
7908 		 * cast result into s64.
7909 		 */
7910 		dst_reg->s32_min_value = dst_reg->u32_min_value;
7911 		dst_reg->s32_max_value = dst_reg->u32_max_value;
7912 	}
7913 }
7914 
7915 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
7916 			      struct bpf_reg_state *src_reg)
7917 {
7918 	bool src_known = tnum_is_const(src_reg->var_off);
7919 	bool dst_known = tnum_is_const(dst_reg->var_off);
7920 	s64 smin_val = src_reg->smin_value;
7921 	u64 umin_val = src_reg->umin_value;
7922 
7923 	if (src_known && dst_known) {
7924 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
7925 		return;
7926 	}
7927 
7928 	/* We get our maximum from the var_off, and our minimum is the
7929 	 * maximum of the operands' minima
7930 	 */
7931 	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
7932 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7933 	if (dst_reg->smin_value < 0 || smin_val < 0) {
7934 		/* Lose signed bounds when ORing negative numbers,
7935 		 * ain't nobody got time for that.
7936 		 */
7937 		dst_reg->smin_value = S64_MIN;
7938 		dst_reg->smax_value = S64_MAX;
7939 	} else {
7940 		/* ORing two positives gives a positive, so safe to
7941 		 * cast result into s64.
7942 		 */
7943 		dst_reg->smin_value = dst_reg->umin_value;
7944 		dst_reg->smax_value = dst_reg->umax_value;
7945 	}
7946 	/* We may learn something more from the var_off */
7947 	__update_reg_bounds(dst_reg);
7948 }
7949 
7950 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
7951 				 struct bpf_reg_state *src_reg)
7952 {
7953 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
7954 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7955 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7956 	s32 smin_val = src_reg->s32_min_value;
7957 
7958 	if (src_known && dst_known) {
7959 		__mark_reg32_known(dst_reg, var32_off.value);
7960 		return;
7961 	}
7962 
7963 	/* We get both minimum and maximum from the var32_off. */
7964 	dst_reg->u32_min_value = var32_off.value;
7965 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7966 
7967 	if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
7968 		/* XORing two positive sign numbers gives a positive,
7969 		 * so safe to cast u32 result into s32.
7970 		 */
7971 		dst_reg->s32_min_value = dst_reg->u32_min_value;
7972 		dst_reg->s32_max_value = dst_reg->u32_max_value;
7973 	} else {
7974 		dst_reg->s32_min_value = S32_MIN;
7975 		dst_reg->s32_max_value = S32_MAX;
7976 	}
7977 }
7978 
7979 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
7980 			       struct bpf_reg_state *src_reg)
7981 {
7982 	bool src_known = tnum_is_const(src_reg->var_off);
7983 	bool dst_known = tnum_is_const(dst_reg->var_off);
7984 	s64 smin_val = src_reg->smin_value;
7985 
7986 	if (src_known && dst_known) {
7987 		/* dst_reg->var_off.value has been updated earlier */
7988 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
7989 		return;
7990 	}
7991 
7992 	/* We get both minimum and maximum from the var_off. */
7993 	dst_reg->umin_value = dst_reg->var_off.value;
7994 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7995 
7996 	if (dst_reg->smin_value >= 0 && smin_val >= 0) {
7997 		/* XORing two positive sign numbers gives a positive,
7998 		 * so safe to cast u64 result into s64.
7999 		 */
8000 		dst_reg->smin_value = dst_reg->umin_value;
8001 		dst_reg->smax_value = dst_reg->umax_value;
8002 	} else {
8003 		dst_reg->smin_value = S64_MIN;
8004 		dst_reg->smax_value = S64_MAX;
8005 	}
8006 
8007 	__update_reg_bounds(dst_reg);
8008 }
8009 
8010 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
8011 				   u64 umin_val, u64 umax_val)
8012 {
8013 	/* We lose all sign bit information (except what we can pick
8014 	 * up from var_off)
8015 	 */
8016 	dst_reg->s32_min_value = S32_MIN;
8017 	dst_reg->s32_max_value = S32_MAX;
8018 	/* If we might shift our top bit out, then we know nothing */
8019 	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
8020 		dst_reg->u32_min_value = 0;
8021 		dst_reg->u32_max_value = U32_MAX;
8022 	} else {
8023 		dst_reg->u32_min_value <<= umin_val;
8024 		dst_reg->u32_max_value <<= umax_val;
8025 	}
8026 }
8027 
8028 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
8029 				 struct bpf_reg_state *src_reg)
8030 {
8031 	u32 umax_val = src_reg->u32_max_value;
8032 	u32 umin_val = src_reg->u32_min_value;
8033 	/* u32 alu operation will zext upper bits */
8034 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
8035 
8036 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
8037 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
8038 	/* Not required but being careful mark reg64 bounds as unknown so
8039 	 * that we are forced to pick them up from tnum and zext later and
8040 	 * if some path skips this step we are still safe.
8041 	 */
8042 	__mark_reg64_unbounded(dst_reg);
8043 	__update_reg32_bounds(dst_reg);
8044 }
8045 
8046 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
8047 				   u64 umin_val, u64 umax_val)
8048 {
8049 	/* Special case <<32 because it is a common compiler pattern to sign
8050 	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
8051 	 * positive we know this shift will also be positive so we can track
8052 	 * bounds correctly. Otherwise we lose all sign bit information except
8053 	 * what we can pick up from var_off. Perhaps we can generalize this
8054 	 * later to shifts of any length.
8055 	 */
8056 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
8057 		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
8058 	else
8059 		dst_reg->smax_value = S64_MAX;
8060 
8061 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
8062 		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
8063 	else
8064 		dst_reg->smin_value = S64_MIN;
8065 
8066 	/* If we might shift our top bit out, then we know nothing */
8067 	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
8068 		dst_reg->umin_value = 0;
8069 		dst_reg->umax_value = U64_MAX;
8070 	} else {
8071 		dst_reg->umin_value <<= umin_val;
8072 		dst_reg->umax_value <<= umax_val;
8073 	}
8074 }
8075 
8076 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
8077 			       struct bpf_reg_state *src_reg)
8078 {
8079 	u64 umax_val = src_reg->umax_value;
8080 	u64 umin_val = src_reg->umin_value;
8081 
8082 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
8083 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
8084 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
8085 
8086 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
8087 	/* We may learn something more from the var_off */
8088 	__update_reg_bounds(dst_reg);
8089 }
8090 
8091 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
8092 				 struct bpf_reg_state *src_reg)
8093 {
8094 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
8095 	u32 umax_val = src_reg->u32_max_value;
8096 	u32 umin_val = src_reg->u32_min_value;
8097 
8098 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
8099 	 * be negative, then either:
8100 	 * 1) src_reg might be zero, so the sign bit of the result is
8101 	 *    unknown, so we lose our signed bounds
8102 	 * 2) it's known negative, thus the unsigned bounds capture the
8103 	 *    signed bounds
8104 	 * 3) the signed bounds cross zero, so they tell us nothing
8105 	 *    about the result
8106 	 * If the value in dst_reg is known nonnegative, then again the
8107 	 * unsigned bounds capture the signed bounds.
8108 	 * Thus, in all cases it suffices to blow away our signed bounds
8109 	 * and rely on inferring new ones from the unsigned bounds and
8110 	 * var_off of the result.
8111 	 */
8112 	dst_reg->s32_min_value = S32_MIN;
8113 	dst_reg->s32_max_value = S32_MAX;
8114 
8115 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
8116 	dst_reg->u32_min_value >>= umax_val;
8117 	dst_reg->u32_max_value >>= umin_val;
8118 
8119 	__mark_reg64_unbounded(dst_reg);
8120 	__update_reg32_bounds(dst_reg);
8121 }
8122 
8123 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
8124 			       struct bpf_reg_state *src_reg)
8125 {
8126 	u64 umax_val = src_reg->umax_value;
8127 	u64 umin_val = src_reg->umin_value;
8128 
8129 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
8130 	 * be negative, then either:
8131 	 * 1) src_reg might be zero, so the sign bit of the result is
8132 	 *    unknown, so we lose our signed bounds
8133 	 * 2) it's known negative, thus the unsigned bounds capture the
8134 	 *    signed bounds
8135 	 * 3) the signed bounds cross zero, so they tell us nothing
8136 	 *    about the result
8137 	 * If the value in dst_reg is known nonnegative, then again the
8138 	 * unsigned bounds capture the signed bounds.
8139 	 * Thus, in all cases it suffices to blow away our signed bounds
8140 	 * and rely on inferring new ones from the unsigned bounds and
8141 	 * var_off of the result.
8142 	 */
8143 	dst_reg->smin_value = S64_MIN;
8144 	dst_reg->smax_value = S64_MAX;
8145 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
8146 	dst_reg->umin_value >>= umax_val;
8147 	dst_reg->umax_value >>= umin_val;
8148 
8149 	/* Its not easy to operate on alu32 bounds here because it depends
8150 	 * on bits being shifted in. Take easy way out and mark unbounded
8151 	 * so we can recalculate later from tnum.
8152 	 */
8153 	__mark_reg32_unbounded(dst_reg);
8154 	__update_reg_bounds(dst_reg);
8155 }
8156 
8157 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
8158 				  struct bpf_reg_state *src_reg)
8159 {
8160 	u64 umin_val = src_reg->u32_min_value;
8161 
8162 	/* Upon reaching here, src_known is true and
8163 	 * umax_val is equal to umin_val.
8164 	 */
8165 	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
8166 	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
8167 
8168 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
8169 
8170 	/* blow away the dst_reg umin_value/umax_value and rely on
8171 	 * dst_reg var_off to refine the result.
8172 	 */
8173 	dst_reg->u32_min_value = 0;
8174 	dst_reg->u32_max_value = U32_MAX;
8175 
8176 	__mark_reg64_unbounded(dst_reg);
8177 	__update_reg32_bounds(dst_reg);
8178 }
8179 
8180 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
8181 				struct bpf_reg_state *src_reg)
8182 {
8183 	u64 umin_val = src_reg->umin_value;
8184 
8185 	/* Upon reaching here, src_known is true and umax_val is equal
8186 	 * to umin_val.
8187 	 */
8188 	dst_reg->smin_value >>= umin_val;
8189 	dst_reg->smax_value >>= umin_val;
8190 
8191 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
8192 
8193 	/* blow away the dst_reg umin_value/umax_value and rely on
8194 	 * dst_reg var_off to refine the result.
8195 	 */
8196 	dst_reg->umin_value = 0;
8197 	dst_reg->umax_value = U64_MAX;
8198 
8199 	/* Its not easy to operate on alu32 bounds here because it depends
8200 	 * on bits being shifted in from upper 32-bits. Take easy way out
8201 	 * and mark unbounded so we can recalculate later from tnum.
8202 	 */
8203 	__mark_reg32_unbounded(dst_reg);
8204 	__update_reg_bounds(dst_reg);
8205 }
8206 
8207 /* WARNING: This function does calculations on 64-bit values, but the actual
8208  * execution may occur on 32-bit values. Therefore, things like bitshifts
8209  * need extra checks in the 32-bit case.
8210  */
8211 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8212 				      struct bpf_insn *insn,
8213 				      struct bpf_reg_state *dst_reg,
8214 				      struct bpf_reg_state src_reg)
8215 {
8216 	struct bpf_reg_state *regs = cur_regs(env);
8217 	u8 opcode = BPF_OP(insn->code);
8218 	bool src_known;
8219 	s64 smin_val, smax_val;
8220 	u64 umin_val, umax_val;
8221 	s32 s32_min_val, s32_max_val;
8222 	u32 u32_min_val, u32_max_val;
8223 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
8224 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
8225 	int ret;
8226 
8227 	smin_val = src_reg.smin_value;
8228 	smax_val = src_reg.smax_value;
8229 	umin_val = src_reg.umin_value;
8230 	umax_val = src_reg.umax_value;
8231 
8232 	s32_min_val = src_reg.s32_min_value;
8233 	s32_max_val = src_reg.s32_max_value;
8234 	u32_min_val = src_reg.u32_min_value;
8235 	u32_max_val = src_reg.u32_max_value;
8236 
8237 	if (alu32) {
8238 		src_known = tnum_subreg_is_const(src_reg.var_off);
8239 		if ((src_known &&
8240 		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
8241 		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
8242 			/* Taint dst register if offset had invalid bounds
8243 			 * derived from e.g. dead branches.
8244 			 */
8245 			__mark_reg_unknown(env, dst_reg);
8246 			return 0;
8247 		}
8248 	} else {
8249 		src_known = tnum_is_const(src_reg.var_off);
8250 		if ((src_known &&
8251 		     (smin_val != smax_val || umin_val != umax_val)) ||
8252 		    smin_val > smax_val || umin_val > umax_val) {
8253 			/* Taint dst register if offset had invalid bounds
8254 			 * derived from e.g. dead branches.
8255 			 */
8256 			__mark_reg_unknown(env, dst_reg);
8257 			return 0;
8258 		}
8259 	}
8260 
8261 	if (!src_known &&
8262 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
8263 		__mark_reg_unknown(env, dst_reg);
8264 		return 0;
8265 	}
8266 
8267 	if (sanitize_needed(opcode)) {
8268 		ret = sanitize_val_alu(env, insn);
8269 		if (ret < 0)
8270 			return sanitize_err(env, insn, ret, NULL, NULL);
8271 	}
8272 
8273 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
8274 	 * There are two classes of instructions: The first class we track both
8275 	 * alu32 and alu64 sign/unsigned bounds independently this provides the
8276 	 * greatest amount of precision when alu operations are mixed with jmp32
8277 	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
8278 	 * and BPF_OR. This is possible because these ops have fairly easy to
8279 	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
8280 	 * See alu32 verifier tests for examples. The second class of
8281 	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
8282 	 * with regards to tracking sign/unsigned bounds because the bits may
8283 	 * cross subreg boundaries in the alu64 case. When this happens we mark
8284 	 * the reg unbounded in the subreg bound space and use the resulting
8285 	 * tnum to calculate an approximation of the sign/unsigned bounds.
8286 	 */
8287 	switch (opcode) {
8288 	case BPF_ADD:
8289 		scalar32_min_max_add(dst_reg, &src_reg);
8290 		scalar_min_max_add(dst_reg, &src_reg);
8291 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
8292 		break;
8293 	case BPF_SUB:
8294 		scalar32_min_max_sub(dst_reg, &src_reg);
8295 		scalar_min_max_sub(dst_reg, &src_reg);
8296 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
8297 		break;
8298 	case BPF_MUL:
8299 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
8300 		scalar32_min_max_mul(dst_reg, &src_reg);
8301 		scalar_min_max_mul(dst_reg, &src_reg);
8302 		break;
8303 	case BPF_AND:
8304 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
8305 		scalar32_min_max_and(dst_reg, &src_reg);
8306 		scalar_min_max_and(dst_reg, &src_reg);
8307 		break;
8308 	case BPF_OR:
8309 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
8310 		scalar32_min_max_or(dst_reg, &src_reg);
8311 		scalar_min_max_or(dst_reg, &src_reg);
8312 		break;
8313 	case BPF_XOR:
8314 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
8315 		scalar32_min_max_xor(dst_reg, &src_reg);
8316 		scalar_min_max_xor(dst_reg, &src_reg);
8317 		break;
8318 	case BPF_LSH:
8319 		if (umax_val >= insn_bitness) {
8320 			/* Shifts greater than 31 or 63 are undefined.
8321 			 * This includes shifts by a negative number.
8322 			 */
8323 			mark_reg_unknown(env, regs, insn->dst_reg);
8324 			break;
8325 		}
8326 		if (alu32)
8327 			scalar32_min_max_lsh(dst_reg, &src_reg);
8328 		else
8329 			scalar_min_max_lsh(dst_reg, &src_reg);
8330 		break;
8331 	case BPF_RSH:
8332 		if (umax_val >= insn_bitness) {
8333 			/* Shifts greater than 31 or 63 are undefined.
8334 			 * This includes shifts by a negative number.
8335 			 */
8336 			mark_reg_unknown(env, regs, insn->dst_reg);
8337 			break;
8338 		}
8339 		if (alu32)
8340 			scalar32_min_max_rsh(dst_reg, &src_reg);
8341 		else
8342 			scalar_min_max_rsh(dst_reg, &src_reg);
8343 		break;
8344 	case BPF_ARSH:
8345 		if (umax_val >= insn_bitness) {
8346 			/* Shifts greater than 31 or 63 are undefined.
8347 			 * This includes shifts by a negative number.
8348 			 */
8349 			mark_reg_unknown(env, regs, insn->dst_reg);
8350 			break;
8351 		}
8352 		if (alu32)
8353 			scalar32_min_max_arsh(dst_reg, &src_reg);
8354 		else
8355 			scalar_min_max_arsh(dst_reg, &src_reg);
8356 		break;
8357 	default:
8358 		mark_reg_unknown(env, regs, insn->dst_reg);
8359 		break;
8360 	}
8361 
8362 	/* ALU32 ops are zero extended into 64bit register */
8363 	if (alu32)
8364 		zext_32_to_64(dst_reg);
8365 
8366 	__update_reg_bounds(dst_reg);
8367 	__reg_deduce_bounds(dst_reg);
8368 	__reg_bound_offset(dst_reg);
8369 	return 0;
8370 }
8371 
8372 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
8373  * and var_off.
8374  */
8375 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
8376 				   struct bpf_insn *insn)
8377 {
8378 	struct bpf_verifier_state *vstate = env->cur_state;
8379 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8380 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
8381 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
8382 	u8 opcode = BPF_OP(insn->code);
8383 	int err;
8384 
8385 	dst_reg = &regs[insn->dst_reg];
8386 	src_reg = NULL;
8387 	if (dst_reg->type != SCALAR_VALUE)
8388 		ptr_reg = dst_reg;
8389 	else
8390 		/* Make sure ID is cleared otherwise dst_reg min/max could be
8391 		 * incorrectly propagated into other registers by find_equal_scalars()
8392 		 */
8393 		dst_reg->id = 0;
8394 	if (BPF_SRC(insn->code) == BPF_X) {
8395 		src_reg = &regs[insn->src_reg];
8396 		if (src_reg->type != SCALAR_VALUE) {
8397 			if (dst_reg->type != SCALAR_VALUE) {
8398 				/* Combining two pointers by any ALU op yields
8399 				 * an arbitrary scalar. Disallow all math except
8400 				 * pointer subtraction
8401 				 */
8402 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
8403 					mark_reg_unknown(env, regs, insn->dst_reg);
8404 					return 0;
8405 				}
8406 				verbose(env, "R%d pointer %s pointer prohibited\n",
8407 					insn->dst_reg,
8408 					bpf_alu_string[opcode >> 4]);
8409 				return -EACCES;
8410 			} else {
8411 				/* scalar += pointer
8412 				 * This is legal, but we have to reverse our
8413 				 * src/dest handling in computing the range
8414 				 */
8415 				err = mark_chain_precision(env, insn->dst_reg);
8416 				if (err)
8417 					return err;
8418 				return adjust_ptr_min_max_vals(env, insn,
8419 							       src_reg, dst_reg);
8420 			}
8421 		} else if (ptr_reg) {
8422 			/* pointer += scalar */
8423 			err = mark_chain_precision(env, insn->src_reg);
8424 			if (err)
8425 				return err;
8426 			return adjust_ptr_min_max_vals(env, insn,
8427 						       dst_reg, src_reg);
8428 		}
8429 	} else {
8430 		/* Pretend the src is a reg with a known value, since we only
8431 		 * need to be able to read from this state.
8432 		 */
8433 		off_reg.type = SCALAR_VALUE;
8434 		__mark_reg_known(&off_reg, insn->imm);
8435 		src_reg = &off_reg;
8436 		if (ptr_reg) /* pointer += K */
8437 			return adjust_ptr_min_max_vals(env, insn,
8438 						       ptr_reg, src_reg);
8439 	}
8440 
8441 	/* Got here implies adding two SCALAR_VALUEs */
8442 	if (WARN_ON_ONCE(ptr_reg)) {
8443 		print_verifier_state(env, state, true);
8444 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
8445 		return -EINVAL;
8446 	}
8447 	if (WARN_ON(!src_reg)) {
8448 		print_verifier_state(env, state, true);
8449 		verbose(env, "verifier internal error: no src_reg\n");
8450 		return -EINVAL;
8451 	}
8452 	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
8453 }
8454 
8455 /* check validity of 32-bit and 64-bit arithmetic operations */
8456 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
8457 {
8458 	struct bpf_reg_state *regs = cur_regs(env);
8459 	u8 opcode = BPF_OP(insn->code);
8460 	int err;
8461 
8462 	if (opcode == BPF_END || opcode == BPF_NEG) {
8463 		if (opcode == BPF_NEG) {
8464 			if (BPF_SRC(insn->code) != 0 ||
8465 			    insn->src_reg != BPF_REG_0 ||
8466 			    insn->off != 0 || insn->imm != 0) {
8467 				verbose(env, "BPF_NEG uses reserved fields\n");
8468 				return -EINVAL;
8469 			}
8470 		} else {
8471 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
8472 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
8473 			    BPF_CLASS(insn->code) == BPF_ALU64) {
8474 				verbose(env, "BPF_END uses reserved fields\n");
8475 				return -EINVAL;
8476 			}
8477 		}
8478 
8479 		/* check src operand */
8480 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8481 		if (err)
8482 			return err;
8483 
8484 		if (is_pointer_value(env, insn->dst_reg)) {
8485 			verbose(env, "R%d pointer arithmetic prohibited\n",
8486 				insn->dst_reg);
8487 			return -EACCES;
8488 		}
8489 
8490 		/* check dest operand */
8491 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
8492 		if (err)
8493 			return err;
8494 
8495 	} else if (opcode == BPF_MOV) {
8496 
8497 		if (BPF_SRC(insn->code) == BPF_X) {
8498 			if (insn->imm != 0 || insn->off != 0) {
8499 				verbose(env, "BPF_MOV uses reserved fields\n");
8500 				return -EINVAL;
8501 			}
8502 
8503 			/* check src operand */
8504 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
8505 			if (err)
8506 				return err;
8507 		} else {
8508 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8509 				verbose(env, "BPF_MOV uses reserved fields\n");
8510 				return -EINVAL;
8511 			}
8512 		}
8513 
8514 		/* check dest operand, mark as required later */
8515 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8516 		if (err)
8517 			return err;
8518 
8519 		if (BPF_SRC(insn->code) == BPF_X) {
8520 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
8521 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
8522 
8523 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
8524 				/* case: R1 = R2
8525 				 * copy register state to dest reg
8526 				 */
8527 				if (src_reg->type == SCALAR_VALUE && !src_reg->id)
8528 					/* Assign src and dst registers the same ID
8529 					 * that will be used by find_equal_scalars()
8530 					 * to propagate min/max range.
8531 					 */
8532 					src_reg->id = ++env->id_gen;
8533 				*dst_reg = *src_reg;
8534 				dst_reg->live |= REG_LIVE_WRITTEN;
8535 				dst_reg->subreg_def = DEF_NOT_SUBREG;
8536 			} else {
8537 				/* R1 = (u32) R2 */
8538 				if (is_pointer_value(env, insn->src_reg)) {
8539 					verbose(env,
8540 						"R%d partial copy of pointer\n",
8541 						insn->src_reg);
8542 					return -EACCES;
8543 				} else if (src_reg->type == SCALAR_VALUE) {
8544 					*dst_reg = *src_reg;
8545 					/* Make sure ID is cleared otherwise
8546 					 * dst_reg min/max could be incorrectly
8547 					 * propagated into src_reg by find_equal_scalars()
8548 					 */
8549 					dst_reg->id = 0;
8550 					dst_reg->live |= REG_LIVE_WRITTEN;
8551 					dst_reg->subreg_def = env->insn_idx + 1;
8552 				} else {
8553 					mark_reg_unknown(env, regs,
8554 							 insn->dst_reg);
8555 				}
8556 				zext_32_to_64(dst_reg);
8557 
8558 				__update_reg_bounds(dst_reg);
8559 				__reg_deduce_bounds(dst_reg);
8560 				__reg_bound_offset(dst_reg);
8561 			}
8562 		} else {
8563 			/* case: R = imm
8564 			 * remember the value we stored into this reg
8565 			 */
8566 			/* clear any state __mark_reg_known doesn't set */
8567 			mark_reg_unknown(env, regs, insn->dst_reg);
8568 			regs[insn->dst_reg].type = SCALAR_VALUE;
8569 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
8570 				__mark_reg_known(regs + insn->dst_reg,
8571 						 insn->imm);
8572 			} else {
8573 				__mark_reg_known(regs + insn->dst_reg,
8574 						 (u32)insn->imm);
8575 			}
8576 		}
8577 
8578 	} else if (opcode > BPF_END) {
8579 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
8580 		return -EINVAL;
8581 
8582 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
8583 
8584 		if (BPF_SRC(insn->code) == BPF_X) {
8585 			if (insn->imm != 0 || insn->off != 0) {
8586 				verbose(env, "BPF_ALU uses reserved fields\n");
8587 				return -EINVAL;
8588 			}
8589 			/* check src1 operand */
8590 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
8591 			if (err)
8592 				return err;
8593 		} else {
8594 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8595 				verbose(env, "BPF_ALU uses reserved fields\n");
8596 				return -EINVAL;
8597 			}
8598 		}
8599 
8600 		/* check src2 operand */
8601 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8602 		if (err)
8603 			return err;
8604 
8605 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
8606 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
8607 			verbose(env, "div by zero\n");
8608 			return -EINVAL;
8609 		}
8610 
8611 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
8612 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
8613 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
8614 
8615 			if (insn->imm < 0 || insn->imm >= size) {
8616 				verbose(env, "invalid shift %d\n", insn->imm);
8617 				return -EINVAL;
8618 			}
8619 		}
8620 
8621 		/* check dest operand */
8622 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8623 		if (err)
8624 			return err;
8625 
8626 		return adjust_reg_min_max_vals(env, insn);
8627 	}
8628 
8629 	return 0;
8630 }
8631 
8632 static void __find_good_pkt_pointers(struct bpf_func_state *state,
8633 				     struct bpf_reg_state *dst_reg,
8634 				     enum bpf_reg_type type, int new_range)
8635 {
8636 	struct bpf_reg_state *reg;
8637 	int i;
8638 
8639 	for (i = 0; i < MAX_BPF_REG; i++) {
8640 		reg = &state->regs[i];
8641 		if (reg->type == type && reg->id == dst_reg->id)
8642 			/* keep the maximum range already checked */
8643 			reg->range = max(reg->range, new_range);
8644 	}
8645 
8646 	bpf_for_each_spilled_reg(i, state, reg) {
8647 		if (!reg)
8648 			continue;
8649 		if (reg->type == type && reg->id == dst_reg->id)
8650 			reg->range = max(reg->range, new_range);
8651 	}
8652 }
8653 
8654 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
8655 				   struct bpf_reg_state *dst_reg,
8656 				   enum bpf_reg_type type,
8657 				   bool range_right_open)
8658 {
8659 	int new_range, i;
8660 
8661 	if (dst_reg->off < 0 ||
8662 	    (dst_reg->off == 0 && range_right_open))
8663 		/* This doesn't give us any range */
8664 		return;
8665 
8666 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
8667 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
8668 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
8669 		 * than pkt_end, but that's because it's also less than pkt.
8670 		 */
8671 		return;
8672 
8673 	new_range = dst_reg->off;
8674 	if (range_right_open)
8675 		new_range++;
8676 
8677 	/* Examples for register markings:
8678 	 *
8679 	 * pkt_data in dst register:
8680 	 *
8681 	 *   r2 = r3;
8682 	 *   r2 += 8;
8683 	 *   if (r2 > pkt_end) goto <handle exception>
8684 	 *   <access okay>
8685 	 *
8686 	 *   r2 = r3;
8687 	 *   r2 += 8;
8688 	 *   if (r2 < pkt_end) goto <access okay>
8689 	 *   <handle exception>
8690 	 *
8691 	 *   Where:
8692 	 *     r2 == dst_reg, pkt_end == src_reg
8693 	 *     r2=pkt(id=n,off=8,r=0)
8694 	 *     r3=pkt(id=n,off=0,r=0)
8695 	 *
8696 	 * pkt_data in src register:
8697 	 *
8698 	 *   r2 = r3;
8699 	 *   r2 += 8;
8700 	 *   if (pkt_end >= r2) goto <access okay>
8701 	 *   <handle exception>
8702 	 *
8703 	 *   r2 = r3;
8704 	 *   r2 += 8;
8705 	 *   if (pkt_end <= r2) goto <handle exception>
8706 	 *   <access okay>
8707 	 *
8708 	 *   Where:
8709 	 *     pkt_end == dst_reg, r2 == src_reg
8710 	 *     r2=pkt(id=n,off=8,r=0)
8711 	 *     r3=pkt(id=n,off=0,r=0)
8712 	 *
8713 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
8714 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
8715 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
8716 	 * the check.
8717 	 */
8718 
8719 	/* If our ids match, then we must have the same max_value.  And we
8720 	 * don't care about the other reg's fixed offset, since if it's too big
8721 	 * the range won't allow anything.
8722 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
8723 	 */
8724 	for (i = 0; i <= vstate->curframe; i++)
8725 		__find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
8726 					 new_range);
8727 }
8728 
8729 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
8730 {
8731 	struct tnum subreg = tnum_subreg(reg->var_off);
8732 	s32 sval = (s32)val;
8733 
8734 	switch (opcode) {
8735 	case BPF_JEQ:
8736 		if (tnum_is_const(subreg))
8737 			return !!tnum_equals_const(subreg, val);
8738 		break;
8739 	case BPF_JNE:
8740 		if (tnum_is_const(subreg))
8741 			return !tnum_equals_const(subreg, val);
8742 		break;
8743 	case BPF_JSET:
8744 		if ((~subreg.mask & subreg.value) & val)
8745 			return 1;
8746 		if (!((subreg.mask | subreg.value) & val))
8747 			return 0;
8748 		break;
8749 	case BPF_JGT:
8750 		if (reg->u32_min_value > val)
8751 			return 1;
8752 		else if (reg->u32_max_value <= val)
8753 			return 0;
8754 		break;
8755 	case BPF_JSGT:
8756 		if (reg->s32_min_value > sval)
8757 			return 1;
8758 		else if (reg->s32_max_value <= sval)
8759 			return 0;
8760 		break;
8761 	case BPF_JLT:
8762 		if (reg->u32_max_value < val)
8763 			return 1;
8764 		else if (reg->u32_min_value >= val)
8765 			return 0;
8766 		break;
8767 	case BPF_JSLT:
8768 		if (reg->s32_max_value < sval)
8769 			return 1;
8770 		else if (reg->s32_min_value >= sval)
8771 			return 0;
8772 		break;
8773 	case BPF_JGE:
8774 		if (reg->u32_min_value >= val)
8775 			return 1;
8776 		else if (reg->u32_max_value < val)
8777 			return 0;
8778 		break;
8779 	case BPF_JSGE:
8780 		if (reg->s32_min_value >= sval)
8781 			return 1;
8782 		else if (reg->s32_max_value < sval)
8783 			return 0;
8784 		break;
8785 	case BPF_JLE:
8786 		if (reg->u32_max_value <= val)
8787 			return 1;
8788 		else if (reg->u32_min_value > val)
8789 			return 0;
8790 		break;
8791 	case BPF_JSLE:
8792 		if (reg->s32_max_value <= sval)
8793 			return 1;
8794 		else if (reg->s32_min_value > sval)
8795 			return 0;
8796 		break;
8797 	}
8798 
8799 	return -1;
8800 }
8801 
8802 
8803 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
8804 {
8805 	s64 sval = (s64)val;
8806 
8807 	switch (opcode) {
8808 	case BPF_JEQ:
8809 		if (tnum_is_const(reg->var_off))
8810 			return !!tnum_equals_const(reg->var_off, val);
8811 		break;
8812 	case BPF_JNE:
8813 		if (tnum_is_const(reg->var_off))
8814 			return !tnum_equals_const(reg->var_off, val);
8815 		break;
8816 	case BPF_JSET:
8817 		if ((~reg->var_off.mask & reg->var_off.value) & val)
8818 			return 1;
8819 		if (!((reg->var_off.mask | reg->var_off.value) & val))
8820 			return 0;
8821 		break;
8822 	case BPF_JGT:
8823 		if (reg->umin_value > val)
8824 			return 1;
8825 		else if (reg->umax_value <= val)
8826 			return 0;
8827 		break;
8828 	case BPF_JSGT:
8829 		if (reg->smin_value > sval)
8830 			return 1;
8831 		else if (reg->smax_value <= sval)
8832 			return 0;
8833 		break;
8834 	case BPF_JLT:
8835 		if (reg->umax_value < val)
8836 			return 1;
8837 		else if (reg->umin_value >= val)
8838 			return 0;
8839 		break;
8840 	case BPF_JSLT:
8841 		if (reg->smax_value < sval)
8842 			return 1;
8843 		else if (reg->smin_value >= sval)
8844 			return 0;
8845 		break;
8846 	case BPF_JGE:
8847 		if (reg->umin_value >= val)
8848 			return 1;
8849 		else if (reg->umax_value < val)
8850 			return 0;
8851 		break;
8852 	case BPF_JSGE:
8853 		if (reg->smin_value >= sval)
8854 			return 1;
8855 		else if (reg->smax_value < sval)
8856 			return 0;
8857 		break;
8858 	case BPF_JLE:
8859 		if (reg->umax_value <= val)
8860 			return 1;
8861 		else if (reg->umin_value > val)
8862 			return 0;
8863 		break;
8864 	case BPF_JSLE:
8865 		if (reg->smax_value <= sval)
8866 			return 1;
8867 		else if (reg->smin_value > sval)
8868 			return 0;
8869 		break;
8870 	}
8871 
8872 	return -1;
8873 }
8874 
8875 /* compute branch direction of the expression "if (reg opcode val) goto target;"
8876  * and return:
8877  *  1 - branch will be taken and "goto target" will be executed
8878  *  0 - branch will not be taken and fall-through to next insn
8879  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
8880  *      range [0,10]
8881  */
8882 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
8883 			   bool is_jmp32)
8884 {
8885 	if (__is_pointer_value(false, reg)) {
8886 		if (!reg_type_not_null(reg->type))
8887 			return -1;
8888 
8889 		/* If pointer is valid tests against zero will fail so we can
8890 		 * use this to direct branch taken.
8891 		 */
8892 		if (val != 0)
8893 			return -1;
8894 
8895 		switch (opcode) {
8896 		case BPF_JEQ:
8897 			return 0;
8898 		case BPF_JNE:
8899 			return 1;
8900 		default:
8901 			return -1;
8902 		}
8903 	}
8904 
8905 	if (is_jmp32)
8906 		return is_branch32_taken(reg, val, opcode);
8907 	return is_branch64_taken(reg, val, opcode);
8908 }
8909 
8910 static int flip_opcode(u32 opcode)
8911 {
8912 	/* How can we transform "a <op> b" into "b <op> a"? */
8913 	static const u8 opcode_flip[16] = {
8914 		/* these stay the same */
8915 		[BPF_JEQ  >> 4] = BPF_JEQ,
8916 		[BPF_JNE  >> 4] = BPF_JNE,
8917 		[BPF_JSET >> 4] = BPF_JSET,
8918 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
8919 		[BPF_JGE  >> 4] = BPF_JLE,
8920 		[BPF_JGT  >> 4] = BPF_JLT,
8921 		[BPF_JLE  >> 4] = BPF_JGE,
8922 		[BPF_JLT  >> 4] = BPF_JGT,
8923 		[BPF_JSGE >> 4] = BPF_JSLE,
8924 		[BPF_JSGT >> 4] = BPF_JSLT,
8925 		[BPF_JSLE >> 4] = BPF_JSGE,
8926 		[BPF_JSLT >> 4] = BPF_JSGT
8927 	};
8928 	return opcode_flip[opcode >> 4];
8929 }
8930 
8931 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
8932 				   struct bpf_reg_state *src_reg,
8933 				   u8 opcode)
8934 {
8935 	struct bpf_reg_state *pkt;
8936 
8937 	if (src_reg->type == PTR_TO_PACKET_END) {
8938 		pkt = dst_reg;
8939 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
8940 		pkt = src_reg;
8941 		opcode = flip_opcode(opcode);
8942 	} else {
8943 		return -1;
8944 	}
8945 
8946 	if (pkt->range >= 0)
8947 		return -1;
8948 
8949 	switch (opcode) {
8950 	case BPF_JLE:
8951 		/* pkt <= pkt_end */
8952 		fallthrough;
8953 	case BPF_JGT:
8954 		/* pkt > pkt_end */
8955 		if (pkt->range == BEYOND_PKT_END)
8956 			/* pkt has at last one extra byte beyond pkt_end */
8957 			return opcode == BPF_JGT;
8958 		break;
8959 	case BPF_JLT:
8960 		/* pkt < pkt_end */
8961 		fallthrough;
8962 	case BPF_JGE:
8963 		/* pkt >= pkt_end */
8964 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
8965 			return opcode == BPF_JGE;
8966 		break;
8967 	}
8968 	return -1;
8969 }
8970 
8971 /* Adjusts the register min/max values in the case that the dst_reg is the
8972  * variable register that we are working on, and src_reg is a constant or we're
8973  * simply doing a BPF_K check.
8974  * In JEQ/JNE cases we also adjust the var_off values.
8975  */
8976 static void reg_set_min_max(struct bpf_reg_state *true_reg,
8977 			    struct bpf_reg_state *false_reg,
8978 			    u64 val, u32 val32,
8979 			    u8 opcode, bool is_jmp32)
8980 {
8981 	struct tnum false_32off = tnum_subreg(false_reg->var_off);
8982 	struct tnum false_64off = false_reg->var_off;
8983 	struct tnum true_32off = tnum_subreg(true_reg->var_off);
8984 	struct tnum true_64off = true_reg->var_off;
8985 	s64 sval = (s64)val;
8986 	s32 sval32 = (s32)val32;
8987 
8988 	/* If the dst_reg is a pointer, we can't learn anything about its
8989 	 * variable offset from the compare (unless src_reg were a pointer into
8990 	 * the same object, but we don't bother with that.
8991 	 * Since false_reg and true_reg have the same type by construction, we
8992 	 * only need to check one of them for pointerness.
8993 	 */
8994 	if (__is_pointer_value(false, false_reg))
8995 		return;
8996 
8997 	switch (opcode) {
8998 	case BPF_JEQ:
8999 	case BPF_JNE:
9000 	{
9001 		struct bpf_reg_state *reg =
9002 			opcode == BPF_JEQ ? true_reg : false_reg;
9003 
9004 		/* JEQ/JNE comparison doesn't change the register equivalence.
9005 		 * r1 = r2;
9006 		 * if (r1 == 42) goto label;
9007 		 * ...
9008 		 * label: // here both r1 and r2 are known to be 42.
9009 		 *
9010 		 * Hence when marking register as known preserve it's ID.
9011 		 */
9012 		if (is_jmp32)
9013 			__mark_reg32_known(reg, val32);
9014 		else
9015 			___mark_reg_known(reg, val);
9016 		break;
9017 	}
9018 	case BPF_JSET:
9019 		if (is_jmp32) {
9020 			false_32off = tnum_and(false_32off, tnum_const(~val32));
9021 			if (is_power_of_2(val32))
9022 				true_32off = tnum_or(true_32off,
9023 						     tnum_const(val32));
9024 		} else {
9025 			false_64off = tnum_and(false_64off, tnum_const(~val));
9026 			if (is_power_of_2(val))
9027 				true_64off = tnum_or(true_64off,
9028 						     tnum_const(val));
9029 		}
9030 		break;
9031 	case BPF_JGE:
9032 	case BPF_JGT:
9033 	{
9034 		if (is_jmp32) {
9035 			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
9036 			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
9037 
9038 			false_reg->u32_max_value = min(false_reg->u32_max_value,
9039 						       false_umax);
9040 			true_reg->u32_min_value = max(true_reg->u32_min_value,
9041 						      true_umin);
9042 		} else {
9043 			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
9044 			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
9045 
9046 			false_reg->umax_value = min(false_reg->umax_value, false_umax);
9047 			true_reg->umin_value = max(true_reg->umin_value, true_umin);
9048 		}
9049 		break;
9050 	}
9051 	case BPF_JSGE:
9052 	case BPF_JSGT:
9053 	{
9054 		if (is_jmp32) {
9055 			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
9056 			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
9057 
9058 			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
9059 			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
9060 		} else {
9061 			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
9062 			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
9063 
9064 			false_reg->smax_value = min(false_reg->smax_value, false_smax);
9065 			true_reg->smin_value = max(true_reg->smin_value, true_smin);
9066 		}
9067 		break;
9068 	}
9069 	case BPF_JLE:
9070 	case BPF_JLT:
9071 	{
9072 		if (is_jmp32) {
9073 			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
9074 			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
9075 
9076 			false_reg->u32_min_value = max(false_reg->u32_min_value,
9077 						       false_umin);
9078 			true_reg->u32_max_value = min(true_reg->u32_max_value,
9079 						      true_umax);
9080 		} else {
9081 			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
9082 			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
9083 
9084 			false_reg->umin_value = max(false_reg->umin_value, false_umin);
9085 			true_reg->umax_value = min(true_reg->umax_value, true_umax);
9086 		}
9087 		break;
9088 	}
9089 	case BPF_JSLE:
9090 	case BPF_JSLT:
9091 	{
9092 		if (is_jmp32) {
9093 			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
9094 			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
9095 
9096 			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
9097 			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
9098 		} else {
9099 			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
9100 			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
9101 
9102 			false_reg->smin_value = max(false_reg->smin_value, false_smin);
9103 			true_reg->smax_value = min(true_reg->smax_value, true_smax);
9104 		}
9105 		break;
9106 	}
9107 	default:
9108 		return;
9109 	}
9110 
9111 	if (is_jmp32) {
9112 		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
9113 					     tnum_subreg(false_32off));
9114 		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
9115 					    tnum_subreg(true_32off));
9116 		__reg_combine_32_into_64(false_reg);
9117 		__reg_combine_32_into_64(true_reg);
9118 	} else {
9119 		false_reg->var_off = false_64off;
9120 		true_reg->var_off = true_64off;
9121 		__reg_combine_64_into_32(false_reg);
9122 		__reg_combine_64_into_32(true_reg);
9123 	}
9124 }
9125 
9126 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
9127  * the variable reg.
9128  */
9129 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
9130 				struct bpf_reg_state *false_reg,
9131 				u64 val, u32 val32,
9132 				u8 opcode, bool is_jmp32)
9133 {
9134 	opcode = flip_opcode(opcode);
9135 	/* This uses zero as "not present in table"; luckily the zero opcode,
9136 	 * BPF_JA, can't get here.
9137 	 */
9138 	if (opcode)
9139 		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
9140 }
9141 
9142 /* Regs are known to be equal, so intersect their min/max/var_off */
9143 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
9144 				  struct bpf_reg_state *dst_reg)
9145 {
9146 	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
9147 							dst_reg->umin_value);
9148 	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
9149 							dst_reg->umax_value);
9150 	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
9151 							dst_reg->smin_value);
9152 	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
9153 							dst_reg->smax_value);
9154 	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
9155 							     dst_reg->var_off);
9156 	/* We might have learned new bounds from the var_off. */
9157 	__update_reg_bounds(src_reg);
9158 	__update_reg_bounds(dst_reg);
9159 	/* We might have learned something about the sign bit. */
9160 	__reg_deduce_bounds(src_reg);
9161 	__reg_deduce_bounds(dst_reg);
9162 	/* We might have learned some bits from the bounds. */
9163 	__reg_bound_offset(src_reg);
9164 	__reg_bound_offset(dst_reg);
9165 	/* Intersecting with the old var_off might have improved our bounds
9166 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
9167 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
9168 	 */
9169 	__update_reg_bounds(src_reg);
9170 	__update_reg_bounds(dst_reg);
9171 }
9172 
9173 static void reg_combine_min_max(struct bpf_reg_state *true_src,
9174 				struct bpf_reg_state *true_dst,
9175 				struct bpf_reg_state *false_src,
9176 				struct bpf_reg_state *false_dst,
9177 				u8 opcode)
9178 {
9179 	switch (opcode) {
9180 	case BPF_JEQ:
9181 		__reg_combine_min_max(true_src, true_dst);
9182 		break;
9183 	case BPF_JNE:
9184 		__reg_combine_min_max(false_src, false_dst);
9185 		break;
9186 	}
9187 }
9188 
9189 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
9190 				 struct bpf_reg_state *reg, u32 id,
9191 				 bool is_null)
9192 {
9193 	if (type_may_be_null(reg->type) && reg->id == id &&
9194 	    !WARN_ON_ONCE(!reg->id)) {
9195 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
9196 				 !tnum_equals_const(reg->var_off, 0) ||
9197 				 reg->off)) {
9198 			/* Old offset (both fixed and variable parts) should
9199 			 * have been known-zero, because we don't allow pointer
9200 			 * arithmetic on pointers that might be NULL. If we
9201 			 * see this happening, don't convert the register.
9202 			 */
9203 			return;
9204 		}
9205 		if (is_null) {
9206 			reg->type = SCALAR_VALUE;
9207 			/* We don't need id and ref_obj_id from this point
9208 			 * onwards anymore, thus we should better reset it,
9209 			 * so that state pruning has chances to take effect.
9210 			 */
9211 			reg->id = 0;
9212 			reg->ref_obj_id = 0;
9213 
9214 			return;
9215 		}
9216 
9217 		mark_ptr_not_null_reg(reg);
9218 
9219 		if (!reg_may_point_to_spin_lock(reg)) {
9220 			/* For not-NULL ptr, reg->ref_obj_id will be reset
9221 			 * in release_reg_references().
9222 			 *
9223 			 * reg->id is still used by spin_lock ptr. Other
9224 			 * than spin_lock ptr type, reg->id can be reset.
9225 			 */
9226 			reg->id = 0;
9227 		}
9228 	}
9229 }
9230 
9231 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
9232 				    bool is_null)
9233 {
9234 	struct bpf_reg_state *reg;
9235 	int i;
9236 
9237 	for (i = 0; i < MAX_BPF_REG; i++)
9238 		mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
9239 
9240 	bpf_for_each_spilled_reg(i, state, reg) {
9241 		if (!reg)
9242 			continue;
9243 		mark_ptr_or_null_reg(state, reg, id, is_null);
9244 	}
9245 }
9246 
9247 /* The logic is similar to find_good_pkt_pointers(), both could eventually
9248  * be folded together at some point.
9249  */
9250 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
9251 				  bool is_null)
9252 {
9253 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
9254 	struct bpf_reg_state *regs = state->regs;
9255 	u32 ref_obj_id = regs[regno].ref_obj_id;
9256 	u32 id = regs[regno].id;
9257 	int i;
9258 
9259 	if (ref_obj_id && ref_obj_id == id && is_null)
9260 		/* regs[regno] is in the " == NULL" branch.
9261 		 * No one could have freed the reference state before
9262 		 * doing the NULL check.
9263 		 */
9264 		WARN_ON_ONCE(release_reference_state(state, id));
9265 
9266 	for (i = 0; i <= vstate->curframe; i++)
9267 		__mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
9268 }
9269 
9270 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
9271 				   struct bpf_reg_state *dst_reg,
9272 				   struct bpf_reg_state *src_reg,
9273 				   struct bpf_verifier_state *this_branch,
9274 				   struct bpf_verifier_state *other_branch)
9275 {
9276 	if (BPF_SRC(insn->code) != BPF_X)
9277 		return false;
9278 
9279 	/* Pointers are always 64-bit. */
9280 	if (BPF_CLASS(insn->code) == BPF_JMP32)
9281 		return false;
9282 
9283 	switch (BPF_OP(insn->code)) {
9284 	case BPF_JGT:
9285 		if ((dst_reg->type == PTR_TO_PACKET &&
9286 		     src_reg->type == PTR_TO_PACKET_END) ||
9287 		    (dst_reg->type == PTR_TO_PACKET_META &&
9288 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9289 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
9290 			find_good_pkt_pointers(this_branch, dst_reg,
9291 					       dst_reg->type, false);
9292 			mark_pkt_end(other_branch, insn->dst_reg, true);
9293 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
9294 			    src_reg->type == PTR_TO_PACKET) ||
9295 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9296 			    src_reg->type == PTR_TO_PACKET_META)) {
9297 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
9298 			find_good_pkt_pointers(other_branch, src_reg,
9299 					       src_reg->type, true);
9300 			mark_pkt_end(this_branch, insn->src_reg, false);
9301 		} else {
9302 			return false;
9303 		}
9304 		break;
9305 	case BPF_JLT:
9306 		if ((dst_reg->type == PTR_TO_PACKET &&
9307 		     src_reg->type == PTR_TO_PACKET_END) ||
9308 		    (dst_reg->type == PTR_TO_PACKET_META &&
9309 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9310 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
9311 			find_good_pkt_pointers(other_branch, dst_reg,
9312 					       dst_reg->type, true);
9313 			mark_pkt_end(this_branch, insn->dst_reg, false);
9314 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
9315 			    src_reg->type == PTR_TO_PACKET) ||
9316 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9317 			    src_reg->type == PTR_TO_PACKET_META)) {
9318 			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
9319 			find_good_pkt_pointers(this_branch, src_reg,
9320 					       src_reg->type, false);
9321 			mark_pkt_end(other_branch, insn->src_reg, true);
9322 		} else {
9323 			return false;
9324 		}
9325 		break;
9326 	case BPF_JGE:
9327 		if ((dst_reg->type == PTR_TO_PACKET &&
9328 		     src_reg->type == PTR_TO_PACKET_END) ||
9329 		    (dst_reg->type == PTR_TO_PACKET_META &&
9330 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9331 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
9332 			find_good_pkt_pointers(this_branch, dst_reg,
9333 					       dst_reg->type, true);
9334 			mark_pkt_end(other_branch, insn->dst_reg, false);
9335 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
9336 			    src_reg->type == PTR_TO_PACKET) ||
9337 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9338 			    src_reg->type == PTR_TO_PACKET_META)) {
9339 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
9340 			find_good_pkt_pointers(other_branch, src_reg,
9341 					       src_reg->type, false);
9342 			mark_pkt_end(this_branch, insn->src_reg, true);
9343 		} else {
9344 			return false;
9345 		}
9346 		break;
9347 	case BPF_JLE:
9348 		if ((dst_reg->type == PTR_TO_PACKET &&
9349 		     src_reg->type == PTR_TO_PACKET_END) ||
9350 		    (dst_reg->type == PTR_TO_PACKET_META &&
9351 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9352 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
9353 			find_good_pkt_pointers(other_branch, dst_reg,
9354 					       dst_reg->type, false);
9355 			mark_pkt_end(this_branch, insn->dst_reg, true);
9356 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
9357 			    src_reg->type == PTR_TO_PACKET) ||
9358 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9359 			    src_reg->type == PTR_TO_PACKET_META)) {
9360 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
9361 			find_good_pkt_pointers(this_branch, src_reg,
9362 					       src_reg->type, true);
9363 			mark_pkt_end(other_branch, insn->src_reg, false);
9364 		} else {
9365 			return false;
9366 		}
9367 		break;
9368 	default:
9369 		return false;
9370 	}
9371 
9372 	return true;
9373 }
9374 
9375 static void find_equal_scalars(struct bpf_verifier_state *vstate,
9376 			       struct bpf_reg_state *known_reg)
9377 {
9378 	struct bpf_func_state *state;
9379 	struct bpf_reg_state *reg;
9380 	int i, j;
9381 
9382 	for (i = 0; i <= vstate->curframe; i++) {
9383 		state = vstate->frame[i];
9384 		for (j = 0; j < MAX_BPF_REG; j++) {
9385 			reg = &state->regs[j];
9386 			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9387 				*reg = *known_reg;
9388 		}
9389 
9390 		bpf_for_each_spilled_reg(j, state, reg) {
9391 			if (!reg)
9392 				continue;
9393 			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9394 				*reg = *known_reg;
9395 		}
9396 	}
9397 }
9398 
9399 static int check_cond_jmp_op(struct bpf_verifier_env *env,
9400 			     struct bpf_insn *insn, int *insn_idx)
9401 {
9402 	struct bpf_verifier_state *this_branch = env->cur_state;
9403 	struct bpf_verifier_state *other_branch;
9404 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
9405 	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
9406 	u8 opcode = BPF_OP(insn->code);
9407 	bool is_jmp32;
9408 	int pred = -1;
9409 	int err;
9410 
9411 	/* Only conditional jumps are expected to reach here. */
9412 	if (opcode == BPF_JA || opcode > BPF_JSLE) {
9413 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
9414 		return -EINVAL;
9415 	}
9416 
9417 	if (BPF_SRC(insn->code) == BPF_X) {
9418 		if (insn->imm != 0) {
9419 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9420 			return -EINVAL;
9421 		}
9422 
9423 		/* check src1 operand */
9424 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
9425 		if (err)
9426 			return err;
9427 
9428 		if (is_pointer_value(env, insn->src_reg)) {
9429 			verbose(env, "R%d pointer comparison prohibited\n",
9430 				insn->src_reg);
9431 			return -EACCES;
9432 		}
9433 		src_reg = &regs[insn->src_reg];
9434 	} else {
9435 		if (insn->src_reg != BPF_REG_0) {
9436 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9437 			return -EINVAL;
9438 		}
9439 	}
9440 
9441 	/* check src2 operand */
9442 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9443 	if (err)
9444 		return err;
9445 
9446 	dst_reg = &regs[insn->dst_reg];
9447 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
9448 
9449 	if (BPF_SRC(insn->code) == BPF_K) {
9450 		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
9451 	} else if (src_reg->type == SCALAR_VALUE &&
9452 		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
9453 		pred = is_branch_taken(dst_reg,
9454 				       tnum_subreg(src_reg->var_off).value,
9455 				       opcode,
9456 				       is_jmp32);
9457 	} else if (src_reg->type == SCALAR_VALUE &&
9458 		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
9459 		pred = is_branch_taken(dst_reg,
9460 				       src_reg->var_off.value,
9461 				       opcode,
9462 				       is_jmp32);
9463 	} else if (reg_is_pkt_pointer_any(dst_reg) &&
9464 		   reg_is_pkt_pointer_any(src_reg) &&
9465 		   !is_jmp32) {
9466 		pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
9467 	}
9468 
9469 	if (pred >= 0) {
9470 		/* If we get here with a dst_reg pointer type it is because
9471 		 * above is_branch_taken() special cased the 0 comparison.
9472 		 */
9473 		if (!__is_pointer_value(false, dst_reg))
9474 			err = mark_chain_precision(env, insn->dst_reg);
9475 		if (BPF_SRC(insn->code) == BPF_X && !err &&
9476 		    !__is_pointer_value(false, src_reg))
9477 			err = mark_chain_precision(env, insn->src_reg);
9478 		if (err)
9479 			return err;
9480 	}
9481 
9482 	if (pred == 1) {
9483 		/* Only follow the goto, ignore fall-through. If needed, push
9484 		 * the fall-through branch for simulation under speculative
9485 		 * execution.
9486 		 */
9487 		if (!env->bypass_spec_v1 &&
9488 		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
9489 					       *insn_idx))
9490 			return -EFAULT;
9491 		*insn_idx += insn->off;
9492 		return 0;
9493 	} else if (pred == 0) {
9494 		/* Only follow the fall-through branch, since that's where the
9495 		 * program will go. If needed, push the goto branch for
9496 		 * simulation under speculative execution.
9497 		 */
9498 		if (!env->bypass_spec_v1 &&
9499 		    !sanitize_speculative_path(env, insn,
9500 					       *insn_idx + insn->off + 1,
9501 					       *insn_idx))
9502 			return -EFAULT;
9503 		return 0;
9504 	}
9505 
9506 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
9507 				  false);
9508 	if (!other_branch)
9509 		return -EFAULT;
9510 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
9511 
9512 	/* detect if we are comparing against a constant value so we can adjust
9513 	 * our min/max values for our dst register.
9514 	 * this is only legit if both are scalars (or pointers to the same
9515 	 * object, I suppose, but we don't support that right now), because
9516 	 * otherwise the different base pointers mean the offsets aren't
9517 	 * comparable.
9518 	 */
9519 	if (BPF_SRC(insn->code) == BPF_X) {
9520 		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
9521 
9522 		if (dst_reg->type == SCALAR_VALUE &&
9523 		    src_reg->type == SCALAR_VALUE) {
9524 			if (tnum_is_const(src_reg->var_off) ||
9525 			    (is_jmp32 &&
9526 			     tnum_is_const(tnum_subreg(src_reg->var_off))))
9527 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
9528 						dst_reg,
9529 						src_reg->var_off.value,
9530 						tnum_subreg(src_reg->var_off).value,
9531 						opcode, is_jmp32);
9532 			else if (tnum_is_const(dst_reg->var_off) ||
9533 				 (is_jmp32 &&
9534 				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
9535 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
9536 						    src_reg,
9537 						    dst_reg->var_off.value,
9538 						    tnum_subreg(dst_reg->var_off).value,
9539 						    opcode, is_jmp32);
9540 			else if (!is_jmp32 &&
9541 				 (opcode == BPF_JEQ || opcode == BPF_JNE))
9542 				/* Comparing for equality, we can combine knowledge */
9543 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
9544 						    &other_branch_regs[insn->dst_reg],
9545 						    src_reg, dst_reg, opcode);
9546 			if (src_reg->id &&
9547 			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
9548 				find_equal_scalars(this_branch, src_reg);
9549 				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
9550 			}
9551 
9552 		}
9553 	} else if (dst_reg->type == SCALAR_VALUE) {
9554 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
9555 					dst_reg, insn->imm, (u32)insn->imm,
9556 					opcode, is_jmp32);
9557 	}
9558 
9559 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
9560 	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
9561 		find_equal_scalars(this_branch, dst_reg);
9562 		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
9563 	}
9564 
9565 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
9566 	 * NOTE: these optimizations below are related with pointer comparison
9567 	 *       which will never be JMP32.
9568 	 */
9569 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
9570 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
9571 	    type_may_be_null(dst_reg->type)) {
9572 		/* Mark all identical registers in each branch as either
9573 		 * safe or unknown depending R == 0 or R != 0 conditional.
9574 		 */
9575 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
9576 				      opcode == BPF_JNE);
9577 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
9578 				      opcode == BPF_JEQ);
9579 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
9580 					   this_branch, other_branch) &&
9581 		   is_pointer_value(env, insn->dst_reg)) {
9582 		verbose(env, "R%d pointer comparison prohibited\n",
9583 			insn->dst_reg);
9584 		return -EACCES;
9585 	}
9586 	if (env->log.level & BPF_LOG_LEVEL)
9587 		print_insn_state(env, this_branch->frame[this_branch->curframe]);
9588 	return 0;
9589 }
9590 
9591 /* verify BPF_LD_IMM64 instruction */
9592 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
9593 {
9594 	struct bpf_insn_aux_data *aux = cur_aux(env);
9595 	struct bpf_reg_state *regs = cur_regs(env);
9596 	struct bpf_reg_state *dst_reg;
9597 	struct bpf_map *map;
9598 	int err;
9599 
9600 	if (BPF_SIZE(insn->code) != BPF_DW) {
9601 		verbose(env, "invalid BPF_LD_IMM insn\n");
9602 		return -EINVAL;
9603 	}
9604 	if (insn->off != 0) {
9605 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
9606 		return -EINVAL;
9607 	}
9608 
9609 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
9610 	if (err)
9611 		return err;
9612 
9613 	dst_reg = &regs[insn->dst_reg];
9614 	if (insn->src_reg == 0) {
9615 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
9616 
9617 		dst_reg->type = SCALAR_VALUE;
9618 		__mark_reg_known(&regs[insn->dst_reg], imm);
9619 		return 0;
9620 	}
9621 
9622 	/* All special src_reg cases are listed below. From this point onwards
9623 	 * we either succeed and assign a corresponding dst_reg->type after
9624 	 * zeroing the offset, or fail and reject the program.
9625 	 */
9626 	mark_reg_known_zero(env, regs, insn->dst_reg);
9627 
9628 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
9629 		dst_reg->type = aux->btf_var.reg_type;
9630 		switch (base_type(dst_reg->type)) {
9631 		case PTR_TO_MEM:
9632 			dst_reg->mem_size = aux->btf_var.mem_size;
9633 			break;
9634 		case PTR_TO_BTF_ID:
9635 		case PTR_TO_PERCPU_BTF_ID:
9636 			dst_reg->btf = aux->btf_var.btf;
9637 			dst_reg->btf_id = aux->btf_var.btf_id;
9638 			break;
9639 		default:
9640 			verbose(env, "bpf verifier is misconfigured\n");
9641 			return -EFAULT;
9642 		}
9643 		return 0;
9644 	}
9645 
9646 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
9647 		struct bpf_prog_aux *aux = env->prog->aux;
9648 		u32 subprogno = find_subprog(env,
9649 					     env->insn_idx + insn->imm + 1);
9650 
9651 		if (!aux->func_info) {
9652 			verbose(env, "missing btf func_info\n");
9653 			return -EINVAL;
9654 		}
9655 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
9656 			verbose(env, "callback function not static\n");
9657 			return -EINVAL;
9658 		}
9659 
9660 		dst_reg->type = PTR_TO_FUNC;
9661 		dst_reg->subprogno = subprogno;
9662 		return 0;
9663 	}
9664 
9665 	map = env->used_maps[aux->map_index];
9666 	dst_reg->map_ptr = map;
9667 
9668 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
9669 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
9670 		dst_reg->type = PTR_TO_MAP_VALUE;
9671 		dst_reg->off = aux->map_off;
9672 		if (map_value_has_spin_lock(map))
9673 			dst_reg->id = ++env->id_gen;
9674 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
9675 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
9676 		dst_reg->type = CONST_PTR_TO_MAP;
9677 	} else {
9678 		verbose(env, "bpf verifier is misconfigured\n");
9679 		return -EINVAL;
9680 	}
9681 
9682 	return 0;
9683 }
9684 
9685 static bool may_access_skb(enum bpf_prog_type type)
9686 {
9687 	switch (type) {
9688 	case BPF_PROG_TYPE_SOCKET_FILTER:
9689 	case BPF_PROG_TYPE_SCHED_CLS:
9690 	case BPF_PROG_TYPE_SCHED_ACT:
9691 		return true;
9692 	default:
9693 		return false;
9694 	}
9695 }
9696 
9697 /* verify safety of LD_ABS|LD_IND instructions:
9698  * - they can only appear in the programs where ctx == skb
9699  * - since they are wrappers of function calls, they scratch R1-R5 registers,
9700  *   preserve R6-R9, and store return value into R0
9701  *
9702  * Implicit input:
9703  *   ctx == skb == R6 == CTX
9704  *
9705  * Explicit input:
9706  *   SRC == any register
9707  *   IMM == 32-bit immediate
9708  *
9709  * Output:
9710  *   R0 - 8/16/32-bit skb data converted to cpu endianness
9711  */
9712 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
9713 {
9714 	struct bpf_reg_state *regs = cur_regs(env);
9715 	static const int ctx_reg = BPF_REG_6;
9716 	u8 mode = BPF_MODE(insn->code);
9717 	int i, err;
9718 
9719 	if (!may_access_skb(resolve_prog_type(env->prog))) {
9720 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
9721 		return -EINVAL;
9722 	}
9723 
9724 	if (!env->ops->gen_ld_abs) {
9725 		verbose(env, "bpf verifier is misconfigured\n");
9726 		return -EINVAL;
9727 	}
9728 
9729 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
9730 	    BPF_SIZE(insn->code) == BPF_DW ||
9731 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
9732 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
9733 		return -EINVAL;
9734 	}
9735 
9736 	/* check whether implicit source operand (register R6) is readable */
9737 	err = check_reg_arg(env, ctx_reg, SRC_OP);
9738 	if (err)
9739 		return err;
9740 
9741 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
9742 	 * gen_ld_abs() may terminate the program at runtime, leading to
9743 	 * reference leak.
9744 	 */
9745 	err = check_reference_leak(env);
9746 	if (err) {
9747 		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
9748 		return err;
9749 	}
9750 
9751 	if (env->cur_state->active_spin_lock) {
9752 		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
9753 		return -EINVAL;
9754 	}
9755 
9756 	if (regs[ctx_reg].type != PTR_TO_CTX) {
9757 		verbose(env,
9758 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
9759 		return -EINVAL;
9760 	}
9761 
9762 	if (mode == BPF_IND) {
9763 		/* check explicit source operand */
9764 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
9765 		if (err)
9766 			return err;
9767 	}
9768 
9769 	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
9770 	if (err < 0)
9771 		return err;
9772 
9773 	/* reset caller saved regs to unreadable */
9774 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9775 		mark_reg_not_init(env, regs, caller_saved[i]);
9776 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
9777 	}
9778 
9779 	/* mark destination R0 register as readable, since it contains
9780 	 * the value fetched from the packet.
9781 	 * Already marked as written above.
9782 	 */
9783 	mark_reg_unknown(env, regs, BPF_REG_0);
9784 	/* ld_abs load up to 32-bit skb data. */
9785 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
9786 	return 0;
9787 }
9788 
9789 static int check_return_code(struct bpf_verifier_env *env)
9790 {
9791 	struct tnum enforce_attach_type_range = tnum_unknown;
9792 	const struct bpf_prog *prog = env->prog;
9793 	struct bpf_reg_state *reg;
9794 	struct tnum range = tnum_range(0, 1);
9795 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9796 	int err;
9797 	struct bpf_func_state *frame = env->cur_state->frame[0];
9798 	const bool is_subprog = frame->subprogno;
9799 
9800 	/* LSM and struct_ops func-ptr's return type could be "void" */
9801 	if (!is_subprog &&
9802 	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
9803 	     prog_type == BPF_PROG_TYPE_LSM) &&
9804 	    !prog->aux->attach_func_proto->type)
9805 		return 0;
9806 
9807 	/* eBPF calling convention is such that R0 is used
9808 	 * to return the value from eBPF program.
9809 	 * Make sure that it's readable at this time
9810 	 * of bpf_exit, which means that program wrote
9811 	 * something into it earlier
9812 	 */
9813 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
9814 	if (err)
9815 		return err;
9816 
9817 	if (is_pointer_value(env, BPF_REG_0)) {
9818 		verbose(env, "R0 leaks addr as return value\n");
9819 		return -EACCES;
9820 	}
9821 
9822 	reg = cur_regs(env) + BPF_REG_0;
9823 
9824 	if (frame->in_async_callback_fn) {
9825 		/* enforce return zero from async callbacks like timer */
9826 		if (reg->type != SCALAR_VALUE) {
9827 			verbose(env, "In async callback the register R0 is not a known value (%s)\n",
9828 				reg_type_str(env, reg->type));
9829 			return -EINVAL;
9830 		}
9831 
9832 		if (!tnum_in(tnum_const(0), reg->var_off)) {
9833 			verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
9834 			return -EINVAL;
9835 		}
9836 		return 0;
9837 	}
9838 
9839 	if (is_subprog) {
9840 		if (reg->type != SCALAR_VALUE) {
9841 			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
9842 				reg_type_str(env, reg->type));
9843 			return -EINVAL;
9844 		}
9845 		return 0;
9846 	}
9847 
9848 	switch (prog_type) {
9849 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
9850 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
9851 		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
9852 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
9853 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
9854 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
9855 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
9856 			range = tnum_range(1, 1);
9857 		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
9858 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
9859 			range = tnum_range(0, 3);
9860 		break;
9861 	case BPF_PROG_TYPE_CGROUP_SKB:
9862 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
9863 			range = tnum_range(0, 3);
9864 			enforce_attach_type_range = tnum_range(2, 3);
9865 		}
9866 		break;
9867 	case BPF_PROG_TYPE_CGROUP_SOCK:
9868 	case BPF_PROG_TYPE_SOCK_OPS:
9869 	case BPF_PROG_TYPE_CGROUP_DEVICE:
9870 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
9871 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
9872 		break;
9873 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
9874 		if (!env->prog->aux->attach_btf_id)
9875 			return 0;
9876 		range = tnum_const(0);
9877 		break;
9878 	case BPF_PROG_TYPE_TRACING:
9879 		switch (env->prog->expected_attach_type) {
9880 		case BPF_TRACE_FENTRY:
9881 		case BPF_TRACE_FEXIT:
9882 			range = tnum_const(0);
9883 			break;
9884 		case BPF_TRACE_RAW_TP:
9885 		case BPF_MODIFY_RETURN:
9886 			return 0;
9887 		case BPF_TRACE_ITER:
9888 			break;
9889 		default:
9890 			return -ENOTSUPP;
9891 		}
9892 		break;
9893 	case BPF_PROG_TYPE_SK_LOOKUP:
9894 		range = tnum_range(SK_DROP, SK_PASS);
9895 		break;
9896 	case BPF_PROG_TYPE_EXT:
9897 		/* freplace program can return anything as its return value
9898 		 * depends on the to-be-replaced kernel func or bpf program.
9899 		 */
9900 	default:
9901 		return 0;
9902 	}
9903 
9904 	if (reg->type != SCALAR_VALUE) {
9905 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
9906 			reg_type_str(env, reg->type));
9907 		return -EINVAL;
9908 	}
9909 
9910 	if (!tnum_in(range, reg->var_off)) {
9911 		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
9912 		return -EINVAL;
9913 	}
9914 
9915 	if (!tnum_is_unknown(enforce_attach_type_range) &&
9916 	    tnum_in(enforce_attach_type_range, reg->var_off))
9917 		env->prog->enforce_expected_attach_type = 1;
9918 	return 0;
9919 }
9920 
9921 /* non-recursive DFS pseudo code
9922  * 1  procedure DFS-iterative(G,v):
9923  * 2      label v as discovered
9924  * 3      let S be a stack
9925  * 4      S.push(v)
9926  * 5      while S is not empty
9927  * 6            t <- S.pop()
9928  * 7            if t is what we're looking for:
9929  * 8                return t
9930  * 9            for all edges e in G.adjacentEdges(t) do
9931  * 10               if edge e is already labelled
9932  * 11                   continue with the next edge
9933  * 12               w <- G.adjacentVertex(t,e)
9934  * 13               if vertex w is not discovered and not explored
9935  * 14                   label e as tree-edge
9936  * 15                   label w as discovered
9937  * 16                   S.push(w)
9938  * 17                   continue at 5
9939  * 18               else if vertex w is discovered
9940  * 19                   label e as back-edge
9941  * 20               else
9942  * 21                   // vertex w is explored
9943  * 22                   label e as forward- or cross-edge
9944  * 23           label t as explored
9945  * 24           S.pop()
9946  *
9947  * convention:
9948  * 0x10 - discovered
9949  * 0x11 - discovered and fall-through edge labelled
9950  * 0x12 - discovered and fall-through and branch edges labelled
9951  * 0x20 - explored
9952  */
9953 
9954 enum {
9955 	DISCOVERED = 0x10,
9956 	EXPLORED = 0x20,
9957 	FALLTHROUGH = 1,
9958 	BRANCH = 2,
9959 };
9960 
9961 static u32 state_htab_size(struct bpf_verifier_env *env)
9962 {
9963 	return env->prog->len;
9964 }
9965 
9966 static struct bpf_verifier_state_list **explored_state(
9967 					struct bpf_verifier_env *env,
9968 					int idx)
9969 {
9970 	struct bpf_verifier_state *cur = env->cur_state;
9971 	struct bpf_func_state *state = cur->frame[cur->curframe];
9972 
9973 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
9974 }
9975 
9976 static void init_explored_state(struct bpf_verifier_env *env, int idx)
9977 {
9978 	env->insn_aux_data[idx].prune_point = true;
9979 }
9980 
9981 enum {
9982 	DONE_EXPLORING = 0,
9983 	KEEP_EXPLORING = 1,
9984 };
9985 
9986 /* t, w, e - match pseudo-code above:
9987  * t - index of current instruction
9988  * w - next instruction
9989  * e - edge
9990  */
9991 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
9992 		     bool loop_ok)
9993 {
9994 	int *insn_stack = env->cfg.insn_stack;
9995 	int *insn_state = env->cfg.insn_state;
9996 
9997 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
9998 		return DONE_EXPLORING;
9999 
10000 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
10001 		return DONE_EXPLORING;
10002 
10003 	if (w < 0 || w >= env->prog->len) {
10004 		verbose_linfo(env, t, "%d: ", t);
10005 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
10006 		return -EINVAL;
10007 	}
10008 
10009 	if (e == BRANCH)
10010 		/* mark branch target for state pruning */
10011 		init_explored_state(env, w);
10012 
10013 	if (insn_state[w] == 0) {
10014 		/* tree-edge */
10015 		insn_state[t] = DISCOVERED | e;
10016 		insn_state[w] = DISCOVERED;
10017 		if (env->cfg.cur_stack >= env->prog->len)
10018 			return -E2BIG;
10019 		insn_stack[env->cfg.cur_stack++] = w;
10020 		return KEEP_EXPLORING;
10021 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
10022 		if (loop_ok && env->bpf_capable)
10023 			return DONE_EXPLORING;
10024 		verbose_linfo(env, t, "%d: ", t);
10025 		verbose_linfo(env, w, "%d: ", w);
10026 		verbose(env, "back-edge from insn %d to %d\n", t, w);
10027 		return -EINVAL;
10028 	} else if (insn_state[w] == EXPLORED) {
10029 		/* forward- or cross-edge */
10030 		insn_state[t] = DISCOVERED | e;
10031 	} else {
10032 		verbose(env, "insn state internal bug\n");
10033 		return -EFAULT;
10034 	}
10035 	return DONE_EXPLORING;
10036 }
10037 
10038 static int visit_func_call_insn(int t, int insn_cnt,
10039 				struct bpf_insn *insns,
10040 				struct bpf_verifier_env *env,
10041 				bool visit_callee)
10042 {
10043 	int ret;
10044 
10045 	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
10046 	if (ret)
10047 		return ret;
10048 
10049 	if (t + 1 < insn_cnt)
10050 		init_explored_state(env, t + 1);
10051 	if (visit_callee) {
10052 		init_explored_state(env, t);
10053 		ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
10054 				/* It's ok to allow recursion from CFG point of
10055 				 * view. __check_func_call() will do the actual
10056 				 * check.
10057 				 */
10058 				bpf_pseudo_func(insns + t));
10059 	}
10060 	return ret;
10061 }
10062 
10063 /* Visits the instruction at index t and returns one of the following:
10064  *  < 0 - an error occurred
10065  *  DONE_EXPLORING - the instruction was fully explored
10066  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
10067  */
10068 static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
10069 {
10070 	struct bpf_insn *insns = env->prog->insnsi;
10071 	int ret;
10072 
10073 	if (bpf_pseudo_func(insns + t))
10074 		return visit_func_call_insn(t, insn_cnt, insns, env, true);
10075 
10076 	/* All non-branch instructions have a single fall-through edge. */
10077 	if (BPF_CLASS(insns[t].code) != BPF_JMP &&
10078 	    BPF_CLASS(insns[t].code) != BPF_JMP32)
10079 		return push_insn(t, t + 1, FALLTHROUGH, env, false);
10080 
10081 	switch (BPF_OP(insns[t].code)) {
10082 	case BPF_EXIT:
10083 		return DONE_EXPLORING;
10084 
10085 	case BPF_CALL:
10086 		if (insns[t].imm == BPF_FUNC_timer_set_callback)
10087 			/* Mark this call insn to trigger is_state_visited() check
10088 			 * before call itself is processed by __check_func_call().
10089 			 * Otherwise new async state will be pushed for further
10090 			 * exploration.
10091 			 */
10092 			init_explored_state(env, t);
10093 		return visit_func_call_insn(t, insn_cnt, insns, env,
10094 					    insns[t].src_reg == BPF_PSEUDO_CALL);
10095 
10096 	case BPF_JA:
10097 		if (BPF_SRC(insns[t].code) != BPF_K)
10098 			return -EINVAL;
10099 
10100 		/* unconditional jump with single edge */
10101 		ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
10102 				true);
10103 		if (ret)
10104 			return ret;
10105 
10106 		/* unconditional jmp is not a good pruning point,
10107 		 * but it's marked, since backtracking needs
10108 		 * to record jmp history in is_state_visited().
10109 		 */
10110 		init_explored_state(env, t + insns[t].off + 1);
10111 		/* tell verifier to check for equivalent states
10112 		 * after every call and jump
10113 		 */
10114 		if (t + 1 < insn_cnt)
10115 			init_explored_state(env, t + 1);
10116 
10117 		return ret;
10118 
10119 	default:
10120 		/* conditional jump with two edges */
10121 		init_explored_state(env, t);
10122 		ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
10123 		if (ret)
10124 			return ret;
10125 
10126 		return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
10127 	}
10128 }
10129 
10130 /* non-recursive depth-first-search to detect loops in BPF program
10131  * loop == back-edge in directed graph
10132  */
10133 static int check_cfg(struct bpf_verifier_env *env)
10134 {
10135 	int insn_cnt = env->prog->len;
10136 	int *insn_stack, *insn_state;
10137 	int ret = 0;
10138 	int i;
10139 
10140 	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
10141 	if (!insn_state)
10142 		return -ENOMEM;
10143 
10144 	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
10145 	if (!insn_stack) {
10146 		kvfree(insn_state);
10147 		return -ENOMEM;
10148 	}
10149 
10150 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
10151 	insn_stack[0] = 0; /* 0 is the first instruction */
10152 	env->cfg.cur_stack = 1;
10153 
10154 	while (env->cfg.cur_stack > 0) {
10155 		int t = insn_stack[env->cfg.cur_stack - 1];
10156 
10157 		ret = visit_insn(t, insn_cnt, env);
10158 		switch (ret) {
10159 		case DONE_EXPLORING:
10160 			insn_state[t] = EXPLORED;
10161 			env->cfg.cur_stack--;
10162 			break;
10163 		case KEEP_EXPLORING:
10164 			break;
10165 		default:
10166 			if (ret > 0) {
10167 				verbose(env, "visit_insn internal bug\n");
10168 				ret = -EFAULT;
10169 			}
10170 			goto err_free;
10171 		}
10172 	}
10173 
10174 	if (env->cfg.cur_stack < 0) {
10175 		verbose(env, "pop stack internal bug\n");
10176 		ret = -EFAULT;
10177 		goto err_free;
10178 	}
10179 
10180 	for (i = 0; i < insn_cnt; i++) {
10181 		if (insn_state[i] != EXPLORED) {
10182 			verbose(env, "unreachable insn %d\n", i);
10183 			ret = -EINVAL;
10184 			goto err_free;
10185 		}
10186 	}
10187 	ret = 0; /* cfg looks good */
10188 
10189 err_free:
10190 	kvfree(insn_state);
10191 	kvfree(insn_stack);
10192 	env->cfg.insn_state = env->cfg.insn_stack = NULL;
10193 	return ret;
10194 }
10195 
10196 static int check_abnormal_return(struct bpf_verifier_env *env)
10197 {
10198 	int i;
10199 
10200 	for (i = 1; i < env->subprog_cnt; i++) {
10201 		if (env->subprog_info[i].has_ld_abs) {
10202 			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
10203 			return -EINVAL;
10204 		}
10205 		if (env->subprog_info[i].has_tail_call) {
10206 			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
10207 			return -EINVAL;
10208 		}
10209 	}
10210 	return 0;
10211 }
10212 
10213 /* The minimum supported BTF func info size */
10214 #define MIN_BPF_FUNCINFO_SIZE	8
10215 #define MAX_FUNCINFO_REC_SIZE	252
10216 
10217 static int check_btf_func(struct bpf_verifier_env *env,
10218 			  const union bpf_attr *attr,
10219 			  bpfptr_t uattr)
10220 {
10221 	const struct btf_type *type, *func_proto, *ret_type;
10222 	u32 i, nfuncs, urec_size, min_size;
10223 	u32 krec_size = sizeof(struct bpf_func_info);
10224 	struct bpf_func_info *krecord;
10225 	struct bpf_func_info_aux *info_aux = NULL;
10226 	struct bpf_prog *prog;
10227 	const struct btf *btf;
10228 	bpfptr_t urecord;
10229 	u32 prev_offset = 0;
10230 	bool scalar_return;
10231 	int ret = -ENOMEM;
10232 
10233 	nfuncs = attr->func_info_cnt;
10234 	if (!nfuncs) {
10235 		if (check_abnormal_return(env))
10236 			return -EINVAL;
10237 		return 0;
10238 	}
10239 
10240 	if (nfuncs != env->subprog_cnt) {
10241 		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
10242 		return -EINVAL;
10243 	}
10244 
10245 	urec_size = attr->func_info_rec_size;
10246 	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
10247 	    urec_size > MAX_FUNCINFO_REC_SIZE ||
10248 	    urec_size % sizeof(u32)) {
10249 		verbose(env, "invalid func info rec size %u\n", urec_size);
10250 		return -EINVAL;
10251 	}
10252 
10253 	prog = env->prog;
10254 	btf = prog->aux->btf;
10255 
10256 	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
10257 	min_size = min_t(u32, krec_size, urec_size);
10258 
10259 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
10260 	if (!krecord)
10261 		return -ENOMEM;
10262 	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
10263 	if (!info_aux)
10264 		goto err_free;
10265 
10266 	for (i = 0; i < nfuncs; i++) {
10267 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
10268 		if (ret) {
10269 			if (ret == -E2BIG) {
10270 				verbose(env, "nonzero tailing record in func info");
10271 				/* set the size kernel expects so loader can zero
10272 				 * out the rest of the record.
10273 				 */
10274 				if (copy_to_bpfptr_offset(uattr,
10275 							  offsetof(union bpf_attr, func_info_rec_size),
10276 							  &min_size, sizeof(min_size)))
10277 					ret = -EFAULT;
10278 			}
10279 			goto err_free;
10280 		}
10281 
10282 		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
10283 			ret = -EFAULT;
10284 			goto err_free;
10285 		}
10286 
10287 		/* check insn_off */
10288 		ret = -EINVAL;
10289 		if (i == 0) {
10290 			if (krecord[i].insn_off) {
10291 				verbose(env,
10292 					"nonzero insn_off %u for the first func info record",
10293 					krecord[i].insn_off);
10294 				goto err_free;
10295 			}
10296 		} else if (krecord[i].insn_off <= prev_offset) {
10297 			verbose(env,
10298 				"same or smaller insn offset (%u) than previous func info record (%u)",
10299 				krecord[i].insn_off, prev_offset);
10300 			goto err_free;
10301 		}
10302 
10303 		if (env->subprog_info[i].start != krecord[i].insn_off) {
10304 			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
10305 			goto err_free;
10306 		}
10307 
10308 		/* check type_id */
10309 		type = btf_type_by_id(btf, krecord[i].type_id);
10310 		if (!type || !btf_type_is_func(type)) {
10311 			verbose(env, "invalid type id %d in func info",
10312 				krecord[i].type_id);
10313 			goto err_free;
10314 		}
10315 		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
10316 
10317 		func_proto = btf_type_by_id(btf, type->type);
10318 		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
10319 			/* btf_func_check() already verified it during BTF load */
10320 			goto err_free;
10321 		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
10322 		scalar_return =
10323 			btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
10324 		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
10325 			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
10326 			goto err_free;
10327 		}
10328 		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
10329 			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
10330 			goto err_free;
10331 		}
10332 
10333 		prev_offset = krecord[i].insn_off;
10334 		bpfptr_add(&urecord, urec_size);
10335 	}
10336 
10337 	prog->aux->func_info = krecord;
10338 	prog->aux->func_info_cnt = nfuncs;
10339 	prog->aux->func_info_aux = info_aux;
10340 	return 0;
10341 
10342 err_free:
10343 	kvfree(krecord);
10344 	kfree(info_aux);
10345 	return ret;
10346 }
10347 
10348 static void adjust_btf_func(struct bpf_verifier_env *env)
10349 {
10350 	struct bpf_prog_aux *aux = env->prog->aux;
10351 	int i;
10352 
10353 	if (!aux->func_info)
10354 		return;
10355 
10356 	for (i = 0; i < env->subprog_cnt; i++)
10357 		aux->func_info[i].insn_off = env->subprog_info[i].start;
10358 }
10359 
10360 #define MIN_BPF_LINEINFO_SIZE	(offsetof(struct bpf_line_info, line_col) + \
10361 		sizeof(((struct bpf_line_info *)(0))->line_col))
10362 #define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
10363 
10364 static int check_btf_line(struct bpf_verifier_env *env,
10365 			  const union bpf_attr *attr,
10366 			  bpfptr_t uattr)
10367 {
10368 	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
10369 	struct bpf_subprog_info *sub;
10370 	struct bpf_line_info *linfo;
10371 	struct bpf_prog *prog;
10372 	const struct btf *btf;
10373 	bpfptr_t ulinfo;
10374 	int err;
10375 
10376 	nr_linfo = attr->line_info_cnt;
10377 	if (!nr_linfo)
10378 		return 0;
10379 	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
10380 		return -EINVAL;
10381 
10382 	rec_size = attr->line_info_rec_size;
10383 	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
10384 	    rec_size > MAX_LINEINFO_REC_SIZE ||
10385 	    rec_size & (sizeof(u32) - 1))
10386 		return -EINVAL;
10387 
10388 	/* Need to zero it in case the userspace may
10389 	 * pass in a smaller bpf_line_info object.
10390 	 */
10391 	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
10392 			 GFP_KERNEL | __GFP_NOWARN);
10393 	if (!linfo)
10394 		return -ENOMEM;
10395 
10396 	prog = env->prog;
10397 	btf = prog->aux->btf;
10398 
10399 	s = 0;
10400 	sub = env->subprog_info;
10401 	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
10402 	expected_size = sizeof(struct bpf_line_info);
10403 	ncopy = min_t(u32, expected_size, rec_size);
10404 	for (i = 0; i < nr_linfo; i++) {
10405 		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
10406 		if (err) {
10407 			if (err == -E2BIG) {
10408 				verbose(env, "nonzero tailing record in line_info");
10409 				if (copy_to_bpfptr_offset(uattr,
10410 							  offsetof(union bpf_attr, line_info_rec_size),
10411 							  &expected_size, sizeof(expected_size)))
10412 					err = -EFAULT;
10413 			}
10414 			goto err_free;
10415 		}
10416 
10417 		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
10418 			err = -EFAULT;
10419 			goto err_free;
10420 		}
10421 
10422 		/*
10423 		 * Check insn_off to ensure
10424 		 * 1) strictly increasing AND
10425 		 * 2) bounded by prog->len
10426 		 *
10427 		 * The linfo[0].insn_off == 0 check logically falls into
10428 		 * the later "missing bpf_line_info for func..." case
10429 		 * because the first linfo[0].insn_off must be the
10430 		 * first sub also and the first sub must have
10431 		 * subprog_info[0].start == 0.
10432 		 */
10433 		if ((i && linfo[i].insn_off <= prev_offset) ||
10434 		    linfo[i].insn_off >= prog->len) {
10435 			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
10436 				i, linfo[i].insn_off, prev_offset,
10437 				prog->len);
10438 			err = -EINVAL;
10439 			goto err_free;
10440 		}
10441 
10442 		if (!prog->insnsi[linfo[i].insn_off].code) {
10443 			verbose(env,
10444 				"Invalid insn code at line_info[%u].insn_off\n",
10445 				i);
10446 			err = -EINVAL;
10447 			goto err_free;
10448 		}
10449 
10450 		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
10451 		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
10452 			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
10453 			err = -EINVAL;
10454 			goto err_free;
10455 		}
10456 
10457 		if (s != env->subprog_cnt) {
10458 			if (linfo[i].insn_off == sub[s].start) {
10459 				sub[s].linfo_idx = i;
10460 				s++;
10461 			} else if (sub[s].start < linfo[i].insn_off) {
10462 				verbose(env, "missing bpf_line_info for func#%u\n", s);
10463 				err = -EINVAL;
10464 				goto err_free;
10465 			}
10466 		}
10467 
10468 		prev_offset = linfo[i].insn_off;
10469 		bpfptr_add(&ulinfo, rec_size);
10470 	}
10471 
10472 	if (s != env->subprog_cnt) {
10473 		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
10474 			env->subprog_cnt - s, s);
10475 		err = -EINVAL;
10476 		goto err_free;
10477 	}
10478 
10479 	prog->aux->linfo = linfo;
10480 	prog->aux->nr_linfo = nr_linfo;
10481 
10482 	return 0;
10483 
10484 err_free:
10485 	kvfree(linfo);
10486 	return err;
10487 }
10488 
10489 #define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
10490 #define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
10491 
10492 static int check_core_relo(struct bpf_verifier_env *env,
10493 			   const union bpf_attr *attr,
10494 			   bpfptr_t uattr)
10495 {
10496 	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
10497 	struct bpf_core_relo core_relo = {};
10498 	struct bpf_prog *prog = env->prog;
10499 	const struct btf *btf = prog->aux->btf;
10500 	struct bpf_core_ctx ctx = {
10501 		.log = &env->log,
10502 		.btf = btf,
10503 	};
10504 	bpfptr_t u_core_relo;
10505 	int err;
10506 
10507 	nr_core_relo = attr->core_relo_cnt;
10508 	if (!nr_core_relo)
10509 		return 0;
10510 	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
10511 		return -EINVAL;
10512 
10513 	rec_size = attr->core_relo_rec_size;
10514 	if (rec_size < MIN_CORE_RELO_SIZE ||
10515 	    rec_size > MAX_CORE_RELO_SIZE ||
10516 	    rec_size % sizeof(u32))
10517 		return -EINVAL;
10518 
10519 	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
10520 	expected_size = sizeof(struct bpf_core_relo);
10521 	ncopy = min_t(u32, expected_size, rec_size);
10522 
10523 	/* Unlike func_info and line_info, copy and apply each CO-RE
10524 	 * relocation record one at a time.
10525 	 */
10526 	for (i = 0; i < nr_core_relo; i++) {
10527 		/* future proofing when sizeof(bpf_core_relo) changes */
10528 		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
10529 		if (err) {
10530 			if (err == -E2BIG) {
10531 				verbose(env, "nonzero tailing record in core_relo");
10532 				if (copy_to_bpfptr_offset(uattr,
10533 							  offsetof(union bpf_attr, core_relo_rec_size),
10534 							  &expected_size, sizeof(expected_size)))
10535 					err = -EFAULT;
10536 			}
10537 			break;
10538 		}
10539 
10540 		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
10541 			err = -EFAULT;
10542 			break;
10543 		}
10544 
10545 		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
10546 			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
10547 				i, core_relo.insn_off, prog->len);
10548 			err = -EINVAL;
10549 			break;
10550 		}
10551 
10552 		err = bpf_core_apply(&ctx, &core_relo, i,
10553 				     &prog->insnsi[core_relo.insn_off / 8]);
10554 		if (err)
10555 			break;
10556 		bpfptr_add(&u_core_relo, rec_size);
10557 	}
10558 	return err;
10559 }
10560 
10561 static int check_btf_info(struct bpf_verifier_env *env,
10562 			  const union bpf_attr *attr,
10563 			  bpfptr_t uattr)
10564 {
10565 	struct btf *btf;
10566 	int err;
10567 
10568 	if (!attr->func_info_cnt && !attr->line_info_cnt) {
10569 		if (check_abnormal_return(env))
10570 			return -EINVAL;
10571 		return 0;
10572 	}
10573 
10574 	btf = btf_get_by_fd(attr->prog_btf_fd);
10575 	if (IS_ERR(btf))
10576 		return PTR_ERR(btf);
10577 	if (btf_is_kernel(btf)) {
10578 		btf_put(btf);
10579 		return -EACCES;
10580 	}
10581 	env->prog->aux->btf = btf;
10582 
10583 	err = check_btf_func(env, attr, uattr);
10584 	if (err)
10585 		return err;
10586 
10587 	err = check_btf_line(env, attr, uattr);
10588 	if (err)
10589 		return err;
10590 
10591 	err = check_core_relo(env, attr, uattr);
10592 	if (err)
10593 		return err;
10594 
10595 	return 0;
10596 }
10597 
10598 /* check %cur's range satisfies %old's */
10599 static bool range_within(struct bpf_reg_state *old,
10600 			 struct bpf_reg_state *cur)
10601 {
10602 	return old->umin_value <= cur->umin_value &&
10603 	       old->umax_value >= cur->umax_value &&
10604 	       old->smin_value <= cur->smin_value &&
10605 	       old->smax_value >= cur->smax_value &&
10606 	       old->u32_min_value <= cur->u32_min_value &&
10607 	       old->u32_max_value >= cur->u32_max_value &&
10608 	       old->s32_min_value <= cur->s32_min_value &&
10609 	       old->s32_max_value >= cur->s32_max_value;
10610 }
10611 
10612 /* If in the old state two registers had the same id, then they need to have
10613  * the same id in the new state as well.  But that id could be different from
10614  * the old state, so we need to track the mapping from old to new ids.
10615  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
10616  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
10617  * regs with a different old id could still have new id 9, we don't care about
10618  * that.
10619  * So we look through our idmap to see if this old id has been seen before.  If
10620  * so, we require the new id to match; otherwise, we add the id pair to the map.
10621  */
10622 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
10623 {
10624 	unsigned int i;
10625 
10626 	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
10627 		if (!idmap[i].old) {
10628 			/* Reached an empty slot; haven't seen this id before */
10629 			idmap[i].old = old_id;
10630 			idmap[i].cur = cur_id;
10631 			return true;
10632 		}
10633 		if (idmap[i].old == old_id)
10634 			return idmap[i].cur == cur_id;
10635 	}
10636 	/* We ran out of idmap slots, which should be impossible */
10637 	WARN_ON_ONCE(1);
10638 	return false;
10639 }
10640 
10641 static void clean_func_state(struct bpf_verifier_env *env,
10642 			     struct bpf_func_state *st)
10643 {
10644 	enum bpf_reg_liveness live;
10645 	int i, j;
10646 
10647 	for (i = 0; i < BPF_REG_FP; i++) {
10648 		live = st->regs[i].live;
10649 		/* liveness must not touch this register anymore */
10650 		st->regs[i].live |= REG_LIVE_DONE;
10651 		if (!(live & REG_LIVE_READ))
10652 			/* since the register is unused, clear its state
10653 			 * to make further comparison simpler
10654 			 */
10655 			__mark_reg_not_init(env, &st->regs[i]);
10656 	}
10657 
10658 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
10659 		live = st->stack[i].spilled_ptr.live;
10660 		/* liveness must not touch this stack slot anymore */
10661 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
10662 		if (!(live & REG_LIVE_READ)) {
10663 			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
10664 			for (j = 0; j < BPF_REG_SIZE; j++)
10665 				st->stack[i].slot_type[j] = STACK_INVALID;
10666 		}
10667 	}
10668 }
10669 
10670 static void clean_verifier_state(struct bpf_verifier_env *env,
10671 				 struct bpf_verifier_state *st)
10672 {
10673 	int i;
10674 
10675 	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
10676 		/* all regs in this state in all frames were already marked */
10677 		return;
10678 
10679 	for (i = 0; i <= st->curframe; i++)
10680 		clean_func_state(env, st->frame[i]);
10681 }
10682 
10683 /* the parentage chains form a tree.
10684  * the verifier states are added to state lists at given insn and
10685  * pushed into state stack for future exploration.
10686  * when the verifier reaches bpf_exit insn some of the verifer states
10687  * stored in the state lists have their final liveness state already,
10688  * but a lot of states will get revised from liveness point of view when
10689  * the verifier explores other branches.
10690  * Example:
10691  * 1: r0 = 1
10692  * 2: if r1 == 100 goto pc+1
10693  * 3: r0 = 2
10694  * 4: exit
10695  * when the verifier reaches exit insn the register r0 in the state list of
10696  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
10697  * of insn 2 and goes exploring further. At the insn 4 it will walk the
10698  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
10699  *
10700  * Since the verifier pushes the branch states as it sees them while exploring
10701  * the program the condition of walking the branch instruction for the second
10702  * time means that all states below this branch were already explored and
10703  * their final liveness marks are already propagated.
10704  * Hence when the verifier completes the search of state list in is_state_visited()
10705  * we can call this clean_live_states() function to mark all liveness states
10706  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
10707  * will not be used.
10708  * This function also clears the registers and stack for states that !READ
10709  * to simplify state merging.
10710  *
10711  * Important note here that walking the same branch instruction in the callee
10712  * doesn't meant that the states are DONE. The verifier has to compare
10713  * the callsites
10714  */
10715 static void clean_live_states(struct bpf_verifier_env *env, int insn,
10716 			      struct bpf_verifier_state *cur)
10717 {
10718 	struct bpf_verifier_state_list *sl;
10719 	int i;
10720 
10721 	sl = *explored_state(env, insn);
10722 	while (sl) {
10723 		if (sl->state.branches)
10724 			goto next;
10725 		if (sl->state.insn_idx != insn ||
10726 		    sl->state.curframe != cur->curframe)
10727 			goto next;
10728 		for (i = 0; i <= cur->curframe; i++)
10729 			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
10730 				goto next;
10731 		clean_verifier_state(env, &sl->state);
10732 next:
10733 		sl = sl->next;
10734 	}
10735 }
10736 
10737 /* Returns true if (rold safe implies rcur safe) */
10738 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
10739 		    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
10740 {
10741 	bool equal;
10742 
10743 	if (!(rold->live & REG_LIVE_READ))
10744 		/* explored state didn't use this */
10745 		return true;
10746 
10747 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
10748 
10749 	if (rold->type == PTR_TO_STACK)
10750 		/* two stack pointers are equal only if they're pointing to
10751 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
10752 		 */
10753 		return equal && rold->frameno == rcur->frameno;
10754 
10755 	if (equal)
10756 		return true;
10757 
10758 	if (rold->type == NOT_INIT)
10759 		/* explored state can't have used this */
10760 		return true;
10761 	if (rcur->type == NOT_INIT)
10762 		return false;
10763 	switch (base_type(rold->type)) {
10764 	case SCALAR_VALUE:
10765 		if (env->explore_alu_limits)
10766 			return false;
10767 		if (rcur->type == SCALAR_VALUE) {
10768 			if (!rold->precise && !rcur->precise)
10769 				return true;
10770 			/* new val must satisfy old val knowledge */
10771 			return range_within(rold, rcur) &&
10772 			       tnum_in(rold->var_off, rcur->var_off);
10773 		} else {
10774 			/* We're trying to use a pointer in place of a scalar.
10775 			 * Even if the scalar was unbounded, this could lead to
10776 			 * pointer leaks because scalars are allowed to leak
10777 			 * while pointers are not. We could make this safe in
10778 			 * special cases if root is calling us, but it's
10779 			 * probably not worth the hassle.
10780 			 */
10781 			return false;
10782 		}
10783 	case PTR_TO_MAP_KEY:
10784 	case PTR_TO_MAP_VALUE:
10785 		/* a PTR_TO_MAP_VALUE could be safe to use as a
10786 		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
10787 		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
10788 		 * checked, doing so could have affected others with the same
10789 		 * id, and we can't check for that because we lost the id when
10790 		 * we converted to a PTR_TO_MAP_VALUE.
10791 		 */
10792 		if (type_may_be_null(rold->type)) {
10793 			if (!type_may_be_null(rcur->type))
10794 				return false;
10795 			if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
10796 				return false;
10797 			/* Check our ids match any regs they're supposed to */
10798 			return check_ids(rold->id, rcur->id, idmap);
10799 		}
10800 
10801 		/* If the new min/max/var_off satisfy the old ones and
10802 		 * everything else matches, we are OK.
10803 		 * 'id' is not compared, since it's only used for maps with
10804 		 * bpf_spin_lock inside map element and in such cases if
10805 		 * the rest of the prog is valid for one map element then
10806 		 * it's valid for all map elements regardless of the key
10807 		 * used in bpf_map_lookup()
10808 		 */
10809 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
10810 		       range_within(rold, rcur) &&
10811 		       tnum_in(rold->var_off, rcur->var_off);
10812 	case PTR_TO_PACKET_META:
10813 	case PTR_TO_PACKET:
10814 		if (rcur->type != rold->type)
10815 			return false;
10816 		/* We must have at least as much range as the old ptr
10817 		 * did, so that any accesses which were safe before are
10818 		 * still safe.  This is true even if old range < old off,
10819 		 * since someone could have accessed through (ptr - k), or
10820 		 * even done ptr -= k in a register, to get a safe access.
10821 		 */
10822 		if (rold->range > rcur->range)
10823 			return false;
10824 		/* If the offsets don't match, we can't trust our alignment;
10825 		 * nor can we be sure that we won't fall out of range.
10826 		 */
10827 		if (rold->off != rcur->off)
10828 			return false;
10829 		/* id relations must be preserved */
10830 		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
10831 			return false;
10832 		/* new val must satisfy old val knowledge */
10833 		return range_within(rold, rcur) &&
10834 		       tnum_in(rold->var_off, rcur->var_off);
10835 	case PTR_TO_CTX:
10836 	case CONST_PTR_TO_MAP:
10837 	case PTR_TO_PACKET_END:
10838 	case PTR_TO_FLOW_KEYS:
10839 	case PTR_TO_SOCKET:
10840 	case PTR_TO_SOCK_COMMON:
10841 	case PTR_TO_TCP_SOCK:
10842 	case PTR_TO_XDP_SOCK:
10843 		/* Only valid matches are exact, which memcmp() above
10844 		 * would have accepted
10845 		 */
10846 	default:
10847 		/* Don't know what's going on, just say it's not safe */
10848 		return false;
10849 	}
10850 
10851 	/* Shouldn't get here; if we do, say it's not safe */
10852 	WARN_ON_ONCE(1);
10853 	return false;
10854 }
10855 
10856 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
10857 		      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
10858 {
10859 	int i, spi;
10860 
10861 	/* walk slots of the explored stack and ignore any additional
10862 	 * slots in the current stack, since explored(safe) state
10863 	 * didn't use them
10864 	 */
10865 	for (i = 0; i < old->allocated_stack; i++) {
10866 		spi = i / BPF_REG_SIZE;
10867 
10868 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
10869 			i += BPF_REG_SIZE - 1;
10870 			/* explored state didn't use this */
10871 			continue;
10872 		}
10873 
10874 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
10875 			continue;
10876 
10877 		/* explored stack has more populated slots than current stack
10878 		 * and these slots were used
10879 		 */
10880 		if (i >= cur->allocated_stack)
10881 			return false;
10882 
10883 		/* if old state was safe with misc data in the stack
10884 		 * it will be safe with zero-initialized stack.
10885 		 * The opposite is not true
10886 		 */
10887 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
10888 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
10889 			continue;
10890 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
10891 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
10892 			/* Ex: old explored (safe) state has STACK_SPILL in
10893 			 * this stack slot, but current has STACK_MISC ->
10894 			 * this verifier states are not equivalent,
10895 			 * return false to continue verification of this path
10896 			 */
10897 			return false;
10898 		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
10899 			continue;
10900 		if (!is_spilled_reg(&old->stack[spi]))
10901 			continue;
10902 		if (!regsafe(env, &old->stack[spi].spilled_ptr,
10903 			     &cur->stack[spi].spilled_ptr, idmap))
10904 			/* when explored and current stack slot are both storing
10905 			 * spilled registers, check that stored pointers types
10906 			 * are the same as well.
10907 			 * Ex: explored safe path could have stored
10908 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
10909 			 * but current path has stored:
10910 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
10911 			 * such verifier states are not equivalent.
10912 			 * return false to continue verification of this path
10913 			 */
10914 			return false;
10915 	}
10916 	return true;
10917 }
10918 
10919 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
10920 {
10921 	if (old->acquired_refs != cur->acquired_refs)
10922 		return false;
10923 	return !memcmp(old->refs, cur->refs,
10924 		       sizeof(*old->refs) * old->acquired_refs);
10925 }
10926 
10927 /* compare two verifier states
10928  *
10929  * all states stored in state_list are known to be valid, since
10930  * verifier reached 'bpf_exit' instruction through them
10931  *
10932  * this function is called when verifier exploring different branches of
10933  * execution popped from the state stack. If it sees an old state that has
10934  * more strict register state and more strict stack state then this execution
10935  * branch doesn't need to be explored further, since verifier already
10936  * concluded that more strict state leads to valid finish.
10937  *
10938  * Therefore two states are equivalent if register state is more conservative
10939  * and explored stack state is more conservative than the current one.
10940  * Example:
10941  *       explored                   current
10942  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
10943  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
10944  *
10945  * In other words if current stack state (one being explored) has more
10946  * valid slots than old one that already passed validation, it means
10947  * the verifier can stop exploring and conclude that current state is valid too
10948  *
10949  * Similarly with registers. If explored state has register type as invalid
10950  * whereas register type in current state is meaningful, it means that
10951  * the current state will reach 'bpf_exit' instruction safely
10952  */
10953 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
10954 			      struct bpf_func_state *cur)
10955 {
10956 	int i;
10957 
10958 	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
10959 	for (i = 0; i < MAX_BPF_REG; i++)
10960 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
10961 			     env->idmap_scratch))
10962 			return false;
10963 
10964 	if (!stacksafe(env, old, cur, env->idmap_scratch))
10965 		return false;
10966 
10967 	if (!refsafe(old, cur))
10968 		return false;
10969 
10970 	return true;
10971 }
10972 
10973 static bool states_equal(struct bpf_verifier_env *env,
10974 			 struct bpf_verifier_state *old,
10975 			 struct bpf_verifier_state *cur)
10976 {
10977 	int i;
10978 
10979 	if (old->curframe != cur->curframe)
10980 		return false;
10981 
10982 	/* Verification state from speculative execution simulation
10983 	 * must never prune a non-speculative execution one.
10984 	 */
10985 	if (old->speculative && !cur->speculative)
10986 		return false;
10987 
10988 	if (old->active_spin_lock != cur->active_spin_lock)
10989 		return false;
10990 
10991 	/* for states to be equal callsites have to be the same
10992 	 * and all frame states need to be equivalent
10993 	 */
10994 	for (i = 0; i <= old->curframe; i++) {
10995 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
10996 			return false;
10997 		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
10998 			return false;
10999 	}
11000 	return true;
11001 }
11002 
11003 /* Return 0 if no propagation happened. Return negative error code if error
11004  * happened. Otherwise, return the propagated bit.
11005  */
11006 static int propagate_liveness_reg(struct bpf_verifier_env *env,
11007 				  struct bpf_reg_state *reg,
11008 				  struct bpf_reg_state *parent_reg)
11009 {
11010 	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
11011 	u8 flag = reg->live & REG_LIVE_READ;
11012 	int err;
11013 
11014 	/* When comes here, read flags of PARENT_REG or REG could be any of
11015 	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
11016 	 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
11017 	 */
11018 	if (parent_flag == REG_LIVE_READ64 ||
11019 	    /* Or if there is no read flag from REG. */
11020 	    !flag ||
11021 	    /* Or if the read flag from REG is the same as PARENT_REG. */
11022 	    parent_flag == flag)
11023 		return 0;
11024 
11025 	err = mark_reg_read(env, reg, parent_reg, flag);
11026 	if (err)
11027 		return err;
11028 
11029 	return flag;
11030 }
11031 
11032 /* A write screens off any subsequent reads; but write marks come from the
11033  * straight-line code between a state and its parent.  When we arrive at an
11034  * equivalent state (jump target or such) we didn't arrive by the straight-line
11035  * code, so read marks in the state must propagate to the parent regardless
11036  * of the state's write marks. That's what 'parent == state->parent' comparison
11037  * in mark_reg_read() is for.
11038  */
11039 static int propagate_liveness(struct bpf_verifier_env *env,
11040 			      const struct bpf_verifier_state *vstate,
11041 			      struct bpf_verifier_state *vparent)
11042 {
11043 	struct bpf_reg_state *state_reg, *parent_reg;
11044 	struct bpf_func_state *state, *parent;
11045 	int i, frame, err = 0;
11046 
11047 	if (vparent->curframe != vstate->curframe) {
11048 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
11049 		     vparent->curframe, vstate->curframe);
11050 		return -EFAULT;
11051 	}
11052 	/* Propagate read liveness of registers... */
11053 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
11054 	for (frame = 0; frame <= vstate->curframe; frame++) {
11055 		parent = vparent->frame[frame];
11056 		state = vstate->frame[frame];
11057 		parent_reg = parent->regs;
11058 		state_reg = state->regs;
11059 		/* We don't need to worry about FP liveness, it's read-only */
11060 		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
11061 			err = propagate_liveness_reg(env, &state_reg[i],
11062 						     &parent_reg[i]);
11063 			if (err < 0)
11064 				return err;
11065 			if (err == REG_LIVE_READ64)
11066 				mark_insn_zext(env, &parent_reg[i]);
11067 		}
11068 
11069 		/* Propagate stack slots. */
11070 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
11071 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
11072 			parent_reg = &parent->stack[i].spilled_ptr;
11073 			state_reg = &state->stack[i].spilled_ptr;
11074 			err = propagate_liveness_reg(env, state_reg,
11075 						     parent_reg);
11076 			if (err < 0)
11077 				return err;
11078 		}
11079 	}
11080 	return 0;
11081 }
11082 
11083 /* find precise scalars in the previous equivalent state and
11084  * propagate them into the current state
11085  */
11086 static int propagate_precision(struct bpf_verifier_env *env,
11087 			       const struct bpf_verifier_state *old)
11088 {
11089 	struct bpf_reg_state *state_reg;
11090 	struct bpf_func_state *state;
11091 	int i, err = 0;
11092 
11093 	state = old->frame[old->curframe];
11094 	state_reg = state->regs;
11095 	for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
11096 		if (state_reg->type != SCALAR_VALUE ||
11097 		    !state_reg->precise)
11098 			continue;
11099 		if (env->log.level & BPF_LOG_LEVEL2)
11100 			verbose(env, "propagating r%d\n", i);
11101 		err = mark_chain_precision(env, i);
11102 		if (err < 0)
11103 			return err;
11104 	}
11105 
11106 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
11107 		if (!is_spilled_reg(&state->stack[i]))
11108 			continue;
11109 		state_reg = &state->stack[i].spilled_ptr;
11110 		if (state_reg->type != SCALAR_VALUE ||
11111 		    !state_reg->precise)
11112 			continue;
11113 		if (env->log.level & BPF_LOG_LEVEL2)
11114 			verbose(env, "propagating fp%d\n",
11115 				(-i - 1) * BPF_REG_SIZE);
11116 		err = mark_chain_precision_stack(env, i);
11117 		if (err < 0)
11118 			return err;
11119 	}
11120 	return 0;
11121 }
11122 
11123 static bool states_maybe_looping(struct bpf_verifier_state *old,
11124 				 struct bpf_verifier_state *cur)
11125 {
11126 	struct bpf_func_state *fold, *fcur;
11127 	int i, fr = cur->curframe;
11128 
11129 	if (old->curframe != fr)
11130 		return false;
11131 
11132 	fold = old->frame[fr];
11133 	fcur = cur->frame[fr];
11134 	for (i = 0; i < MAX_BPF_REG; i++)
11135 		if (memcmp(&fold->regs[i], &fcur->regs[i],
11136 			   offsetof(struct bpf_reg_state, parent)))
11137 			return false;
11138 	return true;
11139 }
11140 
11141 
11142 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
11143 {
11144 	struct bpf_verifier_state_list *new_sl;
11145 	struct bpf_verifier_state_list *sl, **pprev;
11146 	struct bpf_verifier_state *cur = env->cur_state, *new;
11147 	int i, j, err, states_cnt = 0;
11148 	bool add_new_state = env->test_state_freq ? true : false;
11149 
11150 	cur->last_insn_idx = env->prev_insn_idx;
11151 	if (!env->insn_aux_data[insn_idx].prune_point)
11152 		/* this 'insn_idx' instruction wasn't marked, so we will not
11153 		 * be doing state search here
11154 		 */
11155 		return 0;
11156 
11157 	/* bpf progs typically have pruning point every 4 instructions
11158 	 * http://vger.kernel.org/bpfconf2019.html#session-1
11159 	 * Do not add new state for future pruning if the verifier hasn't seen
11160 	 * at least 2 jumps and at least 8 instructions.
11161 	 * This heuristics helps decrease 'total_states' and 'peak_states' metric.
11162 	 * In tests that amounts to up to 50% reduction into total verifier
11163 	 * memory consumption and 20% verifier time speedup.
11164 	 */
11165 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
11166 	    env->insn_processed - env->prev_insn_processed >= 8)
11167 		add_new_state = true;
11168 
11169 	pprev = explored_state(env, insn_idx);
11170 	sl = *pprev;
11171 
11172 	clean_live_states(env, insn_idx, cur);
11173 
11174 	while (sl) {
11175 		states_cnt++;
11176 		if (sl->state.insn_idx != insn_idx)
11177 			goto next;
11178 
11179 		if (sl->state.branches) {
11180 			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
11181 
11182 			if (frame->in_async_callback_fn &&
11183 			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
11184 				/* Different async_entry_cnt means that the verifier is
11185 				 * processing another entry into async callback.
11186 				 * Seeing the same state is not an indication of infinite
11187 				 * loop or infinite recursion.
11188 				 * But finding the same state doesn't mean that it's safe
11189 				 * to stop processing the current state. The previous state
11190 				 * hasn't yet reached bpf_exit, since state.branches > 0.
11191 				 * Checking in_async_callback_fn alone is not enough either.
11192 				 * Since the verifier still needs to catch infinite loops
11193 				 * inside async callbacks.
11194 				 */
11195 			} else if (states_maybe_looping(&sl->state, cur) &&
11196 				   states_equal(env, &sl->state, cur)) {
11197 				verbose_linfo(env, insn_idx, "; ");
11198 				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
11199 				return -EINVAL;
11200 			}
11201 			/* if the verifier is processing a loop, avoid adding new state
11202 			 * too often, since different loop iterations have distinct
11203 			 * states and may not help future pruning.
11204 			 * This threshold shouldn't be too low to make sure that
11205 			 * a loop with large bound will be rejected quickly.
11206 			 * The most abusive loop will be:
11207 			 * r1 += 1
11208 			 * if r1 < 1000000 goto pc-2
11209 			 * 1M insn_procssed limit / 100 == 10k peak states.
11210 			 * This threshold shouldn't be too high either, since states
11211 			 * at the end of the loop are likely to be useful in pruning.
11212 			 */
11213 			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
11214 			    env->insn_processed - env->prev_insn_processed < 100)
11215 				add_new_state = false;
11216 			goto miss;
11217 		}
11218 		if (states_equal(env, &sl->state, cur)) {
11219 			sl->hit_cnt++;
11220 			/* reached equivalent register/stack state,
11221 			 * prune the search.
11222 			 * Registers read by the continuation are read by us.
11223 			 * If we have any write marks in env->cur_state, they
11224 			 * will prevent corresponding reads in the continuation
11225 			 * from reaching our parent (an explored_state).  Our
11226 			 * own state will get the read marks recorded, but
11227 			 * they'll be immediately forgotten as we're pruning
11228 			 * this state and will pop a new one.
11229 			 */
11230 			err = propagate_liveness(env, &sl->state, cur);
11231 
11232 			/* if previous state reached the exit with precision and
11233 			 * current state is equivalent to it (except precsion marks)
11234 			 * the precision needs to be propagated back in
11235 			 * the current state.
11236 			 */
11237 			err = err ? : push_jmp_history(env, cur);
11238 			err = err ? : propagate_precision(env, &sl->state);
11239 			if (err)
11240 				return err;
11241 			return 1;
11242 		}
11243 miss:
11244 		/* when new state is not going to be added do not increase miss count.
11245 		 * Otherwise several loop iterations will remove the state
11246 		 * recorded earlier. The goal of these heuristics is to have
11247 		 * states from some iterations of the loop (some in the beginning
11248 		 * and some at the end) to help pruning.
11249 		 */
11250 		if (add_new_state)
11251 			sl->miss_cnt++;
11252 		/* heuristic to determine whether this state is beneficial
11253 		 * to keep checking from state equivalence point of view.
11254 		 * Higher numbers increase max_states_per_insn and verification time,
11255 		 * but do not meaningfully decrease insn_processed.
11256 		 */
11257 		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
11258 			/* the state is unlikely to be useful. Remove it to
11259 			 * speed up verification
11260 			 */
11261 			*pprev = sl->next;
11262 			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
11263 				u32 br = sl->state.branches;
11264 
11265 				WARN_ONCE(br,
11266 					  "BUG live_done but branches_to_explore %d\n",
11267 					  br);
11268 				free_verifier_state(&sl->state, false);
11269 				kfree(sl);
11270 				env->peak_states--;
11271 			} else {
11272 				/* cannot free this state, since parentage chain may
11273 				 * walk it later. Add it for free_list instead to
11274 				 * be freed at the end of verification
11275 				 */
11276 				sl->next = env->free_list;
11277 				env->free_list = sl;
11278 			}
11279 			sl = *pprev;
11280 			continue;
11281 		}
11282 next:
11283 		pprev = &sl->next;
11284 		sl = *pprev;
11285 	}
11286 
11287 	if (env->max_states_per_insn < states_cnt)
11288 		env->max_states_per_insn = states_cnt;
11289 
11290 	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
11291 		return push_jmp_history(env, cur);
11292 
11293 	if (!add_new_state)
11294 		return push_jmp_history(env, cur);
11295 
11296 	/* There were no equivalent states, remember the current one.
11297 	 * Technically the current state is not proven to be safe yet,
11298 	 * but it will either reach outer most bpf_exit (which means it's safe)
11299 	 * or it will be rejected. When there are no loops the verifier won't be
11300 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
11301 	 * again on the way to bpf_exit.
11302 	 * When looping the sl->state.branches will be > 0 and this state
11303 	 * will not be considered for equivalence until branches == 0.
11304 	 */
11305 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
11306 	if (!new_sl)
11307 		return -ENOMEM;
11308 	env->total_states++;
11309 	env->peak_states++;
11310 	env->prev_jmps_processed = env->jmps_processed;
11311 	env->prev_insn_processed = env->insn_processed;
11312 
11313 	/* add new state to the head of linked list */
11314 	new = &new_sl->state;
11315 	err = copy_verifier_state(new, cur);
11316 	if (err) {
11317 		free_verifier_state(new, false);
11318 		kfree(new_sl);
11319 		return err;
11320 	}
11321 	new->insn_idx = insn_idx;
11322 	WARN_ONCE(new->branches != 1,
11323 		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
11324 
11325 	cur->parent = new;
11326 	cur->first_insn_idx = insn_idx;
11327 	clear_jmp_history(cur);
11328 	new_sl->next = *explored_state(env, insn_idx);
11329 	*explored_state(env, insn_idx) = new_sl;
11330 	/* connect new state to parentage chain. Current frame needs all
11331 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
11332 	 * to the stack implicitly by JITs) so in callers' frames connect just
11333 	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
11334 	 * the state of the call instruction (with WRITTEN set), and r0 comes
11335 	 * from callee with its full parentage chain, anyway.
11336 	 */
11337 	/* clear write marks in current state: the writes we did are not writes
11338 	 * our child did, so they don't screen off its reads from us.
11339 	 * (There are no read marks in current state, because reads always mark
11340 	 * their parent and current state never has children yet.  Only
11341 	 * explored_states can get read marks.)
11342 	 */
11343 	for (j = 0; j <= cur->curframe; j++) {
11344 		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
11345 			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
11346 		for (i = 0; i < BPF_REG_FP; i++)
11347 			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
11348 	}
11349 
11350 	/* all stack frames are accessible from callee, clear them all */
11351 	for (j = 0; j <= cur->curframe; j++) {
11352 		struct bpf_func_state *frame = cur->frame[j];
11353 		struct bpf_func_state *newframe = new->frame[j];
11354 
11355 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
11356 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
11357 			frame->stack[i].spilled_ptr.parent =
11358 						&newframe->stack[i].spilled_ptr;
11359 		}
11360 	}
11361 	return 0;
11362 }
11363 
11364 /* Return true if it's OK to have the same insn return a different type. */
11365 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
11366 {
11367 	switch (base_type(type)) {
11368 	case PTR_TO_CTX:
11369 	case PTR_TO_SOCKET:
11370 	case PTR_TO_SOCK_COMMON:
11371 	case PTR_TO_TCP_SOCK:
11372 	case PTR_TO_XDP_SOCK:
11373 	case PTR_TO_BTF_ID:
11374 		return false;
11375 	default:
11376 		return true;
11377 	}
11378 }
11379 
11380 /* If an instruction was previously used with particular pointer types, then we
11381  * need to be careful to avoid cases such as the below, where it may be ok
11382  * for one branch accessing the pointer, but not ok for the other branch:
11383  *
11384  * R1 = sock_ptr
11385  * goto X;
11386  * ...
11387  * R1 = some_other_valid_ptr;
11388  * goto X;
11389  * ...
11390  * R2 = *(u32 *)(R1 + 0);
11391  */
11392 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
11393 {
11394 	return src != prev && (!reg_type_mismatch_ok(src) ||
11395 			       !reg_type_mismatch_ok(prev));
11396 }
11397 
11398 static int do_check(struct bpf_verifier_env *env)
11399 {
11400 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11401 	struct bpf_verifier_state *state = env->cur_state;
11402 	struct bpf_insn *insns = env->prog->insnsi;
11403 	struct bpf_reg_state *regs;
11404 	int insn_cnt = env->prog->len;
11405 	bool do_print_state = false;
11406 	int prev_insn_idx = -1;
11407 
11408 	for (;;) {
11409 		struct bpf_insn *insn;
11410 		u8 class;
11411 		int err;
11412 
11413 		env->prev_insn_idx = prev_insn_idx;
11414 		if (env->insn_idx >= insn_cnt) {
11415 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
11416 				env->insn_idx, insn_cnt);
11417 			return -EFAULT;
11418 		}
11419 
11420 		insn = &insns[env->insn_idx];
11421 		class = BPF_CLASS(insn->code);
11422 
11423 		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
11424 			verbose(env,
11425 				"BPF program is too large. Processed %d insn\n",
11426 				env->insn_processed);
11427 			return -E2BIG;
11428 		}
11429 
11430 		err = is_state_visited(env, env->insn_idx);
11431 		if (err < 0)
11432 			return err;
11433 		if (err == 1) {
11434 			/* found equivalent state, can prune the search */
11435 			if (env->log.level & BPF_LOG_LEVEL) {
11436 				if (do_print_state)
11437 					verbose(env, "\nfrom %d to %d%s: safe\n",
11438 						env->prev_insn_idx, env->insn_idx,
11439 						env->cur_state->speculative ?
11440 						" (speculative execution)" : "");
11441 				else
11442 					verbose(env, "%d: safe\n", env->insn_idx);
11443 			}
11444 			goto process_bpf_exit;
11445 		}
11446 
11447 		if (signal_pending(current))
11448 			return -EAGAIN;
11449 
11450 		if (need_resched())
11451 			cond_resched();
11452 
11453 		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
11454 			verbose(env, "\nfrom %d to %d%s:",
11455 				env->prev_insn_idx, env->insn_idx,
11456 				env->cur_state->speculative ?
11457 				" (speculative execution)" : "");
11458 			print_verifier_state(env, state->frame[state->curframe], true);
11459 			do_print_state = false;
11460 		}
11461 
11462 		if (env->log.level & BPF_LOG_LEVEL) {
11463 			const struct bpf_insn_cbs cbs = {
11464 				.cb_call	= disasm_kfunc_name,
11465 				.cb_print	= verbose,
11466 				.private_data	= env,
11467 			};
11468 
11469 			if (verifier_state_scratched(env))
11470 				print_insn_state(env, state->frame[state->curframe]);
11471 
11472 			verbose_linfo(env, env->insn_idx, "; ");
11473 			env->prev_log_len = env->log.len_used;
11474 			verbose(env, "%d: ", env->insn_idx);
11475 			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
11476 			env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
11477 			env->prev_log_len = env->log.len_used;
11478 		}
11479 
11480 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
11481 			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
11482 							   env->prev_insn_idx);
11483 			if (err)
11484 				return err;
11485 		}
11486 
11487 		regs = cur_regs(env);
11488 		sanitize_mark_insn_seen(env);
11489 		prev_insn_idx = env->insn_idx;
11490 
11491 		if (class == BPF_ALU || class == BPF_ALU64) {
11492 			err = check_alu_op(env, insn);
11493 			if (err)
11494 				return err;
11495 
11496 		} else if (class == BPF_LDX) {
11497 			enum bpf_reg_type *prev_src_type, src_reg_type;
11498 
11499 			/* check for reserved fields is already done */
11500 
11501 			/* check src operand */
11502 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
11503 			if (err)
11504 				return err;
11505 
11506 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
11507 			if (err)
11508 				return err;
11509 
11510 			src_reg_type = regs[insn->src_reg].type;
11511 
11512 			/* check that memory (src_reg + off) is readable,
11513 			 * the state of dst_reg will be updated by this func
11514 			 */
11515 			err = check_mem_access(env, env->insn_idx, insn->src_reg,
11516 					       insn->off, BPF_SIZE(insn->code),
11517 					       BPF_READ, insn->dst_reg, false);
11518 			if (err)
11519 				return err;
11520 
11521 			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11522 
11523 			if (*prev_src_type == NOT_INIT) {
11524 				/* saw a valid insn
11525 				 * dst_reg = *(u32 *)(src_reg + off)
11526 				 * save type to validate intersecting paths
11527 				 */
11528 				*prev_src_type = src_reg_type;
11529 
11530 			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
11531 				/* ABuser program is trying to use the same insn
11532 				 * dst_reg = *(u32*) (src_reg + off)
11533 				 * with different pointer types:
11534 				 * src_reg == ctx in one branch and
11535 				 * src_reg == stack|map in some other branch.
11536 				 * Reject it.
11537 				 */
11538 				verbose(env, "same insn cannot be used with different pointers\n");
11539 				return -EINVAL;
11540 			}
11541 
11542 		} else if (class == BPF_STX) {
11543 			enum bpf_reg_type *prev_dst_type, dst_reg_type;
11544 
11545 			if (BPF_MODE(insn->code) == BPF_ATOMIC) {
11546 				err = check_atomic(env, env->insn_idx, insn);
11547 				if (err)
11548 					return err;
11549 				env->insn_idx++;
11550 				continue;
11551 			}
11552 
11553 			if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
11554 				verbose(env, "BPF_STX uses reserved fields\n");
11555 				return -EINVAL;
11556 			}
11557 
11558 			/* check src1 operand */
11559 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
11560 			if (err)
11561 				return err;
11562 			/* check src2 operand */
11563 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11564 			if (err)
11565 				return err;
11566 
11567 			dst_reg_type = regs[insn->dst_reg].type;
11568 
11569 			/* check that memory (dst_reg + off) is writeable */
11570 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11571 					       insn->off, BPF_SIZE(insn->code),
11572 					       BPF_WRITE, insn->src_reg, false);
11573 			if (err)
11574 				return err;
11575 
11576 			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11577 
11578 			if (*prev_dst_type == NOT_INIT) {
11579 				*prev_dst_type = dst_reg_type;
11580 			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
11581 				verbose(env, "same insn cannot be used with different pointers\n");
11582 				return -EINVAL;
11583 			}
11584 
11585 		} else if (class == BPF_ST) {
11586 			if (BPF_MODE(insn->code) != BPF_MEM ||
11587 			    insn->src_reg != BPF_REG_0) {
11588 				verbose(env, "BPF_ST uses reserved fields\n");
11589 				return -EINVAL;
11590 			}
11591 			/* check src operand */
11592 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11593 			if (err)
11594 				return err;
11595 
11596 			if (is_ctx_reg(env, insn->dst_reg)) {
11597 				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
11598 					insn->dst_reg,
11599 					reg_type_str(env, reg_state(env, insn->dst_reg)->type));
11600 				return -EACCES;
11601 			}
11602 
11603 			/* check that memory (dst_reg + off) is writeable */
11604 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11605 					       insn->off, BPF_SIZE(insn->code),
11606 					       BPF_WRITE, -1, false);
11607 			if (err)
11608 				return err;
11609 
11610 		} else if (class == BPF_JMP || class == BPF_JMP32) {
11611 			u8 opcode = BPF_OP(insn->code);
11612 
11613 			env->jmps_processed++;
11614 			if (opcode == BPF_CALL) {
11615 				if (BPF_SRC(insn->code) != BPF_K ||
11616 				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
11617 				     && insn->off != 0) ||
11618 				    (insn->src_reg != BPF_REG_0 &&
11619 				     insn->src_reg != BPF_PSEUDO_CALL &&
11620 				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
11621 				    insn->dst_reg != BPF_REG_0 ||
11622 				    class == BPF_JMP32) {
11623 					verbose(env, "BPF_CALL uses reserved fields\n");
11624 					return -EINVAL;
11625 				}
11626 
11627 				if (env->cur_state->active_spin_lock &&
11628 				    (insn->src_reg == BPF_PSEUDO_CALL ||
11629 				     insn->imm != BPF_FUNC_spin_unlock)) {
11630 					verbose(env, "function calls are not allowed while holding a lock\n");
11631 					return -EINVAL;
11632 				}
11633 				if (insn->src_reg == BPF_PSEUDO_CALL)
11634 					err = check_func_call(env, insn, &env->insn_idx);
11635 				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
11636 					err = check_kfunc_call(env, insn, &env->insn_idx);
11637 				else
11638 					err = check_helper_call(env, insn, &env->insn_idx);
11639 				if (err)
11640 					return err;
11641 			} else if (opcode == BPF_JA) {
11642 				if (BPF_SRC(insn->code) != BPF_K ||
11643 				    insn->imm != 0 ||
11644 				    insn->src_reg != BPF_REG_0 ||
11645 				    insn->dst_reg != BPF_REG_0 ||
11646 				    class == BPF_JMP32) {
11647 					verbose(env, "BPF_JA uses reserved fields\n");
11648 					return -EINVAL;
11649 				}
11650 
11651 				env->insn_idx += insn->off + 1;
11652 				continue;
11653 
11654 			} else if (opcode == BPF_EXIT) {
11655 				if (BPF_SRC(insn->code) != BPF_K ||
11656 				    insn->imm != 0 ||
11657 				    insn->src_reg != BPF_REG_0 ||
11658 				    insn->dst_reg != BPF_REG_0 ||
11659 				    class == BPF_JMP32) {
11660 					verbose(env, "BPF_EXIT uses reserved fields\n");
11661 					return -EINVAL;
11662 				}
11663 
11664 				if (env->cur_state->active_spin_lock) {
11665 					verbose(env, "bpf_spin_unlock is missing\n");
11666 					return -EINVAL;
11667 				}
11668 
11669 				if (state->curframe) {
11670 					/* exit from nested function */
11671 					err = prepare_func_exit(env, &env->insn_idx);
11672 					if (err)
11673 						return err;
11674 					do_print_state = true;
11675 					continue;
11676 				}
11677 
11678 				err = check_reference_leak(env);
11679 				if (err)
11680 					return err;
11681 
11682 				err = check_return_code(env);
11683 				if (err)
11684 					return err;
11685 process_bpf_exit:
11686 				mark_verifier_state_scratched(env);
11687 				update_branch_counts(env, env->cur_state);
11688 				err = pop_stack(env, &prev_insn_idx,
11689 						&env->insn_idx, pop_log);
11690 				if (err < 0) {
11691 					if (err != -ENOENT)
11692 						return err;
11693 					break;
11694 				} else {
11695 					do_print_state = true;
11696 					continue;
11697 				}
11698 			} else {
11699 				err = check_cond_jmp_op(env, insn, &env->insn_idx);
11700 				if (err)
11701 					return err;
11702 			}
11703 		} else if (class == BPF_LD) {
11704 			u8 mode = BPF_MODE(insn->code);
11705 
11706 			if (mode == BPF_ABS || mode == BPF_IND) {
11707 				err = check_ld_abs(env, insn);
11708 				if (err)
11709 					return err;
11710 
11711 			} else if (mode == BPF_IMM) {
11712 				err = check_ld_imm(env, insn);
11713 				if (err)
11714 					return err;
11715 
11716 				env->insn_idx++;
11717 				sanitize_mark_insn_seen(env);
11718 			} else {
11719 				verbose(env, "invalid BPF_LD mode\n");
11720 				return -EINVAL;
11721 			}
11722 		} else {
11723 			verbose(env, "unknown insn class %d\n", class);
11724 			return -EINVAL;
11725 		}
11726 
11727 		env->insn_idx++;
11728 	}
11729 
11730 	return 0;
11731 }
11732 
11733 static int find_btf_percpu_datasec(struct btf *btf)
11734 {
11735 	const struct btf_type *t;
11736 	const char *tname;
11737 	int i, n;
11738 
11739 	/*
11740 	 * Both vmlinux and module each have their own ".data..percpu"
11741 	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
11742 	 * types to look at only module's own BTF types.
11743 	 */
11744 	n = btf_nr_types(btf);
11745 	if (btf_is_module(btf))
11746 		i = btf_nr_types(btf_vmlinux);
11747 	else
11748 		i = 1;
11749 
11750 	for(; i < n; i++) {
11751 		t = btf_type_by_id(btf, i);
11752 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
11753 			continue;
11754 
11755 		tname = btf_name_by_offset(btf, t->name_off);
11756 		if (!strcmp(tname, ".data..percpu"))
11757 			return i;
11758 	}
11759 
11760 	return -ENOENT;
11761 }
11762 
11763 /* replace pseudo btf_id with kernel symbol address */
11764 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
11765 			       struct bpf_insn *insn,
11766 			       struct bpf_insn_aux_data *aux)
11767 {
11768 	const struct btf_var_secinfo *vsi;
11769 	const struct btf_type *datasec;
11770 	struct btf_mod_pair *btf_mod;
11771 	const struct btf_type *t;
11772 	const char *sym_name;
11773 	bool percpu = false;
11774 	u32 type, id = insn->imm;
11775 	struct btf *btf;
11776 	s32 datasec_id;
11777 	u64 addr;
11778 	int i, btf_fd, err;
11779 
11780 	btf_fd = insn[1].imm;
11781 	if (btf_fd) {
11782 		btf = btf_get_by_fd(btf_fd);
11783 		if (IS_ERR(btf)) {
11784 			verbose(env, "invalid module BTF object FD specified.\n");
11785 			return -EINVAL;
11786 		}
11787 	} else {
11788 		if (!btf_vmlinux) {
11789 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
11790 			return -EINVAL;
11791 		}
11792 		btf = btf_vmlinux;
11793 		btf_get(btf);
11794 	}
11795 
11796 	t = btf_type_by_id(btf, id);
11797 	if (!t) {
11798 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
11799 		err = -ENOENT;
11800 		goto err_put;
11801 	}
11802 
11803 	if (!btf_type_is_var(t)) {
11804 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
11805 		err = -EINVAL;
11806 		goto err_put;
11807 	}
11808 
11809 	sym_name = btf_name_by_offset(btf, t->name_off);
11810 	addr = kallsyms_lookup_name(sym_name);
11811 	if (!addr) {
11812 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
11813 			sym_name);
11814 		err = -ENOENT;
11815 		goto err_put;
11816 	}
11817 
11818 	datasec_id = find_btf_percpu_datasec(btf);
11819 	if (datasec_id > 0) {
11820 		datasec = btf_type_by_id(btf, datasec_id);
11821 		for_each_vsi(i, datasec, vsi) {
11822 			if (vsi->type == id) {
11823 				percpu = true;
11824 				break;
11825 			}
11826 		}
11827 	}
11828 
11829 	insn[0].imm = (u32)addr;
11830 	insn[1].imm = addr >> 32;
11831 
11832 	type = t->type;
11833 	t = btf_type_skip_modifiers(btf, type, NULL);
11834 	if (percpu) {
11835 		aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
11836 		aux->btf_var.btf = btf;
11837 		aux->btf_var.btf_id = type;
11838 	} else if (!btf_type_is_struct(t)) {
11839 		const struct btf_type *ret;
11840 		const char *tname;
11841 		u32 tsize;
11842 
11843 		/* resolve the type size of ksym. */
11844 		ret = btf_resolve_size(btf, t, &tsize);
11845 		if (IS_ERR(ret)) {
11846 			tname = btf_name_by_offset(btf, t->name_off);
11847 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
11848 				tname, PTR_ERR(ret));
11849 			err = -EINVAL;
11850 			goto err_put;
11851 		}
11852 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
11853 		aux->btf_var.mem_size = tsize;
11854 	} else {
11855 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
11856 		aux->btf_var.btf = btf;
11857 		aux->btf_var.btf_id = type;
11858 	}
11859 
11860 	/* check whether we recorded this BTF (and maybe module) already */
11861 	for (i = 0; i < env->used_btf_cnt; i++) {
11862 		if (env->used_btfs[i].btf == btf) {
11863 			btf_put(btf);
11864 			return 0;
11865 		}
11866 	}
11867 
11868 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
11869 		err = -E2BIG;
11870 		goto err_put;
11871 	}
11872 
11873 	btf_mod = &env->used_btfs[env->used_btf_cnt];
11874 	btf_mod->btf = btf;
11875 	btf_mod->module = NULL;
11876 
11877 	/* if we reference variables from kernel module, bump its refcount */
11878 	if (btf_is_module(btf)) {
11879 		btf_mod->module = btf_try_get_module(btf);
11880 		if (!btf_mod->module) {
11881 			err = -ENXIO;
11882 			goto err_put;
11883 		}
11884 	}
11885 
11886 	env->used_btf_cnt++;
11887 
11888 	return 0;
11889 err_put:
11890 	btf_put(btf);
11891 	return err;
11892 }
11893 
11894 static int check_map_prealloc(struct bpf_map *map)
11895 {
11896 	return (map->map_type != BPF_MAP_TYPE_HASH &&
11897 		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
11898 		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
11899 		!(map->map_flags & BPF_F_NO_PREALLOC);
11900 }
11901 
11902 static bool is_tracing_prog_type(enum bpf_prog_type type)
11903 {
11904 	switch (type) {
11905 	case BPF_PROG_TYPE_KPROBE:
11906 	case BPF_PROG_TYPE_TRACEPOINT:
11907 	case BPF_PROG_TYPE_PERF_EVENT:
11908 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
11909 		return true;
11910 	default:
11911 		return false;
11912 	}
11913 }
11914 
11915 static bool is_preallocated_map(struct bpf_map *map)
11916 {
11917 	if (!check_map_prealloc(map))
11918 		return false;
11919 	if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
11920 		return false;
11921 	return true;
11922 }
11923 
11924 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
11925 					struct bpf_map *map,
11926 					struct bpf_prog *prog)
11927 
11928 {
11929 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
11930 	/*
11931 	 * Validate that trace type programs use preallocated hash maps.
11932 	 *
11933 	 * For programs attached to PERF events this is mandatory as the
11934 	 * perf NMI can hit any arbitrary code sequence.
11935 	 *
11936 	 * All other trace types using preallocated hash maps are unsafe as
11937 	 * well because tracepoint or kprobes can be inside locked regions
11938 	 * of the memory allocator or at a place where a recursion into the
11939 	 * memory allocator would see inconsistent state.
11940 	 *
11941 	 * On RT enabled kernels run-time allocation of all trace type
11942 	 * programs is strictly prohibited due to lock type constraints. On
11943 	 * !RT kernels it is allowed for backwards compatibility reasons for
11944 	 * now, but warnings are emitted so developers are made aware of
11945 	 * the unsafety and can fix their programs before this is enforced.
11946 	 */
11947 	if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
11948 		if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
11949 			verbose(env, "perf_event programs can only use preallocated hash map\n");
11950 			return -EINVAL;
11951 		}
11952 		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
11953 			verbose(env, "trace type programs can only use preallocated hash map\n");
11954 			return -EINVAL;
11955 		}
11956 		WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
11957 		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
11958 	}
11959 
11960 	if (map_value_has_spin_lock(map)) {
11961 		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
11962 			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
11963 			return -EINVAL;
11964 		}
11965 
11966 		if (is_tracing_prog_type(prog_type)) {
11967 			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
11968 			return -EINVAL;
11969 		}
11970 
11971 		if (prog->aux->sleepable) {
11972 			verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
11973 			return -EINVAL;
11974 		}
11975 	}
11976 
11977 	if (map_value_has_timer(map)) {
11978 		if (is_tracing_prog_type(prog_type)) {
11979 			verbose(env, "tracing progs cannot use bpf_timer yet\n");
11980 			return -EINVAL;
11981 		}
11982 	}
11983 
11984 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
11985 	    !bpf_offload_prog_map_match(prog, map)) {
11986 		verbose(env, "offload device mismatch between prog and map\n");
11987 		return -EINVAL;
11988 	}
11989 
11990 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
11991 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
11992 		return -EINVAL;
11993 	}
11994 
11995 	if (prog->aux->sleepable)
11996 		switch (map->map_type) {
11997 		case BPF_MAP_TYPE_HASH:
11998 		case BPF_MAP_TYPE_LRU_HASH:
11999 		case BPF_MAP_TYPE_ARRAY:
12000 		case BPF_MAP_TYPE_PERCPU_HASH:
12001 		case BPF_MAP_TYPE_PERCPU_ARRAY:
12002 		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
12003 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
12004 		case BPF_MAP_TYPE_HASH_OF_MAPS:
12005 			if (!is_preallocated_map(map)) {
12006 				verbose(env,
12007 					"Sleepable programs can only use preallocated maps\n");
12008 				return -EINVAL;
12009 			}
12010 			break;
12011 		case BPF_MAP_TYPE_RINGBUF:
12012 		case BPF_MAP_TYPE_INODE_STORAGE:
12013 		case BPF_MAP_TYPE_SK_STORAGE:
12014 		case BPF_MAP_TYPE_TASK_STORAGE:
12015 			break;
12016 		default:
12017 			verbose(env,
12018 				"Sleepable programs can only use array, hash, and ringbuf maps\n");
12019 			return -EINVAL;
12020 		}
12021 
12022 	return 0;
12023 }
12024 
12025 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
12026 {
12027 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
12028 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
12029 }
12030 
12031 /* find and rewrite pseudo imm in ld_imm64 instructions:
12032  *
12033  * 1. if it accesses map FD, replace it with actual map pointer.
12034  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
12035  *
12036  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
12037  */
12038 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
12039 {
12040 	struct bpf_insn *insn = env->prog->insnsi;
12041 	int insn_cnt = env->prog->len;
12042 	int i, j, err;
12043 
12044 	err = bpf_prog_calc_tag(env->prog);
12045 	if (err)
12046 		return err;
12047 
12048 	for (i = 0; i < insn_cnt; i++, insn++) {
12049 		if (BPF_CLASS(insn->code) == BPF_LDX &&
12050 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
12051 			verbose(env, "BPF_LDX uses reserved fields\n");
12052 			return -EINVAL;
12053 		}
12054 
12055 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
12056 			struct bpf_insn_aux_data *aux;
12057 			struct bpf_map *map;
12058 			struct fd f;
12059 			u64 addr;
12060 			u32 fd;
12061 
12062 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
12063 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
12064 			    insn[1].off != 0) {
12065 				verbose(env, "invalid bpf_ld_imm64 insn\n");
12066 				return -EINVAL;
12067 			}
12068 
12069 			if (insn[0].src_reg == 0)
12070 				/* valid generic load 64-bit imm */
12071 				goto next_insn;
12072 
12073 			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
12074 				aux = &env->insn_aux_data[i];
12075 				err = check_pseudo_btf_id(env, insn, aux);
12076 				if (err)
12077 					return err;
12078 				goto next_insn;
12079 			}
12080 
12081 			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
12082 				aux = &env->insn_aux_data[i];
12083 				aux->ptr_type = PTR_TO_FUNC;
12084 				goto next_insn;
12085 			}
12086 
12087 			/* In final convert_pseudo_ld_imm64() step, this is
12088 			 * converted into regular 64-bit imm load insn.
12089 			 */
12090 			switch (insn[0].src_reg) {
12091 			case BPF_PSEUDO_MAP_VALUE:
12092 			case BPF_PSEUDO_MAP_IDX_VALUE:
12093 				break;
12094 			case BPF_PSEUDO_MAP_FD:
12095 			case BPF_PSEUDO_MAP_IDX:
12096 				if (insn[1].imm == 0)
12097 					break;
12098 				fallthrough;
12099 			default:
12100 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
12101 				return -EINVAL;
12102 			}
12103 
12104 			switch (insn[0].src_reg) {
12105 			case BPF_PSEUDO_MAP_IDX_VALUE:
12106 			case BPF_PSEUDO_MAP_IDX:
12107 				if (bpfptr_is_null(env->fd_array)) {
12108 					verbose(env, "fd_idx without fd_array is invalid\n");
12109 					return -EPROTO;
12110 				}
12111 				if (copy_from_bpfptr_offset(&fd, env->fd_array,
12112 							    insn[0].imm * sizeof(fd),
12113 							    sizeof(fd)))
12114 					return -EFAULT;
12115 				break;
12116 			default:
12117 				fd = insn[0].imm;
12118 				break;
12119 			}
12120 
12121 			f = fdget(fd);
12122 			map = __bpf_map_get(f);
12123 			if (IS_ERR(map)) {
12124 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
12125 					insn[0].imm);
12126 				return PTR_ERR(map);
12127 			}
12128 
12129 			err = check_map_prog_compatibility(env, map, env->prog);
12130 			if (err) {
12131 				fdput(f);
12132 				return err;
12133 			}
12134 
12135 			aux = &env->insn_aux_data[i];
12136 			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
12137 			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
12138 				addr = (unsigned long)map;
12139 			} else {
12140 				u32 off = insn[1].imm;
12141 
12142 				if (off >= BPF_MAX_VAR_OFF) {
12143 					verbose(env, "direct value offset of %u is not allowed\n", off);
12144 					fdput(f);
12145 					return -EINVAL;
12146 				}
12147 
12148 				if (!map->ops->map_direct_value_addr) {
12149 					verbose(env, "no direct value access support for this map type\n");
12150 					fdput(f);
12151 					return -EINVAL;
12152 				}
12153 
12154 				err = map->ops->map_direct_value_addr(map, &addr, off);
12155 				if (err) {
12156 					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
12157 						map->value_size, off);
12158 					fdput(f);
12159 					return err;
12160 				}
12161 
12162 				aux->map_off = off;
12163 				addr += off;
12164 			}
12165 
12166 			insn[0].imm = (u32)addr;
12167 			insn[1].imm = addr >> 32;
12168 
12169 			/* check whether we recorded this map already */
12170 			for (j = 0; j < env->used_map_cnt; j++) {
12171 				if (env->used_maps[j] == map) {
12172 					aux->map_index = j;
12173 					fdput(f);
12174 					goto next_insn;
12175 				}
12176 			}
12177 
12178 			if (env->used_map_cnt >= MAX_USED_MAPS) {
12179 				fdput(f);
12180 				return -E2BIG;
12181 			}
12182 
12183 			/* hold the map. If the program is rejected by verifier,
12184 			 * the map will be released by release_maps() or it
12185 			 * will be used by the valid program until it's unloaded
12186 			 * and all maps are released in free_used_maps()
12187 			 */
12188 			bpf_map_inc(map);
12189 
12190 			aux->map_index = env->used_map_cnt;
12191 			env->used_maps[env->used_map_cnt++] = map;
12192 
12193 			if (bpf_map_is_cgroup_storage(map) &&
12194 			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
12195 				verbose(env, "only one cgroup storage of each type is allowed\n");
12196 				fdput(f);
12197 				return -EBUSY;
12198 			}
12199 
12200 			fdput(f);
12201 next_insn:
12202 			insn++;
12203 			i++;
12204 			continue;
12205 		}
12206 
12207 		/* Basic sanity check before we invest more work here. */
12208 		if (!bpf_opcode_in_insntable(insn->code)) {
12209 			verbose(env, "unknown opcode %02x\n", insn->code);
12210 			return -EINVAL;
12211 		}
12212 	}
12213 
12214 	/* now all pseudo BPF_LD_IMM64 instructions load valid
12215 	 * 'struct bpf_map *' into a register instead of user map_fd.
12216 	 * These pointers will be used later by verifier to validate map access.
12217 	 */
12218 	return 0;
12219 }
12220 
12221 /* drop refcnt of maps used by the rejected program */
12222 static void release_maps(struct bpf_verifier_env *env)
12223 {
12224 	__bpf_free_used_maps(env->prog->aux, env->used_maps,
12225 			     env->used_map_cnt);
12226 }
12227 
12228 /* drop refcnt of maps used by the rejected program */
12229 static void release_btfs(struct bpf_verifier_env *env)
12230 {
12231 	__bpf_free_used_btfs(env->prog->aux, env->used_btfs,
12232 			     env->used_btf_cnt);
12233 }
12234 
12235 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
12236 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
12237 {
12238 	struct bpf_insn *insn = env->prog->insnsi;
12239 	int insn_cnt = env->prog->len;
12240 	int i;
12241 
12242 	for (i = 0; i < insn_cnt; i++, insn++) {
12243 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
12244 			continue;
12245 		if (insn->src_reg == BPF_PSEUDO_FUNC)
12246 			continue;
12247 		insn->src_reg = 0;
12248 	}
12249 }
12250 
12251 /* single env->prog->insni[off] instruction was replaced with the range
12252  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
12253  * [0, off) and [off, end) to new locations, so the patched range stays zero
12254  */
12255 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
12256 				 struct bpf_insn_aux_data *new_data,
12257 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
12258 {
12259 	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
12260 	struct bpf_insn *insn = new_prog->insnsi;
12261 	u32 old_seen = old_data[off].seen;
12262 	u32 prog_len;
12263 	int i;
12264 
12265 	/* aux info at OFF always needs adjustment, no matter fast path
12266 	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
12267 	 * original insn at old prog.
12268 	 */
12269 	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
12270 
12271 	if (cnt == 1)
12272 		return;
12273 	prog_len = new_prog->len;
12274 
12275 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
12276 	memcpy(new_data + off + cnt - 1, old_data + off,
12277 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
12278 	for (i = off; i < off + cnt - 1; i++) {
12279 		/* Expand insni[off]'s seen count to the patched range. */
12280 		new_data[i].seen = old_seen;
12281 		new_data[i].zext_dst = insn_has_def32(env, insn + i);
12282 	}
12283 	env->insn_aux_data = new_data;
12284 	vfree(old_data);
12285 }
12286 
12287 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
12288 {
12289 	int i;
12290 
12291 	if (len == 1)
12292 		return;
12293 	/* NOTE: fake 'exit' subprog should be updated as well. */
12294 	for (i = 0; i <= env->subprog_cnt; i++) {
12295 		if (env->subprog_info[i].start <= off)
12296 			continue;
12297 		env->subprog_info[i].start += len - 1;
12298 	}
12299 }
12300 
12301 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
12302 {
12303 	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
12304 	int i, sz = prog->aux->size_poke_tab;
12305 	struct bpf_jit_poke_descriptor *desc;
12306 
12307 	for (i = 0; i < sz; i++) {
12308 		desc = &tab[i];
12309 		if (desc->insn_idx <= off)
12310 			continue;
12311 		desc->insn_idx += len - 1;
12312 	}
12313 }
12314 
12315 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
12316 					    const struct bpf_insn *patch, u32 len)
12317 {
12318 	struct bpf_prog *new_prog;
12319 	struct bpf_insn_aux_data *new_data = NULL;
12320 
12321 	if (len > 1) {
12322 		new_data = vzalloc(array_size(env->prog->len + len - 1,
12323 					      sizeof(struct bpf_insn_aux_data)));
12324 		if (!new_data)
12325 			return NULL;
12326 	}
12327 
12328 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
12329 	if (IS_ERR(new_prog)) {
12330 		if (PTR_ERR(new_prog) == -ERANGE)
12331 			verbose(env,
12332 				"insn %d cannot be patched due to 16-bit range\n",
12333 				env->insn_aux_data[off].orig_idx);
12334 		vfree(new_data);
12335 		return NULL;
12336 	}
12337 	adjust_insn_aux_data(env, new_data, new_prog, off, len);
12338 	adjust_subprog_starts(env, off, len);
12339 	adjust_poke_descs(new_prog, off, len);
12340 	return new_prog;
12341 }
12342 
12343 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
12344 					      u32 off, u32 cnt)
12345 {
12346 	int i, j;
12347 
12348 	/* find first prog starting at or after off (first to remove) */
12349 	for (i = 0; i < env->subprog_cnt; i++)
12350 		if (env->subprog_info[i].start >= off)
12351 			break;
12352 	/* find first prog starting at or after off + cnt (first to stay) */
12353 	for (j = i; j < env->subprog_cnt; j++)
12354 		if (env->subprog_info[j].start >= off + cnt)
12355 			break;
12356 	/* if j doesn't start exactly at off + cnt, we are just removing
12357 	 * the front of previous prog
12358 	 */
12359 	if (env->subprog_info[j].start != off + cnt)
12360 		j--;
12361 
12362 	if (j > i) {
12363 		struct bpf_prog_aux *aux = env->prog->aux;
12364 		int move;
12365 
12366 		/* move fake 'exit' subprog as well */
12367 		move = env->subprog_cnt + 1 - j;
12368 
12369 		memmove(env->subprog_info + i,
12370 			env->subprog_info + j,
12371 			sizeof(*env->subprog_info) * move);
12372 		env->subprog_cnt -= j - i;
12373 
12374 		/* remove func_info */
12375 		if (aux->func_info) {
12376 			move = aux->func_info_cnt - j;
12377 
12378 			memmove(aux->func_info + i,
12379 				aux->func_info + j,
12380 				sizeof(*aux->func_info) * move);
12381 			aux->func_info_cnt -= j - i;
12382 			/* func_info->insn_off is set after all code rewrites,
12383 			 * in adjust_btf_func() - no need to adjust
12384 			 */
12385 		}
12386 	} else {
12387 		/* convert i from "first prog to remove" to "first to adjust" */
12388 		if (env->subprog_info[i].start == off)
12389 			i++;
12390 	}
12391 
12392 	/* update fake 'exit' subprog as well */
12393 	for (; i <= env->subprog_cnt; i++)
12394 		env->subprog_info[i].start -= cnt;
12395 
12396 	return 0;
12397 }
12398 
12399 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
12400 				      u32 cnt)
12401 {
12402 	struct bpf_prog *prog = env->prog;
12403 	u32 i, l_off, l_cnt, nr_linfo;
12404 	struct bpf_line_info *linfo;
12405 
12406 	nr_linfo = prog->aux->nr_linfo;
12407 	if (!nr_linfo)
12408 		return 0;
12409 
12410 	linfo = prog->aux->linfo;
12411 
12412 	/* find first line info to remove, count lines to be removed */
12413 	for (i = 0; i < nr_linfo; i++)
12414 		if (linfo[i].insn_off >= off)
12415 			break;
12416 
12417 	l_off = i;
12418 	l_cnt = 0;
12419 	for (; i < nr_linfo; i++)
12420 		if (linfo[i].insn_off < off + cnt)
12421 			l_cnt++;
12422 		else
12423 			break;
12424 
12425 	/* First live insn doesn't match first live linfo, it needs to "inherit"
12426 	 * last removed linfo.  prog is already modified, so prog->len == off
12427 	 * means no live instructions after (tail of the program was removed).
12428 	 */
12429 	if (prog->len != off && l_cnt &&
12430 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
12431 		l_cnt--;
12432 		linfo[--i].insn_off = off + cnt;
12433 	}
12434 
12435 	/* remove the line info which refer to the removed instructions */
12436 	if (l_cnt) {
12437 		memmove(linfo + l_off, linfo + i,
12438 			sizeof(*linfo) * (nr_linfo - i));
12439 
12440 		prog->aux->nr_linfo -= l_cnt;
12441 		nr_linfo = prog->aux->nr_linfo;
12442 	}
12443 
12444 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
12445 	for (i = l_off; i < nr_linfo; i++)
12446 		linfo[i].insn_off -= cnt;
12447 
12448 	/* fix up all subprogs (incl. 'exit') which start >= off */
12449 	for (i = 0; i <= env->subprog_cnt; i++)
12450 		if (env->subprog_info[i].linfo_idx > l_off) {
12451 			/* program may have started in the removed region but
12452 			 * may not be fully removed
12453 			 */
12454 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
12455 				env->subprog_info[i].linfo_idx -= l_cnt;
12456 			else
12457 				env->subprog_info[i].linfo_idx = l_off;
12458 		}
12459 
12460 	return 0;
12461 }
12462 
12463 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
12464 {
12465 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12466 	unsigned int orig_prog_len = env->prog->len;
12467 	int err;
12468 
12469 	if (bpf_prog_is_dev_bound(env->prog->aux))
12470 		bpf_prog_offload_remove_insns(env, off, cnt);
12471 
12472 	err = bpf_remove_insns(env->prog, off, cnt);
12473 	if (err)
12474 		return err;
12475 
12476 	err = adjust_subprog_starts_after_remove(env, off, cnt);
12477 	if (err)
12478 		return err;
12479 
12480 	err = bpf_adj_linfo_after_remove(env, off, cnt);
12481 	if (err)
12482 		return err;
12483 
12484 	memmove(aux_data + off,	aux_data + off + cnt,
12485 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
12486 
12487 	return 0;
12488 }
12489 
12490 /* The verifier does more data flow analysis than llvm and will not
12491  * explore branches that are dead at run time. Malicious programs can
12492  * have dead code too. Therefore replace all dead at-run-time code
12493  * with 'ja -1'.
12494  *
12495  * Just nops are not optimal, e.g. if they would sit at the end of the
12496  * program and through another bug we would manage to jump there, then
12497  * we'd execute beyond program memory otherwise. Returning exception
12498  * code also wouldn't work since we can have subprogs where the dead
12499  * code could be located.
12500  */
12501 static void sanitize_dead_code(struct bpf_verifier_env *env)
12502 {
12503 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12504 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
12505 	struct bpf_insn *insn = env->prog->insnsi;
12506 	const int insn_cnt = env->prog->len;
12507 	int i;
12508 
12509 	for (i = 0; i < insn_cnt; i++) {
12510 		if (aux_data[i].seen)
12511 			continue;
12512 		memcpy(insn + i, &trap, sizeof(trap));
12513 		aux_data[i].zext_dst = false;
12514 	}
12515 }
12516 
12517 static bool insn_is_cond_jump(u8 code)
12518 {
12519 	u8 op;
12520 
12521 	if (BPF_CLASS(code) == BPF_JMP32)
12522 		return true;
12523 
12524 	if (BPF_CLASS(code) != BPF_JMP)
12525 		return false;
12526 
12527 	op = BPF_OP(code);
12528 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
12529 }
12530 
12531 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
12532 {
12533 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12534 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12535 	struct bpf_insn *insn = env->prog->insnsi;
12536 	const int insn_cnt = env->prog->len;
12537 	int i;
12538 
12539 	for (i = 0; i < insn_cnt; i++, insn++) {
12540 		if (!insn_is_cond_jump(insn->code))
12541 			continue;
12542 
12543 		if (!aux_data[i + 1].seen)
12544 			ja.off = insn->off;
12545 		else if (!aux_data[i + 1 + insn->off].seen)
12546 			ja.off = 0;
12547 		else
12548 			continue;
12549 
12550 		if (bpf_prog_is_dev_bound(env->prog->aux))
12551 			bpf_prog_offload_replace_insn(env, i, &ja);
12552 
12553 		memcpy(insn, &ja, sizeof(ja));
12554 	}
12555 }
12556 
12557 static int opt_remove_dead_code(struct bpf_verifier_env *env)
12558 {
12559 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12560 	int insn_cnt = env->prog->len;
12561 	int i, err;
12562 
12563 	for (i = 0; i < insn_cnt; i++) {
12564 		int j;
12565 
12566 		j = 0;
12567 		while (i + j < insn_cnt && !aux_data[i + j].seen)
12568 			j++;
12569 		if (!j)
12570 			continue;
12571 
12572 		err = verifier_remove_insns(env, i, j);
12573 		if (err)
12574 			return err;
12575 		insn_cnt = env->prog->len;
12576 	}
12577 
12578 	return 0;
12579 }
12580 
12581 static int opt_remove_nops(struct bpf_verifier_env *env)
12582 {
12583 	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12584 	struct bpf_insn *insn = env->prog->insnsi;
12585 	int insn_cnt = env->prog->len;
12586 	int i, err;
12587 
12588 	for (i = 0; i < insn_cnt; i++) {
12589 		if (memcmp(&insn[i], &ja, sizeof(ja)))
12590 			continue;
12591 
12592 		err = verifier_remove_insns(env, i, 1);
12593 		if (err)
12594 			return err;
12595 		insn_cnt--;
12596 		i--;
12597 	}
12598 
12599 	return 0;
12600 }
12601 
12602 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
12603 					 const union bpf_attr *attr)
12604 {
12605 	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
12606 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
12607 	int i, patch_len, delta = 0, len = env->prog->len;
12608 	struct bpf_insn *insns = env->prog->insnsi;
12609 	struct bpf_prog *new_prog;
12610 	bool rnd_hi32;
12611 
12612 	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
12613 	zext_patch[1] = BPF_ZEXT_REG(0);
12614 	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
12615 	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
12616 	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
12617 	for (i = 0; i < len; i++) {
12618 		int adj_idx = i + delta;
12619 		struct bpf_insn insn;
12620 		int load_reg;
12621 
12622 		insn = insns[adj_idx];
12623 		load_reg = insn_def_regno(&insn);
12624 		if (!aux[adj_idx].zext_dst) {
12625 			u8 code, class;
12626 			u32 imm_rnd;
12627 
12628 			if (!rnd_hi32)
12629 				continue;
12630 
12631 			code = insn.code;
12632 			class = BPF_CLASS(code);
12633 			if (load_reg == -1)
12634 				continue;
12635 
12636 			/* NOTE: arg "reg" (the fourth one) is only used for
12637 			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
12638 			 *       here.
12639 			 */
12640 			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
12641 				if (class == BPF_LD &&
12642 				    BPF_MODE(code) == BPF_IMM)
12643 					i++;
12644 				continue;
12645 			}
12646 
12647 			/* ctx load could be transformed into wider load. */
12648 			if (class == BPF_LDX &&
12649 			    aux[adj_idx].ptr_type == PTR_TO_CTX)
12650 				continue;
12651 
12652 			imm_rnd = get_random_int();
12653 			rnd_hi32_patch[0] = insn;
12654 			rnd_hi32_patch[1].imm = imm_rnd;
12655 			rnd_hi32_patch[3].dst_reg = load_reg;
12656 			patch = rnd_hi32_patch;
12657 			patch_len = 4;
12658 			goto apply_patch_buffer;
12659 		}
12660 
12661 		/* Add in an zero-extend instruction if a) the JIT has requested
12662 		 * it or b) it's a CMPXCHG.
12663 		 *
12664 		 * The latter is because: BPF_CMPXCHG always loads a value into
12665 		 * R0, therefore always zero-extends. However some archs'
12666 		 * equivalent instruction only does this load when the
12667 		 * comparison is successful. This detail of CMPXCHG is
12668 		 * orthogonal to the general zero-extension behaviour of the
12669 		 * CPU, so it's treated independently of bpf_jit_needs_zext.
12670 		 */
12671 		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
12672 			continue;
12673 
12674 		if (WARN_ON(load_reg == -1)) {
12675 			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
12676 			return -EFAULT;
12677 		}
12678 
12679 		zext_patch[0] = insn;
12680 		zext_patch[1].dst_reg = load_reg;
12681 		zext_patch[1].src_reg = load_reg;
12682 		patch = zext_patch;
12683 		patch_len = 2;
12684 apply_patch_buffer:
12685 		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
12686 		if (!new_prog)
12687 			return -ENOMEM;
12688 		env->prog = new_prog;
12689 		insns = new_prog->insnsi;
12690 		aux = env->insn_aux_data;
12691 		delta += patch_len - 1;
12692 	}
12693 
12694 	return 0;
12695 }
12696 
12697 /* convert load instructions that access fields of a context type into a
12698  * sequence of instructions that access fields of the underlying structure:
12699  *     struct __sk_buff    -> struct sk_buff
12700  *     struct bpf_sock_ops -> struct sock
12701  */
12702 static int convert_ctx_accesses(struct bpf_verifier_env *env)
12703 {
12704 	const struct bpf_verifier_ops *ops = env->ops;
12705 	int i, cnt, size, ctx_field_size, delta = 0;
12706 	const int insn_cnt = env->prog->len;
12707 	struct bpf_insn insn_buf[16], *insn;
12708 	u32 target_size, size_default, off;
12709 	struct bpf_prog *new_prog;
12710 	enum bpf_access_type type;
12711 	bool is_narrower_load;
12712 
12713 	if (ops->gen_prologue || env->seen_direct_write) {
12714 		if (!ops->gen_prologue) {
12715 			verbose(env, "bpf verifier is misconfigured\n");
12716 			return -EINVAL;
12717 		}
12718 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
12719 					env->prog);
12720 		if (cnt >= ARRAY_SIZE(insn_buf)) {
12721 			verbose(env, "bpf verifier is misconfigured\n");
12722 			return -EINVAL;
12723 		} else if (cnt) {
12724 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
12725 			if (!new_prog)
12726 				return -ENOMEM;
12727 
12728 			env->prog = new_prog;
12729 			delta += cnt - 1;
12730 		}
12731 	}
12732 
12733 	if (bpf_prog_is_dev_bound(env->prog->aux))
12734 		return 0;
12735 
12736 	insn = env->prog->insnsi + delta;
12737 
12738 	for (i = 0; i < insn_cnt; i++, insn++) {
12739 		bpf_convert_ctx_access_t convert_ctx_access;
12740 		bool ctx_access;
12741 
12742 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
12743 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
12744 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
12745 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
12746 			type = BPF_READ;
12747 			ctx_access = true;
12748 		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
12749 			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
12750 			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
12751 			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
12752 			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
12753 			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
12754 			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
12755 			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
12756 			type = BPF_WRITE;
12757 			ctx_access = BPF_CLASS(insn->code) == BPF_STX;
12758 		} else {
12759 			continue;
12760 		}
12761 
12762 		if (type == BPF_WRITE &&
12763 		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
12764 			struct bpf_insn patch[] = {
12765 				*insn,
12766 				BPF_ST_NOSPEC(),
12767 			};
12768 
12769 			cnt = ARRAY_SIZE(patch);
12770 			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
12771 			if (!new_prog)
12772 				return -ENOMEM;
12773 
12774 			delta    += cnt - 1;
12775 			env->prog = new_prog;
12776 			insn      = new_prog->insnsi + i + delta;
12777 			continue;
12778 		}
12779 
12780 		if (!ctx_access)
12781 			continue;
12782 
12783 		switch (env->insn_aux_data[i + delta].ptr_type) {
12784 		case PTR_TO_CTX:
12785 			if (!ops->convert_ctx_access)
12786 				continue;
12787 			convert_ctx_access = ops->convert_ctx_access;
12788 			break;
12789 		case PTR_TO_SOCKET:
12790 		case PTR_TO_SOCK_COMMON:
12791 			convert_ctx_access = bpf_sock_convert_ctx_access;
12792 			break;
12793 		case PTR_TO_TCP_SOCK:
12794 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
12795 			break;
12796 		case PTR_TO_XDP_SOCK:
12797 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
12798 			break;
12799 		case PTR_TO_BTF_ID:
12800 			if (type == BPF_READ) {
12801 				insn->code = BPF_LDX | BPF_PROBE_MEM |
12802 					BPF_SIZE((insn)->code);
12803 				env->prog->aux->num_exentries++;
12804 			} else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
12805 				verbose(env, "Writes through BTF pointers are not allowed\n");
12806 				return -EINVAL;
12807 			}
12808 			continue;
12809 		default:
12810 			continue;
12811 		}
12812 
12813 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
12814 		size = BPF_LDST_BYTES(insn);
12815 
12816 		/* If the read access is a narrower load of the field,
12817 		 * convert to a 4/8-byte load, to minimum program type specific
12818 		 * convert_ctx_access changes. If conversion is successful,
12819 		 * we will apply proper mask to the result.
12820 		 */
12821 		is_narrower_load = size < ctx_field_size;
12822 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
12823 		off = insn->off;
12824 		if (is_narrower_load) {
12825 			u8 size_code;
12826 
12827 			if (type == BPF_WRITE) {
12828 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
12829 				return -EINVAL;
12830 			}
12831 
12832 			size_code = BPF_H;
12833 			if (ctx_field_size == 4)
12834 				size_code = BPF_W;
12835 			else if (ctx_field_size == 8)
12836 				size_code = BPF_DW;
12837 
12838 			insn->off = off & ~(size_default - 1);
12839 			insn->code = BPF_LDX | BPF_MEM | size_code;
12840 		}
12841 
12842 		target_size = 0;
12843 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
12844 					 &target_size);
12845 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
12846 		    (ctx_field_size && !target_size)) {
12847 			verbose(env, "bpf verifier is misconfigured\n");
12848 			return -EINVAL;
12849 		}
12850 
12851 		if (is_narrower_load && size < target_size) {
12852 			u8 shift = bpf_ctx_narrow_access_offset(
12853 				off, size, size_default) * 8;
12854 			if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
12855 				verbose(env, "bpf verifier narrow ctx load misconfigured\n");
12856 				return -EINVAL;
12857 			}
12858 			if (ctx_field_size <= 4) {
12859 				if (shift)
12860 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
12861 									insn->dst_reg,
12862 									shift);
12863 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
12864 								(1 << size * 8) - 1);
12865 			} else {
12866 				if (shift)
12867 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
12868 									insn->dst_reg,
12869 									shift);
12870 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
12871 								(1ULL << size * 8) - 1);
12872 			}
12873 		}
12874 
12875 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12876 		if (!new_prog)
12877 			return -ENOMEM;
12878 
12879 		delta += cnt - 1;
12880 
12881 		/* keep walking new program and skip insns we just inserted */
12882 		env->prog = new_prog;
12883 		insn      = new_prog->insnsi + i + delta;
12884 	}
12885 
12886 	return 0;
12887 }
12888 
12889 static int jit_subprogs(struct bpf_verifier_env *env)
12890 {
12891 	struct bpf_prog *prog = env->prog, **func, *tmp;
12892 	int i, j, subprog_start, subprog_end = 0, len, subprog;
12893 	struct bpf_map *map_ptr;
12894 	struct bpf_insn *insn;
12895 	void *old_bpf_func;
12896 	int err, num_exentries;
12897 
12898 	if (env->subprog_cnt <= 1)
12899 		return 0;
12900 
12901 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12902 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
12903 			continue;
12904 
12905 		/* Upon error here we cannot fall back to interpreter but
12906 		 * need a hard reject of the program. Thus -EFAULT is
12907 		 * propagated in any case.
12908 		 */
12909 		subprog = find_subprog(env, i + insn->imm + 1);
12910 		if (subprog < 0) {
12911 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
12912 				  i + insn->imm + 1);
12913 			return -EFAULT;
12914 		}
12915 		/* temporarily remember subprog id inside insn instead of
12916 		 * aux_data, since next loop will split up all insns into funcs
12917 		 */
12918 		insn->off = subprog;
12919 		/* remember original imm in case JIT fails and fallback
12920 		 * to interpreter will be needed
12921 		 */
12922 		env->insn_aux_data[i].call_imm = insn->imm;
12923 		/* point imm to __bpf_call_base+1 from JITs point of view */
12924 		insn->imm = 1;
12925 		if (bpf_pseudo_func(insn))
12926 			/* jit (e.g. x86_64) may emit fewer instructions
12927 			 * if it learns a u32 imm is the same as a u64 imm.
12928 			 * Force a non zero here.
12929 			 */
12930 			insn[1].imm = 1;
12931 	}
12932 
12933 	err = bpf_prog_alloc_jited_linfo(prog);
12934 	if (err)
12935 		goto out_undo_insn;
12936 
12937 	err = -ENOMEM;
12938 	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
12939 	if (!func)
12940 		goto out_undo_insn;
12941 
12942 	for (i = 0; i < env->subprog_cnt; i++) {
12943 		subprog_start = subprog_end;
12944 		subprog_end = env->subprog_info[i + 1].start;
12945 
12946 		len = subprog_end - subprog_start;
12947 		/* bpf_prog_run() doesn't call subprogs directly,
12948 		 * hence main prog stats include the runtime of subprogs.
12949 		 * subprogs don't have IDs and not reachable via prog_get_next_id
12950 		 * func[i]->stats will never be accessed and stays NULL
12951 		 */
12952 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
12953 		if (!func[i])
12954 			goto out_free;
12955 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
12956 		       len * sizeof(struct bpf_insn));
12957 		func[i]->type = prog->type;
12958 		func[i]->len = len;
12959 		if (bpf_prog_calc_tag(func[i]))
12960 			goto out_free;
12961 		func[i]->is_func = 1;
12962 		func[i]->aux->func_idx = i;
12963 		/* Below members will be freed only at prog->aux */
12964 		func[i]->aux->btf = prog->aux->btf;
12965 		func[i]->aux->func_info = prog->aux->func_info;
12966 		func[i]->aux->poke_tab = prog->aux->poke_tab;
12967 		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
12968 
12969 		for (j = 0; j < prog->aux->size_poke_tab; j++) {
12970 			struct bpf_jit_poke_descriptor *poke;
12971 
12972 			poke = &prog->aux->poke_tab[j];
12973 			if (poke->insn_idx < subprog_end &&
12974 			    poke->insn_idx >= subprog_start)
12975 				poke->aux = func[i]->aux;
12976 		}
12977 
12978 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
12979 		 * Long term would need debug info to populate names
12980 		 */
12981 		func[i]->aux->name[0] = 'F';
12982 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
12983 		func[i]->jit_requested = 1;
12984 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
12985 		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
12986 		func[i]->aux->linfo = prog->aux->linfo;
12987 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
12988 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
12989 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
12990 		num_exentries = 0;
12991 		insn = func[i]->insnsi;
12992 		for (j = 0; j < func[i]->len; j++, insn++) {
12993 			if (BPF_CLASS(insn->code) == BPF_LDX &&
12994 			    BPF_MODE(insn->code) == BPF_PROBE_MEM)
12995 				num_exentries++;
12996 		}
12997 		func[i]->aux->num_exentries = num_exentries;
12998 		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
12999 		func[i] = bpf_int_jit_compile(func[i]);
13000 		if (!func[i]->jited) {
13001 			err = -ENOTSUPP;
13002 			goto out_free;
13003 		}
13004 		cond_resched();
13005 	}
13006 
13007 	/* at this point all bpf functions were successfully JITed
13008 	 * now populate all bpf_calls with correct addresses and
13009 	 * run last pass of JIT
13010 	 */
13011 	for (i = 0; i < env->subprog_cnt; i++) {
13012 		insn = func[i]->insnsi;
13013 		for (j = 0; j < func[i]->len; j++, insn++) {
13014 			if (bpf_pseudo_func(insn)) {
13015 				subprog = insn->off;
13016 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
13017 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
13018 				continue;
13019 			}
13020 			if (!bpf_pseudo_call(insn))
13021 				continue;
13022 			subprog = insn->off;
13023 			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
13024 		}
13025 
13026 		/* we use the aux data to keep a list of the start addresses
13027 		 * of the JITed images for each function in the program
13028 		 *
13029 		 * for some architectures, such as powerpc64, the imm field
13030 		 * might not be large enough to hold the offset of the start
13031 		 * address of the callee's JITed image from __bpf_call_base
13032 		 *
13033 		 * in such cases, we can lookup the start address of a callee
13034 		 * by using its subprog id, available from the off field of
13035 		 * the call instruction, as an index for this list
13036 		 */
13037 		func[i]->aux->func = func;
13038 		func[i]->aux->func_cnt = env->subprog_cnt;
13039 	}
13040 	for (i = 0; i < env->subprog_cnt; i++) {
13041 		old_bpf_func = func[i]->bpf_func;
13042 		tmp = bpf_int_jit_compile(func[i]);
13043 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
13044 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
13045 			err = -ENOTSUPP;
13046 			goto out_free;
13047 		}
13048 		cond_resched();
13049 	}
13050 
13051 	/* finally lock prog and jit images for all functions and
13052 	 * populate kallsysm
13053 	 */
13054 	for (i = 0; i < env->subprog_cnt; i++) {
13055 		bpf_prog_lock_ro(func[i]);
13056 		bpf_prog_kallsyms_add(func[i]);
13057 	}
13058 
13059 	/* Last step: make now unused interpreter insns from main
13060 	 * prog consistent for later dump requests, so they can
13061 	 * later look the same as if they were interpreted only.
13062 	 */
13063 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
13064 		if (bpf_pseudo_func(insn)) {
13065 			insn[0].imm = env->insn_aux_data[i].call_imm;
13066 			insn[1].imm = insn->off;
13067 			insn->off = 0;
13068 			continue;
13069 		}
13070 		if (!bpf_pseudo_call(insn))
13071 			continue;
13072 		insn->off = env->insn_aux_data[i].call_imm;
13073 		subprog = find_subprog(env, i + insn->off + 1);
13074 		insn->imm = subprog;
13075 	}
13076 
13077 	prog->jited = 1;
13078 	prog->bpf_func = func[0]->bpf_func;
13079 	prog->jited_len = func[0]->jited_len;
13080 	prog->aux->func = func;
13081 	prog->aux->func_cnt = env->subprog_cnt;
13082 	bpf_prog_jit_attempt_done(prog);
13083 	return 0;
13084 out_free:
13085 	/* We failed JIT'ing, so at this point we need to unregister poke
13086 	 * descriptors from subprogs, so that kernel is not attempting to
13087 	 * patch it anymore as we're freeing the subprog JIT memory.
13088 	 */
13089 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
13090 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
13091 		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
13092 	}
13093 	/* At this point we're guaranteed that poke descriptors are not
13094 	 * live anymore. We can just unlink its descriptor table as it's
13095 	 * released with the main prog.
13096 	 */
13097 	for (i = 0; i < env->subprog_cnt; i++) {
13098 		if (!func[i])
13099 			continue;
13100 		func[i]->aux->poke_tab = NULL;
13101 		bpf_jit_free(func[i]);
13102 	}
13103 	kfree(func);
13104 out_undo_insn:
13105 	/* cleanup main prog to be interpreted */
13106 	prog->jit_requested = 0;
13107 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
13108 		if (!bpf_pseudo_call(insn))
13109 			continue;
13110 		insn->off = 0;
13111 		insn->imm = env->insn_aux_data[i].call_imm;
13112 	}
13113 	bpf_prog_jit_attempt_done(prog);
13114 	return err;
13115 }
13116 
13117 static int fixup_call_args(struct bpf_verifier_env *env)
13118 {
13119 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
13120 	struct bpf_prog *prog = env->prog;
13121 	struct bpf_insn *insn = prog->insnsi;
13122 	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
13123 	int i, depth;
13124 #endif
13125 	int err = 0;
13126 
13127 	if (env->prog->jit_requested &&
13128 	    !bpf_prog_is_dev_bound(env->prog->aux)) {
13129 		err = jit_subprogs(env);
13130 		if (err == 0)
13131 			return 0;
13132 		if (err == -EFAULT)
13133 			return err;
13134 	}
13135 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
13136 	if (has_kfunc_call) {
13137 		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
13138 		return -EINVAL;
13139 	}
13140 	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
13141 		/* When JIT fails the progs with bpf2bpf calls and tail_calls
13142 		 * have to be rejected, since interpreter doesn't support them yet.
13143 		 */
13144 		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
13145 		return -EINVAL;
13146 	}
13147 	for (i = 0; i < prog->len; i++, insn++) {
13148 		if (bpf_pseudo_func(insn)) {
13149 			/* When JIT fails the progs with callback calls
13150 			 * have to be rejected, since interpreter doesn't support them yet.
13151 			 */
13152 			verbose(env, "callbacks are not allowed in non-JITed programs\n");
13153 			return -EINVAL;
13154 		}
13155 
13156 		if (!bpf_pseudo_call(insn))
13157 			continue;
13158 		depth = get_callee_stack_depth(env, insn, i);
13159 		if (depth < 0)
13160 			return depth;
13161 		bpf_patch_call_args(insn, depth);
13162 	}
13163 	err = 0;
13164 #endif
13165 	return err;
13166 }
13167 
13168 static int fixup_kfunc_call(struct bpf_verifier_env *env,
13169 			    struct bpf_insn *insn)
13170 {
13171 	const struct bpf_kfunc_desc *desc;
13172 
13173 	if (!insn->imm) {
13174 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
13175 		return -EINVAL;
13176 	}
13177 
13178 	/* insn->imm has the btf func_id. Replace it with
13179 	 * an address (relative to __bpf_base_call).
13180 	 */
13181 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
13182 	if (!desc) {
13183 		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
13184 			insn->imm);
13185 		return -EFAULT;
13186 	}
13187 
13188 	insn->imm = desc->imm;
13189 
13190 	return 0;
13191 }
13192 
13193 /* Do various post-verification rewrites in a single program pass.
13194  * These rewrites simplify JIT and interpreter implementations.
13195  */
13196 static int do_misc_fixups(struct bpf_verifier_env *env)
13197 {
13198 	struct bpf_prog *prog = env->prog;
13199 	enum bpf_attach_type eatype = prog->expected_attach_type;
13200 	bool expect_blinding = bpf_jit_blinding_enabled(prog);
13201 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
13202 	struct bpf_insn *insn = prog->insnsi;
13203 	const struct bpf_func_proto *fn;
13204 	const int insn_cnt = prog->len;
13205 	const struct bpf_map_ops *ops;
13206 	struct bpf_insn_aux_data *aux;
13207 	struct bpf_insn insn_buf[16];
13208 	struct bpf_prog *new_prog;
13209 	struct bpf_map *map_ptr;
13210 	int i, ret, cnt, delta = 0;
13211 
13212 	for (i = 0; i < insn_cnt; i++, insn++) {
13213 		/* Make divide-by-zero exceptions impossible. */
13214 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
13215 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
13216 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
13217 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
13218 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
13219 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
13220 			struct bpf_insn *patchlet;
13221 			struct bpf_insn chk_and_div[] = {
13222 				/* [R,W]x div 0 -> 0 */
13223 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
13224 					     BPF_JNE | BPF_K, insn->src_reg,
13225 					     0, 2, 0),
13226 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
13227 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
13228 				*insn,
13229 			};
13230 			struct bpf_insn chk_and_mod[] = {
13231 				/* [R,W]x mod 0 -> [R,W]x */
13232 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
13233 					     BPF_JEQ | BPF_K, insn->src_reg,
13234 					     0, 1 + (is64 ? 0 : 1), 0),
13235 				*insn,
13236 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
13237 				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
13238 			};
13239 
13240 			patchlet = isdiv ? chk_and_div : chk_and_mod;
13241 			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
13242 				      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
13243 
13244 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
13245 			if (!new_prog)
13246 				return -ENOMEM;
13247 
13248 			delta    += cnt - 1;
13249 			env->prog = prog = new_prog;
13250 			insn      = new_prog->insnsi + i + delta;
13251 			continue;
13252 		}
13253 
13254 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
13255 		if (BPF_CLASS(insn->code) == BPF_LD &&
13256 		    (BPF_MODE(insn->code) == BPF_ABS ||
13257 		     BPF_MODE(insn->code) == BPF_IND)) {
13258 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
13259 			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13260 				verbose(env, "bpf verifier is misconfigured\n");
13261 				return -EINVAL;
13262 			}
13263 
13264 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13265 			if (!new_prog)
13266 				return -ENOMEM;
13267 
13268 			delta    += cnt - 1;
13269 			env->prog = prog = new_prog;
13270 			insn      = new_prog->insnsi + i + delta;
13271 			continue;
13272 		}
13273 
13274 		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
13275 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
13276 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
13277 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
13278 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
13279 			struct bpf_insn *patch = &insn_buf[0];
13280 			bool issrc, isneg, isimm;
13281 			u32 off_reg;
13282 
13283 			aux = &env->insn_aux_data[i + delta];
13284 			if (!aux->alu_state ||
13285 			    aux->alu_state == BPF_ALU_NON_POINTER)
13286 				continue;
13287 
13288 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
13289 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
13290 				BPF_ALU_SANITIZE_SRC;
13291 			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
13292 
13293 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
13294 			if (isimm) {
13295 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
13296 			} else {
13297 				if (isneg)
13298 					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13299 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
13300 				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
13301 				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
13302 				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
13303 				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
13304 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
13305 			}
13306 			if (!issrc)
13307 				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
13308 			insn->src_reg = BPF_REG_AX;
13309 			if (isneg)
13310 				insn->code = insn->code == code_add ?
13311 					     code_sub : code_add;
13312 			*patch++ = *insn;
13313 			if (issrc && isneg && !isimm)
13314 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13315 			cnt = patch - insn_buf;
13316 
13317 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13318 			if (!new_prog)
13319 				return -ENOMEM;
13320 
13321 			delta    += cnt - 1;
13322 			env->prog = prog = new_prog;
13323 			insn      = new_prog->insnsi + i + delta;
13324 			continue;
13325 		}
13326 
13327 		if (insn->code != (BPF_JMP | BPF_CALL))
13328 			continue;
13329 		if (insn->src_reg == BPF_PSEUDO_CALL)
13330 			continue;
13331 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
13332 			ret = fixup_kfunc_call(env, insn);
13333 			if (ret)
13334 				return ret;
13335 			continue;
13336 		}
13337 
13338 		if (insn->imm == BPF_FUNC_get_route_realm)
13339 			prog->dst_needed = 1;
13340 		if (insn->imm == BPF_FUNC_get_prandom_u32)
13341 			bpf_user_rnd_init_once();
13342 		if (insn->imm == BPF_FUNC_override_return)
13343 			prog->kprobe_override = 1;
13344 		if (insn->imm == BPF_FUNC_tail_call) {
13345 			/* If we tail call into other programs, we
13346 			 * cannot make any assumptions since they can
13347 			 * be replaced dynamically during runtime in
13348 			 * the program array.
13349 			 */
13350 			prog->cb_access = 1;
13351 			if (!allow_tail_call_in_subprogs(env))
13352 				prog->aux->stack_depth = MAX_BPF_STACK;
13353 			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
13354 
13355 			/* mark bpf_tail_call as different opcode to avoid
13356 			 * conditional branch in the interpreter for every normal
13357 			 * call and to prevent accidental JITing by JIT compiler
13358 			 * that doesn't support bpf_tail_call yet
13359 			 */
13360 			insn->imm = 0;
13361 			insn->code = BPF_JMP | BPF_TAIL_CALL;
13362 
13363 			aux = &env->insn_aux_data[i + delta];
13364 			if (env->bpf_capable && !expect_blinding &&
13365 			    prog->jit_requested &&
13366 			    !bpf_map_key_poisoned(aux) &&
13367 			    !bpf_map_ptr_poisoned(aux) &&
13368 			    !bpf_map_ptr_unpriv(aux)) {
13369 				struct bpf_jit_poke_descriptor desc = {
13370 					.reason = BPF_POKE_REASON_TAIL_CALL,
13371 					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
13372 					.tail_call.key = bpf_map_key_immediate(aux),
13373 					.insn_idx = i + delta,
13374 				};
13375 
13376 				ret = bpf_jit_add_poke_descriptor(prog, &desc);
13377 				if (ret < 0) {
13378 					verbose(env, "adding tail call poke descriptor failed\n");
13379 					return ret;
13380 				}
13381 
13382 				insn->imm = ret + 1;
13383 				continue;
13384 			}
13385 
13386 			if (!bpf_map_ptr_unpriv(aux))
13387 				continue;
13388 
13389 			/* instead of changing every JIT dealing with tail_call
13390 			 * emit two extra insns:
13391 			 * if (index >= max_entries) goto out;
13392 			 * index &= array->index_mask;
13393 			 * to avoid out-of-bounds cpu speculation
13394 			 */
13395 			if (bpf_map_ptr_poisoned(aux)) {
13396 				verbose(env, "tail_call abusing map_ptr\n");
13397 				return -EINVAL;
13398 			}
13399 
13400 			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13401 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
13402 						  map_ptr->max_entries, 2);
13403 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
13404 						    container_of(map_ptr,
13405 								 struct bpf_array,
13406 								 map)->index_mask);
13407 			insn_buf[2] = *insn;
13408 			cnt = 3;
13409 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13410 			if (!new_prog)
13411 				return -ENOMEM;
13412 
13413 			delta    += cnt - 1;
13414 			env->prog = prog = new_prog;
13415 			insn      = new_prog->insnsi + i + delta;
13416 			continue;
13417 		}
13418 
13419 		if (insn->imm == BPF_FUNC_timer_set_callback) {
13420 			/* The verifier will process callback_fn as many times as necessary
13421 			 * with different maps and the register states prepared by
13422 			 * set_timer_callback_state will be accurate.
13423 			 *
13424 			 * The following use case is valid:
13425 			 *   map1 is shared by prog1, prog2, prog3.
13426 			 *   prog1 calls bpf_timer_init for some map1 elements
13427 			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
13428 			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
13429 			 *   prog3 calls bpf_timer_start for some map1 elements.
13430 			 *     Those that were not both bpf_timer_init-ed and
13431 			 *     bpf_timer_set_callback-ed will return -EINVAL.
13432 			 */
13433 			struct bpf_insn ld_addrs[2] = {
13434 				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
13435 			};
13436 
13437 			insn_buf[0] = ld_addrs[0];
13438 			insn_buf[1] = ld_addrs[1];
13439 			insn_buf[2] = *insn;
13440 			cnt = 3;
13441 
13442 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13443 			if (!new_prog)
13444 				return -ENOMEM;
13445 
13446 			delta    += cnt - 1;
13447 			env->prog = prog = new_prog;
13448 			insn      = new_prog->insnsi + i + delta;
13449 			goto patch_call_imm;
13450 		}
13451 
13452 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
13453 		 * and other inlining handlers are currently limited to 64 bit
13454 		 * only.
13455 		 */
13456 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
13457 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
13458 		     insn->imm == BPF_FUNC_map_update_elem ||
13459 		     insn->imm == BPF_FUNC_map_delete_elem ||
13460 		     insn->imm == BPF_FUNC_map_push_elem   ||
13461 		     insn->imm == BPF_FUNC_map_pop_elem    ||
13462 		     insn->imm == BPF_FUNC_map_peek_elem   ||
13463 		     insn->imm == BPF_FUNC_redirect_map    ||
13464 		     insn->imm == BPF_FUNC_for_each_map_elem)) {
13465 			aux = &env->insn_aux_data[i + delta];
13466 			if (bpf_map_ptr_poisoned(aux))
13467 				goto patch_call_imm;
13468 
13469 			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13470 			ops = map_ptr->ops;
13471 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
13472 			    ops->map_gen_lookup) {
13473 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
13474 				if (cnt == -EOPNOTSUPP)
13475 					goto patch_map_ops_generic;
13476 				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13477 					verbose(env, "bpf verifier is misconfigured\n");
13478 					return -EINVAL;
13479 				}
13480 
13481 				new_prog = bpf_patch_insn_data(env, i + delta,
13482 							       insn_buf, cnt);
13483 				if (!new_prog)
13484 					return -ENOMEM;
13485 
13486 				delta    += cnt - 1;
13487 				env->prog = prog = new_prog;
13488 				insn      = new_prog->insnsi + i + delta;
13489 				continue;
13490 			}
13491 
13492 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
13493 				     (void *(*)(struct bpf_map *map, void *key))NULL));
13494 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
13495 				     (int (*)(struct bpf_map *map, void *key))NULL));
13496 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
13497 				     (int (*)(struct bpf_map *map, void *key, void *value,
13498 					      u64 flags))NULL));
13499 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
13500 				     (int (*)(struct bpf_map *map, void *value,
13501 					      u64 flags))NULL));
13502 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
13503 				     (int (*)(struct bpf_map *map, void *value))NULL));
13504 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
13505 				     (int (*)(struct bpf_map *map, void *value))NULL));
13506 			BUILD_BUG_ON(!__same_type(ops->map_redirect,
13507 				     (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
13508 			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
13509 				     (int (*)(struct bpf_map *map,
13510 					      bpf_callback_t callback_fn,
13511 					      void *callback_ctx,
13512 					      u64 flags))NULL));
13513 
13514 patch_map_ops_generic:
13515 			switch (insn->imm) {
13516 			case BPF_FUNC_map_lookup_elem:
13517 				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
13518 				continue;
13519 			case BPF_FUNC_map_update_elem:
13520 				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
13521 				continue;
13522 			case BPF_FUNC_map_delete_elem:
13523 				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
13524 				continue;
13525 			case BPF_FUNC_map_push_elem:
13526 				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
13527 				continue;
13528 			case BPF_FUNC_map_pop_elem:
13529 				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
13530 				continue;
13531 			case BPF_FUNC_map_peek_elem:
13532 				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
13533 				continue;
13534 			case BPF_FUNC_redirect_map:
13535 				insn->imm = BPF_CALL_IMM(ops->map_redirect);
13536 				continue;
13537 			case BPF_FUNC_for_each_map_elem:
13538 				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
13539 				continue;
13540 			}
13541 
13542 			goto patch_call_imm;
13543 		}
13544 
13545 		/* Implement bpf_jiffies64 inline. */
13546 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
13547 		    insn->imm == BPF_FUNC_jiffies64) {
13548 			struct bpf_insn ld_jiffies_addr[2] = {
13549 				BPF_LD_IMM64(BPF_REG_0,
13550 					     (unsigned long)&jiffies),
13551 			};
13552 
13553 			insn_buf[0] = ld_jiffies_addr[0];
13554 			insn_buf[1] = ld_jiffies_addr[1];
13555 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
13556 						  BPF_REG_0, 0);
13557 			cnt = 3;
13558 
13559 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
13560 						       cnt);
13561 			if (!new_prog)
13562 				return -ENOMEM;
13563 
13564 			delta    += cnt - 1;
13565 			env->prog = prog = new_prog;
13566 			insn      = new_prog->insnsi + i + delta;
13567 			continue;
13568 		}
13569 
13570 		/* Implement bpf_get_func_arg inline. */
13571 		if (prog_type == BPF_PROG_TYPE_TRACING &&
13572 		    insn->imm == BPF_FUNC_get_func_arg) {
13573 			/* Load nr_args from ctx - 8 */
13574 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13575 			insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
13576 			insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
13577 			insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
13578 			insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
13579 			insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
13580 			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
13581 			insn_buf[7] = BPF_JMP_A(1);
13582 			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
13583 			cnt = 9;
13584 
13585 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13586 			if (!new_prog)
13587 				return -ENOMEM;
13588 
13589 			delta    += cnt - 1;
13590 			env->prog = prog = new_prog;
13591 			insn      = new_prog->insnsi + i + delta;
13592 			continue;
13593 		}
13594 
13595 		/* Implement bpf_get_func_ret inline. */
13596 		if (prog_type == BPF_PROG_TYPE_TRACING &&
13597 		    insn->imm == BPF_FUNC_get_func_ret) {
13598 			if (eatype == BPF_TRACE_FEXIT ||
13599 			    eatype == BPF_MODIFY_RETURN) {
13600 				/* Load nr_args from ctx - 8 */
13601 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13602 				insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
13603 				insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
13604 				insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
13605 				insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
13606 				insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
13607 				cnt = 6;
13608 			} else {
13609 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
13610 				cnt = 1;
13611 			}
13612 
13613 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13614 			if (!new_prog)
13615 				return -ENOMEM;
13616 
13617 			delta    += cnt - 1;
13618 			env->prog = prog = new_prog;
13619 			insn      = new_prog->insnsi + i + delta;
13620 			continue;
13621 		}
13622 
13623 		/* Implement get_func_arg_cnt inline. */
13624 		if (prog_type == BPF_PROG_TYPE_TRACING &&
13625 		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
13626 			/* Load nr_args from ctx - 8 */
13627 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13628 
13629 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13630 			if (!new_prog)
13631 				return -ENOMEM;
13632 
13633 			env->prog = prog = new_prog;
13634 			insn      = new_prog->insnsi + i + delta;
13635 			continue;
13636 		}
13637 
13638 		/* Implement bpf_get_func_ip inline. */
13639 		if (prog_type == BPF_PROG_TYPE_TRACING &&
13640 		    insn->imm == BPF_FUNC_get_func_ip) {
13641 			/* Load IP address from ctx - 16 */
13642 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
13643 
13644 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13645 			if (!new_prog)
13646 				return -ENOMEM;
13647 
13648 			env->prog = prog = new_prog;
13649 			insn      = new_prog->insnsi + i + delta;
13650 			continue;
13651 		}
13652 
13653 patch_call_imm:
13654 		fn = env->ops->get_func_proto(insn->imm, env->prog);
13655 		/* all functions that have prototype and verifier allowed
13656 		 * programs to call them, must be real in-kernel functions
13657 		 */
13658 		if (!fn->func) {
13659 			verbose(env,
13660 				"kernel subsystem misconfigured func %s#%d\n",
13661 				func_id_name(insn->imm), insn->imm);
13662 			return -EFAULT;
13663 		}
13664 		insn->imm = fn->func - __bpf_call_base;
13665 	}
13666 
13667 	/* Since poke tab is now finalized, publish aux to tracker. */
13668 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
13669 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
13670 		if (!map_ptr->ops->map_poke_track ||
13671 		    !map_ptr->ops->map_poke_untrack ||
13672 		    !map_ptr->ops->map_poke_run) {
13673 			verbose(env, "bpf verifier is misconfigured\n");
13674 			return -EINVAL;
13675 		}
13676 
13677 		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
13678 		if (ret < 0) {
13679 			verbose(env, "tracking tail call prog failed\n");
13680 			return ret;
13681 		}
13682 	}
13683 
13684 	sort_kfunc_descs_by_imm(env->prog);
13685 
13686 	return 0;
13687 }
13688 
13689 static void free_states(struct bpf_verifier_env *env)
13690 {
13691 	struct bpf_verifier_state_list *sl, *sln;
13692 	int i;
13693 
13694 	sl = env->free_list;
13695 	while (sl) {
13696 		sln = sl->next;
13697 		free_verifier_state(&sl->state, false);
13698 		kfree(sl);
13699 		sl = sln;
13700 	}
13701 	env->free_list = NULL;
13702 
13703 	if (!env->explored_states)
13704 		return;
13705 
13706 	for (i = 0; i < state_htab_size(env); i++) {
13707 		sl = env->explored_states[i];
13708 
13709 		while (sl) {
13710 			sln = sl->next;
13711 			free_verifier_state(&sl->state, false);
13712 			kfree(sl);
13713 			sl = sln;
13714 		}
13715 		env->explored_states[i] = NULL;
13716 	}
13717 }
13718 
13719 static int do_check_common(struct bpf_verifier_env *env, int subprog)
13720 {
13721 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
13722 	struct bpf_verifier_state *state;
13723 	struct bpf_reg_state *regs;
13724 	int ret, i;
13725 
13726 	env->prev_linfo = NULL;
13727 	env->pass_cnt++;
13728 
13729 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
13730 	if (!state)
13731 		return -ENOMEM;
13732 	state->curframe = 0;
13733 	state->speculative = false;
13734 	state->branches = 1;
13735 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
13736 	if (!state->frame[0]) {
13737 		kfree(state);
13738 		return -ENOMEM;
13739 	}
13740 	env->cur_state = state;
13741 	init_func_state(env, state->frame[0],
13742 			BPF_MAIN_FUNC /* callsite */,
13743 			0 /* frameno */,
13744 			subprog);
13745 
13746 	regs = state->frame[state->curframe]->regs;
13747 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
13748 		ret = btf_prepare_func_args(env, subprog, regs);
13749 		if (ret)
13750 			goto out;
13751 		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
13752 			if (regs[i].type == PTR_TO_CTX)
13753 				mark_reg_known_zero(env, regs, i);
13754 			else if (regs[i].type == SCALAR_VALUE)
13755 				mark_reg_unknown(env, regs, i);
13756 			else if (base_type(regs[i].type) == PTR_TO_MEM) {
13757 				const u32 mem_size = regs[i].mem_size;
13758 
13759 				mark_reg_known_zero(env, regs, i);
13760 				regs[i].mem_size = mem_size;
13761 				regs[i].id = ++env->id_gen;
13762 			}
13763 		}
13764 	} else {
13765 		/* 1st arg to a function */
13766 		regs[BPF_REG_1].type = PTR_TO_CTX;
13767 		mark_reg_known_zero(env, regs, BPF_REG_1);
13768 		ret = btf_check_subprog_arg_match(env, subprog, regs);
13769 		if (ret == -EFAULT)
13770 			/* unlikely verifier bug. abort.
13771 			 * ret == 0 and ret < 0 are sadly acceptable for
13772 			 * main() function due to backward compatibility.
13773 			 * Like socket filter program may be written as:
13774 			 * int bpf_prog(struct pt_regs *ctx)
13775 			 * and never dereference that ctx in the program.
13776 			 * 'struct pt_regs' is a type mismatch for socket
13777 			 * filter that should be using 'struct __sk_buff'.
13778 			 */
13779 			goto out;
13780 	}
13781 
13782 	ret = do_check(env);
13783 out:
13784 	/* check for NULL is necessary, since cur_state can be freed inside
13785 	 * do_check() under memory pressure.
13786 	 */
13787 	if (env->cur_state) {
13788 		free_verifier_state(env->cur_state, true);
13789 		env->cur_state = NULL;
13790 	}
13791 	while (!pop_stack(env, NULL, NULL, false));
13792 	if (!ret && pop_log)
13793 		bpf_vlog_reset(&env->log, 0);
13794 	free_states(env);
13795 	return ret;
13796 }
13797 
13798 /* Verify all global functions in a BPF program one by one based on their BTF.
13799  * All global functions must pass verification. Otherwise the whole program is rejected.
13800  * Consider:
13801  * int bar(int);
13802  * int foo(int f)
13803  * {
13804  *    return bar(f);
13805  * }
13806  * int bar(int b)
13807  * {
13808  *    ...
13809  * }
13810  * foo() will be verified first for R1=any_scalar_value. During verification it
13811  * will be assumed that bar() already verified successfully and call to bar()
13812  * from foo() will be checked for type match only. Later bar() will be verified
13813  * independently to check that it's safe for R1=any_scalar_value.
13814  */
13815 static int do_check_subprogs(struct bpf_verifier_env *env)
13816 {
13817 	struct bpf_prog_aux *aux = env->prog->aux;
13818 	int i, ret;
13819 
13820 	if (!aux->func_info)
13821 		return 0;
13822 
13823 	for (i = 1; i < env->subprog_cnt; i++) {
13824 		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
13825 			continue;
13826 		env->insn_idx = env->subprog_info[i].start;
13827 		WARN_ON_ONCE(env->insn_idx == 0);
13828 		ret = do_check_common(env, i);
13829 		if (ret) {
13830 			return ret;
13831 		} else if (env->log.level & BPF_LOG_LEVEL) {
13832 			verbose(env,
13833 				"Func#%d is safe for any args that match its prototype\n",
13834 				i);
13835 		}
13836 	}
13837 	return 0;
13838 }
13839 
13840 static int do_check_main(struct bpf_verifier_env *env)
13841 {
13842 	int ret;
13843 
13844 	env->insn_idx = 0;
13845 	ret = do_check_common(env, 0);
13846 	if (!ret)
13847 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
13848 	return ret;
13849 }
13850 
13851 
13852 static void print_verification_stats(struct bpf_verifier_env *env)
13853 {
13854 	int i;
13855 
13856 	if (env->log.level & BPF_LOG_STATS) {
13857 		verbose(env, "verification time %lld usec\n",
13858 			div_u64(env->verification_time, 1000));
13859 		verbose(env, "stack depth ");
13860 		for (i = 0; i < env->subprog_cnt; i++) {
13861 			u32 depth = env->subprog_info[i].stack_depth;
13862 
13863 			verbose(env, "%d", depth);
13864 			if (i + 1 < env->subprog_cnt)
13865 				verbose(env, "+");
13866 		}
13867 		verbose(env, "\n");
13868 	}
13869 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
13870 		"total_states %d peak_states %d mark_read %d\n",
13871 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
13872 		env->max_states_per_insn, env->total_states,
13873 		env->peak_states, env->longest_mark_read_walk);
13874 }
13875 
13876 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
13877 {
13878 	const struct btf_type *t, *func_proto;
13879 	const struct bpf_struct_ops *st_ops;
13880 	const struct btf_member *member;
13881 	struct bpf_prog *prog = env->prog;
13882 	u32 btf_id, member_idx;
13883 	const char *mname;
13884 
13885 	if (!prog->gpl_compatible) {
13886 		verbose(env, "struct ops programs must have a GPL compatible license\n");
13887 		return -EINVAL;
13888 	}
13889 
13890 	btf_id = prog->aux->attach_btf_id;
13891 	st_ops = bpf_struct_ops_find(btf_id);
13892 	if (!st_ops) {
13893 		verbose(env, "attach_btf_id %u is not a supported struct\n",
13894 			btf_id);
13895 		return -ENOTSUPP;
13896 	}
13897 
13898 	t = st_ops->type;
13899 	member_idx = prog->expected_attach_type;
13900 	if (member_idx >= btf_type_vlen(t)) {
13901 		verbose(env, "attach to invalid member idx %u of struct %s\n",
13902 			member_idx, st_ops->name);
13903 		return -EINVAL;
13904 	}
13905 
13906 	member = &btf_type_member(t)[member_idx];
13907 	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
13908 	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
13909 					       NULL);
13910 	if (!func_proto) {
13911 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
13912 			mname, member_idx, st_ops->name);
13913 		return -EINVAL;
13914 	}
13915 
13916 	if (st_ops->check_member) {
13917 		int err = st_ops->check_member(t, member);
13918 
13919 		if (err) {
13920 			verbose(env, "attach to unsupported member %s of struct %s\n",
13921 				mname, st_ops->name);
13922 			return err;
13923 		}
13924 	}
13925 
13926 	prog->aux->attach_func_proto = func_proto;
13927 	prog->aux->attach_func_name = mname;
13928 	env->ops = st_ops->verifier_ops;
13929 
13930 	return 0;
13931 }
13932 #define SECURITY_PREFIX "security_"
13933 
13934 static int check_attach_modify_return(unsigned long addr, const char *func_name)
13935 {
13936 	if (within_error_injection_list(addr) ||
13937 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
13938 		return 0;
13939 
13940 	return -EINVAL;
13941 }
13942 
13943 /* list of non-sleepable functions that are otherwise on
13944  * ALLOW_ERROR_INJECTION list
13945  */
13946 BTF_SET_START(btf_non_sleepable_error_inject)
13947 /* Three functions below can be called from sleepable and non-sleepable context.
13948  * Assume non-sleepable from bpf safety point of view.
13949  */
13950 BTF_ID(func, __filemap_add_folio)
13951 BTF_ID(func, should_fail_alloc_page)
13952 BTF_ID(func, should_failslab)
13953 BTF_SET_END(btf_non_sleepable_error_inject)
13954 
13955 static int check_non_sleepable_error_inject(u32 btf_id)
13956 {
13957 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
13958 }
13959 
13960 int bpf_check_attach_target(struct bpf_verifier_log *log,
13961 			    const struct bpf_prog *prog,
13962 			    const struct bpf_prog *tgt_prog,
13963 			    u32 btf_id,
13964 			    struct bpf_attach_target_info *tgt_info)
13965 {
13966 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
13967 	const char prefix[] = "btf_trace_";
13968 	int ret = 0, subprog = -1, i;
13969 	const struct btf_type *t;
13970 	bool conservative = true;
13971 	const char *tname;
13972 	struct btf *btf;
13973 	long addr = 0;
13974 
13975 	if (!btf_id) {
13976 		bpf_log(log, "Tracing programs must provide btf_id\n");
13977 		return -EINVAL;
13978 	}
13979 	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
13980 	if (!btf) {
13981 		bpf_log(log,
13982 			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
13983 		return -EINVAL;
13984 	}
13985 	t = btf_type_by_id(btf, btf_id);
13986 	if (!t) {
13987 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
13988 		return -EINVAL;
13989 	}
13990 	tname = btf_name_by_offset(btf, t->name_off);
13991 	if (!tname) {
13992 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
13993 		return -EINVAL;
13994 	}
13995 	if (tgt_prog) {
13996 		struct bpf_prog_aux *aux = tgt_prog->aux;
13997 
13998 		for (i = 0; i < aux->func_info_cnt; i++)
13999 			if (aux->func_info[i].type_id == btf_id) {
14000 				subprog = i;
14001 				break;
14002 			}
14003 		if (subprog == -1) {
14004 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
14005 			return -EINVAL;
14006 		}
14007 		conservative = aux->func_info_aux[subprog].unreliable;
14008 		if (prog_extension) {
14009 			if (conservative) {
14010 				bpf_log(log,
14011 					"Cannot replace static functions\n");
14012 				return -EINVAL;
14013 			}
14014 			if (!prog->jit_requested) {
14015 				bpf_log(log,
14016 					"Extension programs should be JITed\n");
14017 				return -EINVAL;
14018 			}
14019 		}
14020 		if (!tgt_prog->jited) {
14021 			bpf_log(log, "Can attach to only JITed progs\n");
14022 			return -EINVAL;
14023 		}
14024 		if (tgt_prog->type == prog->type) {
14025 			/* Cannot fentry/fexit another fentry/fexit program.
14026 			 * Cannot attach program extension to another extension.
14027 			 * It's ok to attach fentry/fexit to extension program.
14028 			 */
14029 			bpf_log(log, "Cannot recursively attach\n");
14030 			return -EINVAL;
14031 		}
14032 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
14033 		    prog_extension &&
14034 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
14035 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
14036 			/* Program extensions can extend all program types
14037 			 * except fentry/fexit. The reason is the following.
14038 			 * The fentry/fexit programs are used for performance
14039 			 * analysis, stats and can be attached to any program
14040 			 * type except themselves. When extension program is
14041 			 * replacing XDP function it is necessary to allow
14042 			 * performance analysis of all functions. Both original
14043 			 * XDP program and its program extension. Hence
14044 			 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
14045 			 * allowed. If extending of fentry/fexit was allowed it
14046 			 * would be possible to create long call chain
14047 			 * fentry->extension->fentry->extension beyond
14048 			 * reasonable stack size. Hence extending fentry is not
14049 			 * allowed.
14050 			 */
14051 			bpf_log(log, "Cannot extend fentry/fexit\n");
14052 			return -EINVAL;
14053 		}
14054 	} else {
14055 		if (prog_extension) {
14056 			bpf_log(log, "Cannot replace kernel functions\n");
14057 			return -EINVAL;
14058 		}
14059 	}
14060 
14061 	switch (prog->expected_attach_type) {
14062 	case BPF_TRACE_RAW_TP:
14063 		if (tgt_prog) {
14064 			bpf_log(log,
14065 				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
14066 			return -EINVAL;
14067 		}
14068 		if (!btf_type_is_typedef(t)) {
14069 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
14070 				btf_id);
14071 			return -EINVAL;
14072 		}
14073 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
14074 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
14075 				btf_id, tname);
14076 			return -EINVAL;
14077 		}
14078 		tname += sizeof(prefix) - 1;
14079 		t = btf_type_by_id(btf, t->type);
14080 		if (!btf_type_is_ptr(t))
14081 			/* should never happen in valid vmlinux build */
14082 			return -EINVAL;
14083 		t = btf_type_by_id(btf, t->type);
14084 		if (!btf_type_is_func_proto(t))
14085 			/* should never happen in valid vmlinux build */
14086 			return -EINVAL;
14087 
14088 		break;
14089 	case BPF_TRACE_ITER:
14090 		if (!btf_type_is_func(t)) {
14091 			bpf_log(log, "attach_btf_id %u is not a function\n",
14092 				btf_id);
14093 			return -EINVAL;
14094 		}
14095 		t = btf_type_by_id(btf, t->type);
14096 		if (!btf_type_is_func_proto(t))
14097 			return -EINVAL;
14098 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
14099 		if (ret)
14100 			return ret;
14101 		break;
14102 	default:
14103 		if (!prog_extension)
14104 			return -EINVAL;
14105 		fallthrough;
14106 	case BPF_MODIFY_RETURN:
14107 	case BPF_LSM_MAC:
14108 	case BPF_TRACE_FENTRY:
14109 	case BPF_TRACE_FEXIT:
14110 		if (!btf_type_is_func(t)) {
14111 			bpf_log(log, "attach_btf_id %u is not a function\n",
14112 				btf_id);
14113 			return -EINVAL;
14114 		}
14115 		if (prog_extension &&
14116 		    btf_check_type_match(log, prog, btf, t))
14117 			return -EINVAL;
14118 		t = btf_type_by_id(btf, t->type);
14119 		if (!btf_type_is_func_proto(t))
14120 			return -EINVAL;
14121 
14122 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
14123 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
14124 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
14125 			return -EINVAL;
14126 
14127 		if (tgt_prog && conservative)
14128 			t = NULL;
14129 
14130 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
14131 		if (ret < 0)
14132 			return ret;
14133 
14134 		if (tgt_prog) {
14135 			if (subprog == 0)
14136 				addr = (long) tgt_prog->bpf_func;
14137 			else
14138 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
14139 		} else {
14140 			addr = kallsyms_lookup_name(tname);
14141 			if (!addr) {
14142 				bpf_log(log,
14143 					"The address of function %s cannot be found\n",
14144 					tname);
14145 				return -ENOENT;
14146 			}
14147 		}
14148 
14149 		if (prog->aux->sleepable) {
14150 			ret = -EINVAL;
14151 			switch (prog->type) {
14152 			case BPF_PROG_TYPE_TRACING:
14153 				/* fentry/fexit/fmod_ret progs can be sleepable only if they are
14154 				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
14155 				 */
14156 				if (!check_non_sleepable_error_inject(btf_id) &&
14157 				    within_error_injection_list(addr))
14158 					ret = 0;
14159 				break;
14160 			case BPF_PROG_TYPE_LSM:
14161 				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
14162 				 * Only some of them are sleepable.
14163 				 */
14164 				if (bpf_lsm_is_sleepable_hook(btf_id))
14165 					ret = 0;
14166 				break;
14167 			default:
14168 				break;
14169 			}
14170 			if (ret) {
14171 				bpf_log(log, "%s is not sleepable\n", tname);
14172 				return ret;
14173 			}
14174 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
14175 			if (tgt_prog) {
14176 				bpf_log(log, "can't modify return codes of BPF programs\n");
14177 				return -EINVAL;
14178 			}
14179 			ret = check_attach_modify_return(addr, tname);
14180 			if (ret) {
14181 				bpf_log(log, "%s() is not modifiable\n", tname);
14182 				return ret;
14183 			}
14184 		}
14185 
14186 		break;
14187 	}
14188 	tgt_info->tgt_addr = addr;
14189 	tgt_info->tgt_name = tname;
14190 	tgt_info->tgt_type = t;
14191 	return 0;
14192 }
14193 
14194 BTF_SET_START(btf_id_deny)
14195 BTF_ID_UNUSED
14196 #ifdef CONFIG_SMP
14197 BTF_ID(func, migrate_disable)
14198 BTF_ID(func, migrate_enable)
14199 #endif
14200 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
14201 BTF_ID(func, rcu_read_unlock_strict)
14202 #endif
14203 BTF_SET_END(btf_id_deny)
14204 
14205 static int check_attach_btf_id(struct bpf_verifier_env *env)
14206 {
14207 	struct bpf_prog *prog = env->prog;
14208 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
14209 	struct bpf_attach_target_info tgt_info = {};
14210 	u32 btf_id = prog->aux->attach_btf_id;
14211 	struct bpf_trampoline *tr;
14212 	int ret;
14213 	u64 key;
14214 
14215 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
14216 		if (prog->aux->sleepable)
14217 			/* attach_btf_id checked to be zero already */
14218 			return 0;
14219 		verbose(env, "Syscall programs can only be sleepable\n");
14220 		return -EINVAL;
14221 	}
14222 
14223 	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
14224 	    prog->type != BPF_PROG_TYPE_LSM) {
14225 		verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
14226 		return -EINVAL;
14227 	}
14228 
14229 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
14230 		return check_struct_ops_btf_id(env);
14231 
14232 	if (prog->type != BPF_PROG_TYPE_TRACING &&
14233 	    prog->type != BPF_PROG_TYPE_LSM &&
14234 	    prog->type != BPF_PROG_TYPE_EXT)
14235 		return 0;
14236 
14237 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
14238 	if (ret)
14239 		return ret;
14240 
14241 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
14242 		/* to make freplace equivalent to their targets, they need to
14243 		 * inherit env->ops and expected_attach_type for the rest of the
14244 		 * verification
14245 		 */
14246 		env->ops = bpf_verifier_ops[tgt_prog->type];
14247 		prog->expected_attach_type = tgt_prog->expected_attach_type;
14248 	}
14249 
14250 	/* store info about the attachment target that will be used later */
14251 	prog->aux->attach_func_proto = tgt_info.tgt_type;
14252 	prog->aux->attach_func_name = tgt_info.tgt_name;
14253 
14254 	if (tgt_prog) {
14255 		prog->aux->saved_dst_prog_type = tgt_prog->type;
14256 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
14257 	}
14258 
14259 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
14260 		prog->aux->attach_btf_trace = true;
14261 		return 0;
14262 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
14263 		if (!bpf_iter_prog_supported(prog))
14264 			return -EINVAL;
14265 		return 0;
14266 	}
14267 
14268 	if (prog->type == BPF_PROG_TYPE_LSM) {
14269 		ret = bpf_lsm_verify_prog(&env->log, prog);
14270 		if (ret < 0)
14271 			return ret;
14272 	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
14273 		   btf_id_set_contains(&btf_id_deny, btf_id)) {
14274 		return -EINVAL;
14275 	}
14276 
14277 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
14278 	tr = bpf_trampoline_get(key, &tgt_info);
14279 	if (!tr)
14280 		return -ENOMEM;
14281 
14282 	prog->aux->dst_trampoline = tr;
14283 	return 0;
14284 }
14285 
14286 struct btf *bpf_get_btf_vmlinux(void)
14287 {
14288 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
14289 		mutex_lock(&bpf_verifier_lock);
14290 		if (!btf_vmlinux)
14291 			btf_vmlinux = btf_parse_vmlinux();
14292 		mutex_unlock(&bpf_verifier_lock);
14293 	}
14294 	return btf_vmlinux;
14295 }
14296 
14297 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
14298 {
14299 	u64 start_time = ktime_get_ns();
14300 	struct bpf_verifier_env *env;
14301 	struct bpf_verifier_log *log;
14302 	int i, len, ret = -EINVAL;
14303 	bool is_priv;
14304 
14305 	/* no program is valid */
14306 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
14307 		return -EINVAL;
14308 
14309 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
14310 	 * allocate/free it every time bpf_check() is called
14311 	 */
14312 	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
14313 	if (!env)
14314 		return -ENOMEM;
14315 	log = &env->log;
14316 
14317 	len = (*prog)->len;
14318 	env->insn_aux_data =
14319 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
14320 	ret = -ENOMEM;
14321 	if (!env->insn_aux_data)
14322 		goto err_free_env;
14323 	for (i = 0; i < len; i++)
14324 		env->insn_aux_data[i].orig_idx = i;
14325 	env->prog = *prog;
14326 	env->ops = bpf_verifier_ops[env->prog->type];
14327 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
14328 	is_priv = bpf_capable();
14329 
14330 	bpf_get_btf_vmlinux();
14331 
14332 	/* grab the mutex to protect few globals used by verifier */
14333 	if (!is_priv)
14334 		mutex_lock(&bpf_verifier_lock);
14335 
14336 	if (attr->log_level || attr->log_buf || attr->log_size) {
14337 		/* user requested verbose verifier output
14338 		 * and supplied buffer to store the verification trace
14339 		 */
14340 		log->level = attr->log_level;
14341 		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
14342 		log->len_total = attr->log_size;
14343 
14344 		/* log attributes have to be sane */
14345 		if (!bpf_verifier_log_attr_valid(log)) {
14346 			ret = -EINVAL;
14347 			goto err_unlock;
14348 		}
14349 	}
14350 
14351 	mark_verifier_state_clean(env);
14352 
14353 	if (IS_ERR(btf_vmlinux)) {
14354 		/* Either gcc or pahole or kernel are broken. */
14355 		verbose(env, "in-kernel BTF is malformed\n");
14356 		ret = PTR_ERR(btf_vmlinux);
14357 		goto skip_full_check;
14358 	}
14359 
14360 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
14361 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
14362 		env->strict_alignment = true;
14363 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
14364 		env->strict_alignment = false;
14365 
14366 	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
14367 	env->allow_uninit_stack = bpf_allow_uninit_stack();
14368 	env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
14369 	env->bypass_spec_v1 = bpf_bypass_spec_v1();
14370 	env->bypass_spec_v4 = bpf_bypass_spec_v4();
14371 	env->bpf_capable = bpf_capable();
14372 
14373 	if (is_priv)
14374 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
14375 
14376 	env->explored_states = kvcalloc(state_htab_size(env),
14377 				       sizeof(struct bpf_verifier_state_list *),
14378 				       GFP_USER);
14379 	ret = -ENOMEM;
14380 	if (!env->explored_states)
14381 		goto skip_full_check;
14382 
14383 	ret = add_subprog_and_kfunc(env);
14384 	if (ret < 0)
14385 		goto skip_full_check;
14386 
14387 	ret = check_subprogs(env);
14388 	if (ret < 0)
14389 		goto skip_full_check;
14390 
14391 	ret = check_btf_info(env, attr, uattr);
14392 	if (ret < 0)
14393 		goto skip_full_check;
14394 
14395 	ret = check_attach_btf_id(env);
14396 	if (ret)
14397 		goto skip_full_check;
14398 
14399 	ret = resolve_pseudo_ldimm64(env);
14400 	if (ret < 0)
14401 		goto skip_full_check;
14402 
14403 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
14404 		ret = bpf_prog_offload_verifier_prep(env->prog);
14405 		if (ret)
14406 			goto skip_full_check;
14407 	}
14408 
14409 	ret = check_cfg(env);
14410 	if (ret < 0)
14411 		goto skip_full_check;
14412 
14413 	ret = do_check_subprogs(env);
14414 	ret = ret ?: do_check_main(env);
14415 
14416 	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
14417 		ret = bpf_prog_offload_finalize(env);
14418 
14419 skip_full_check:
14420 	kvfree(env->explored_states);
14421 
14422 	if (ret == 0)
14423 		ret = check_max_stack_depth(env);
14424 
14425 	/* instruction rewrites happen after this point */
14426 	if (is_priv) {
14427 		if (ret == 0)
14428 			opt_hard_wire_dead_code_branches(env);
14429 		if (ret == 0)
14430 			ret = opt_remove_dead_code(env);
14431 		if (ret == 0)
14432 			ret = opt_remove_nops(env);
14433 	} else {
14434 		if (ret == 0)
14435 			sanitize_dead_code(env);
14436 	}
14437 
14438 	if (ret == 0)
14439 		/* program is valid, convert *(u32*)(ctx + off) accesses */
14440 		ret = convert_ctx_accesses(env);
14441 
14442 	if (ret == 0)
14443 		ret = do_misc_fixups(env);
14444 
14445 	/* do 32-bit optimization after insn patching has done so those patched
14446 	 * insns could be handled correctly.
14447 	 */
14448 	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
14449 		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
14450 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
14451 								     : false;
14452 	}
14453 
14454 	if (ret == 0)
14455 		ret = fixup_call_args(env);
14456 
14457 	env->verification_time = ktime_get_ns() - start_time;
14458 	print_verification_stats(env);
14459 	env->prog->aux->verified_insns = env->insn_processed;
14460 
14461 	if (log->level && bpf_verifier_log_full(log))
14462 		ret = -ENOSPC;
14463 	if (log->level && !log->ubuf) {
14464 		ret = -EFAULT;
14465 		goto err_release_maps;
14466 	}
14467 
14468 	if (ret)
14469 		goto err_release_maps;
14470 
14471 	if (env->used_map_cnt) {
14472 		/* if program passed verifier, update used_maps in bpf_prog_info */
14473 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
14474 							  sizeof(env->used_maps[0]),
14475 							  GFP_KERNEL);
14476 
14477 		if (!env->prog->aux->used_maps) {
14478 			ret = -ENOMEM;
14479 			goto err_release_maps;
14480 		}
14481 
14482 		memcpy(env->prog->aux->used_maps, env->used_maps,
14483 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
14484 		env->prog->aux->used_map_cnt = env->used_map_cnt;
14485 	}
14486 	if (env->used_btf_cnt) {
14487 		/* if program passed verifier, update used_btfs in bpf_prog_aux */
14488 		env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
14489 							  sizeof(env->used_btfs[0]),
14490 							  GFP_KERNEL);
14491 		if (!env->prog->aux->used_btfs) {
14492 			ret = -ENOMEM;
14493 			goto err_release_maps;
14494 		}
14495 
14496 		memcpy(env->prog->aux->used_btfs, env->used_btfs,
14497 		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
14498 		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
14499 	}
14500 	if (env->used_map_cnt || env->used_btf_cnt) {
14501 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
14502 		 * bpf_ld_imm64 instructions
14503 		 */
14504 		convert_pseudo_ld_imm64(env);
14505 	}
14506 
14507 	adjust_btf_func(env);
14508 
14509 err_release_maps:
14510 	if (!env->prog->aux->used_maps)
14511 		/* if we didn't copy map pointers into bpf_prog_info, release
14512 		 * them now. Otherwise free_used_maps() will release them.
14513 		 */
14514 		release_maps(env);
14515 	if (!env->prog->aux->used_btfs)
14516 		release_btfs(env);
14517 
14518 	/* extension progs temporarily inherit the attach_type of their targets
14519 	   for verification purposes, so set it back to zero before returning
14520 	 */
14521 	if (env->prog->type == BPF_PROG_TYPE_EXT)
14522 		env->prog->expected_attach_type = 0;
14523 
14524 	*prog = env->prog;
14525 err_unlock:
14526 	if (!is_priv)
14527 		mutex_unlock(&bpf_verifier_lock);
14528 	vfree(env->insn_aux_data);
14529 err_free_env:
14530 	kfree(env);
14531 	return ret;
14532 }
14533