xref: /linux/kernel/bpf/verifier.c (revision 6ebe6dbd6886af07b102aca42e44edbee94a22d9)
1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2  * Copyright (c) 2016 Facebook
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  */
13 #include <linux/kernel.h>
14 #include <linux/types.h>
15 #include <linux/slab.h>
16 #include <linux/bpf.h>
17 #include <linux/bpf_verifier.h>
18 #include <linux/filter.h>
19 #include <net/netlink.h>
20 #include <linux/file.h>
21 #include <linux/vmalloc.h>
22 #include <linux/stringify.h>
23 #include <linux/bsearch.h>
24 #include <linux/sort.h>
25 
26 #include "disasm.h"
27 
28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
29 #define BPF_PROG_TYPE(_id, _name) \
30 	[_id] = & _name ## _verifier_ops,
31 #define BPF_MAP_TYPE(_id, _ops)
32 #include <linux/bpf_types.h>
33 #undef BPF_PROG_TYPE
34 #undef BPF_MAP_TYPE
35 };
36 
37 /* bpf_check() is a static code analyzer that walks eBPF program
38  * instruction by instruction and updates register/stack state.
39  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
40  *
41  * The first pass is depth-first-search to check that the program is a DAG.
42  * It rejects the following programs:
43  * - larger than BPF_MAXINSNS insns
44  * - if loop is present (detected via back-edge)
45  * - unreachable insns exist (shouldn't be a forest. program = one function)
46  * - out of bounds or malformed jumps
47  * The second pass is all possible path descent from the 1st insn.
48  * Since it's analyzing all paths through the program, the length of the
49  * analysis is limited to 128k insns (BPF_COMPLEXITY_LIMIT_INSNS), which may be
50  * hit even if the total number of insns is less than 4K, when there are too many branches that change stack/regs.
51  * Number of 'branches to be analyzed' is limited to 1k
52  *
53  * On entry to each instruction, each register has a type, and the instruction
54  * changes the types of the registers depending on instruction semantics.
55  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
56  * copied to R1.
57  *
58  * All registers are 64-bit.
59  * R0 - return register
60  * R1-R5 argument passing registers
61  * R6-R9 callee saved registers
62  * R10 - frame pointer read-only
63  *
64  * At the start of BPF program the register R1 contains a pointer to bpf_context
65  * and has type PTR_TO_CTX.
66  *
67  * The verifier tracks arithmetic operations on pointers. For example:
68  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
69  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
70  * 1st insn copies R10 (which has type PTR_TO_STACK, the read-only frame pointer) into R1
71  * and 2nd arithmetic instruction is pattern matched to recognize
72  * that it wants to construct a pointer to some element within stack.
73  * So after 2nd insn, the register R1 has type PTR_TO_STACK
74  * (and -20 constant is saved for further stack bounds checking).
75  * Meaning that this reg is a pointer to stack plus known immediate constant.
76  *
77  * Most of the time the registers have SCALAR_VALUE type, which
78  * means the register has some value, but it's not a valid pointer.
79  * (like pointer plus pointer becomes SCALAR_VALUE type)
80  *
81  * When verifier sees load or store instructions the type of base register
82  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
83  * types recognized by check_mem_access() function.
84  *
85  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
86  * and the range of [ptr, ptr + map's value_size) is accessible.
87  *
88  * registers used to pass values to function calls are checked against
89  * function argument constraints.
90  *
91  * ARG_PTR_TO_MAP_KEY is one such argument constraint.
92  * It means that the register type passed to this function must be
93  * PTR_TO_STACK and it will be used inside the function as
94  * 'pointer to map element key'
95  *
96  * For example the argument constraints for bpf_map_lookup_elem():
97  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
98  *   .arg1_type = ARG_CONST_MAP_PTR,
99  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
100  *
101  * ret_type says that this function returns 'pointer to map elem value or null'.
102  * The function expects the 1st argument to be a const pointer to 'struct bpf_map'
103  * and the 2nd argument to be a pointer to the stack, which will be used inside
104  * the helper function as a pointer to the map element key.
105  *
106  * On the kernel side the helper function looks like:
107  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
108  * {
109  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
110  *    void *key = (void *) (unsigned long) r2;
111  *    void *value;
112  *
113  *    here kernel can access 'key' and 'map' pointers safely, knowing that
114  *    [key, key + map->key_size) bytes are valid and were initialized on
115  *    the stack of eBPF program.
116  * }
117  *
118  * Corresponding eBPF program may look like:
119  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is PTR_TO_STACK (the frame pointer)
120  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 is PTR_TO_STACK with the -4 offset tracked
121  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
122  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
123  * here verifier looks at prototype of map_lookup_elem() and sees:
124  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
125  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
126  *
127  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
128  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
129  * and were initialized prior to this call.
130  * If it's ok, then verifier allows this BPF_CALL insn and looks at
131  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
132  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
133  * returns either a pointer to the map value or NULL.
134  *
135  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
136  * insn, the register holding that pointer in the true branch changes state to
137  * PTR_TO_MAP_VALUE and the same register changes state to a known-zero
138  * SCALAR_VALUE in the false branch. See check_cond_jmp_op().
139  *
140  * After the call R0 is set to the return type of the function and registers R1-R5
141  * are set to NOT_INIT to indicate that they are no longer readable.
142  */
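
/* Continuing the example above, a minimal sketch of the NULL check it
 * describes (a hypothetical fragment; the jump offset and stored value are
 * illustrative only):
 *
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),   // R0 is PTR_TO_MAP_VALUE_OR_NULL here
 *    BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),    // fall-through: R0 is PTR_TO_MAP_VALUE
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),                         // jump target: R0 is known zero
 *
 * A load or store through R0 placed before the BPF_JEQ check would be
 * rejected, since R0 could still be NULL at that point.
 */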
143 
144 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
145 struct bpf_verifier_stack_elem {
146 	/* verifier state is 'st'
147 	 * before processing instruction 'insn_idx'
148 	 * and after processing instruction 'prev_insn_idx'
149 	 */
150 	struct bpf_verifier_state st;
151 	int insn_idx;
152 	int prev_insn_idx;
153 	struct bpf_verifier_stack_elem *next;
154 };
155 
156 #define BPF_COMPLEXITY_LIMIT_INSNS	131072
157 #define BPF_COMPLEXITY_LIMIT_STACK	1024
158 
159 #define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
160 
161 struct bpf_call_arg_meta {
162 	struct bpf_map *map_ptr;
163 	bool raw_mode;
164 	bool pkt_access;
165 	int regno;
166 	int access_size;
167 };
168 
169 static DEFINE_MUTEX(bpf_verifier_lock);
170 
171 /* log_level controls verbosity level of eBPF verifier.
172  * verbose() is used to dump the verification trace to the log, so the user
173  * can figure out what's wrong with the program
174  */
175 static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
176 				   const char *fmt, ...)
177 {
178 	struct bpf_verifer_log *log = &env->log;
179 	unsigned int n;
180 	va_list args;
181 
182 	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
183 		return;
184 
185 	va_start(args, fmt);
186 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
187 	va_end(args);
188 
189 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
190 		  "verifier log line truncated - local buffer too short\n");
191 
192 	n = min(log->len_total - log->len_used - 1, n);
193 	log->kbuf[n] = '\0';
194 
195 	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
196 		log->len_used += n;
197 	else
198 		log->ubuf = NULL;
199 }
200 
201 static bool type_is_pkt_pointer(enum bpf_reg_type type)
202 {
203 	return type == PTR_TO_PACKET ||
204 	       type == PTR_TO_PACKET_META;
205 }
206 
207 /* string representation of 'enum bpf_reg_type' */
208 static const char * const reg_type_str[] = {
209 	[NOT_INIT]		= "?",
210 	[SCALAR_VALUE]		= "inv",
211 	[PTR_TO_CTX]		= "ctx",
212 	[CONST_PTR_TO_MAP]	= "map_ptr",
213 	[PTR_TO_MAP_VALUE]	= "map_value",
214 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
215 	[PTR_TO_STACK]		= "fp",
216 	[PTR_TO_PACKET]		= "pkt",
217 	[PTR_TO_PACKET_META]	= "pkt_meta",
218 	[PTR_TO_PACKET_END]	= "pkt_end",
219 };
220 
221 static void print_liveness(struct bpf_verifier_env *env,
222 			   enum bpf_reg_liveness live)
223 {
224 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
225 	    verbose(env, "_");
226 	if (live & REG_LIVE_READ)
227 		verbose(env, "r");
228 	if (live & REG_LIVE_WRITTEN)
229 		verbose(env, "w");
230 }
231 
232 static struct bpf_func_state *func(struct bpf_verifier_env *env,
233 				   const struct bpf_reg_state *reg)
234 {
235 	struct bpf_verifier_state *cur = env->cur_state;
236 
237 	return cur->frame[reg->frameno];
238 }
239 
240 static void print_verifier_state(struct bpf_verifier_env *env,
241 				 const struct bpf_func_state *state)
242 {
243 	const struct bpf_reg_state *reg;
244 	enum bpf_reg_type t;
245 	int i;
246 
247 	if (state->frameno)
248 		verbose(env, " frame%d:", state->frameno);
249 	for (i = 0; i < MAX_BPF_REG; i++) {
250 		reg = &state->regs[i];
251 		t = reg->type;
252 		if (t == NOT_INIT)
253 			continue;
254 		verbose(env, " R%d", i);
255 		print_liveness(env, reg->live);
256 		verbose(env, "=%s", reg_type_str[t]);
257 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
258 		    tnum_is_const(reg->var_off)) {
259 			/* reg->off should be 0 for SCALAR_VALUE */
260 			verbose(env, "%lld", reg->var_off.value + reg->off);
261 			if (t == PTR_TO_STACK)
262 				verbose(env, ",call_%d", func(env, reg)->callsite);
263 		} else {
264 			verbose(env, "(id=%d", reg->id);
265 			if (t != SCALAR_VALUE)
266 				verbose(env, ",off=%d", reg->off);
267 			if (type_is_pkt_pointer(t))
268 				verbose(env, ",r=%d", reg->range);
269 			else if (t == CONST_PTR_TO_MAP ||
270 				 t == PTR_TO_MAP_VALUE ||
271 				 t == PTR_TO_MAP_VALUE_OR_NULL)
272 				verbose(env, ",ks=%d,vs=%d",
273 					reg->map_ptr->key_size,
274 					reg->map_ptr->value_size);
275 			if (tnum_is_const(reg->var_off)) {
276 				/* Typically an immediate SCALAR_VALUE, but
277 				 * could be a pointer whose offset is too big
278 				 * for reg->off
279 				 */
280 				verbose(env, ",imm=%llx", reg->var_off.value);
281 			} else {
282 				if (reg->smin_value != reg->umin_value &&
283 				    reg->smin_value != S64_MIN)
284 					verbose(env, ",smin_value=%lld",
285 						(long long)reg->smin_value);
286 				if (reg->smax_value != reg->umax_value &&
287 				    reg->smax_value != S64_MAX)
288 					verbose(env, ",smax_value=%lld",
289 						(long long)reg->smax_value);
290 				if (reg->umin_value != 0)
291 					verbose(env, ",umin_value=%llu",
292 						(unsigned long long)reg->umin_value);
293 				if (reg->umax_value != U64_MAX)
294 					verbose(env, ",umax_value=%llu",
295 						(unsigned long long)reg->umax_value);
296 				if (!tnum_is_unknown(reg->var_off)) {
297 					char tn_buf[48];
298 
299 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
300 					verbose(env, ",var_off=%s", tn_buf);
301 				}
302 			}
303 			verbose(env, ")");
304 		}
305 	}
306 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
307 		if (state->stack[i].slot_type[0] == STACK_SPILL) {
308 			verbose(env, " fp%d",
309 				(-i - 1) * BPF_REG_SIZE);
310 			print_liveness(env, state->stack[i].spilled_ptr.live);
311 			verbose(env, "=%s",
312 				reg_type_str[state->stack[i].spilled_ptr.type]);
313 		}
314 		if (state->stack[i].slot_type[0] == STACK_ZERO)
315 			verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
316 	}
317 	verbose(env, "\n");
318 }
319 
320 static int copy_stack_state(struct bpf_func_state *dst,
321 			    const struct bpf_func_state *src)
322 {
323 	if (!src->stack)
324 		return 0;
325 	if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
326 		/* internal bug, make state invalid to reject the program */
327 		memset(dst, 0, sizeof(*dst));
328 		return -EFAULT;
329 	}
330 	memcpy(dst->stack, src->stack,
331 	       sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
332 	return 0;
333 }
334 
335 /* do_check() starts with a zero-sized stack in struct bpf_verifier_state to
336  * make it consume a minimal amount of memory. A stack access from the program
337  * makes check_stack_write() call into realloc_func_state() to grow the stack size.
338  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
339  * which this function copies over. It points to the previous bpf_verifier_state,
340  * which is never reallocated.
341  */
342 static int realloc_func_state(struct bpf_func_state *state, int size,
343 			      bool copy_old)
344 {
345 	u32 old_size = state->allocated_stack;
346 	struct bpf_stack_state *new_stack;
347 	int slot = size / BPF_REG_SIZE;
348 
349 	if (size <= old_size || !size) {
350 		if (copy_old)
351 			return 0;
352 		state->allocated_stack = slot * BPF_REG_SIZE;
353 		if (!size && old_size) {
354 			kfree(state->stack);
355 			state->stack = NULL;
356 		}
357 		return 0;
358 	}
359 	new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
360 				  GFP_KERNEL);
361 	if (!new_stack)
362 		return -ENOMEM;
363 	if (copy_old) {
364 		if (state->stack)
365 			memcpy(new_stack, state->stack,
366 			       sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
367 		memset(new_stack + old_size / BPF_REG_SIZE, 0,
368 		       sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
369 	}
370 	state->allocated_stack = slot * BPF_REG_SIZE;
371 	kfree(state->stack);
372 	state->stack = new_stack;
373 	return 0;
374 }
375 
376 static void free_func_state(struct bpf_func_state *state)
377 {
378 	kfree(state->stack);
379 	kfree(state);
380 }
381 
382 static void free_verifier_state(struct bpf_verifier_state *state,
383 				bool free_self)
384 {
385 	int i;
386 
387 	for (i = 0; i <= state->curframe; i++) {
388 		free_func_state(state->frame[i]);
389 		state->frame[i] = NULL;
390 	}
391 	if (free_self)
392 		kfree(state);
393 }
394 
395 /* copy func state from src to dst, growing dst stack space
396  * when necessary to accommodate a larger src stack
397  */
398 static int copy_func_state(struct bpf_func_state *dst,
399 			   const struct bpf_func_state *src)
400 {
401 	int err;
402 
403 	err = realloc_func_state(dst, src->allocated_stack, false);
404 	if (err)
405 		return err;
406 	memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
407 	return copy_stack_state(dst, src);
408 }
409 
410 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
411 			       const struct bpf_verifier_state *src)
412 {
413 	struct bpf_func_state *dst;
414 	int i, err;
415 
416 	/* if dst has more stack frames than src, free them */
417 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
418 		free_func_state(dst_state->frame[i]);
419 		dst_state->frame[i] = NULL;
420 	}
421 	dst_state->curframe = src->curframe;
422 	dst_state->parent = src->parent;
423 	for (i = 0; i <= src->curframe; i++) {
424 		dst = dst_state->frame[i];
425 		if (!dst) {
426 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
427 			if (!dst)
428 				return -ENOMEM;
429 			dst_state->frame[i] = dst;
430 		}
431 		err = copy_func_state(dst, src->frame[i]);
432 		if (err)
433 			return err;
434 	}
435 	return 0;
436 }
437 
438 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
439 		     int *insn_idx)
440 {
441 	struct bpf_verifier_state *cur = env->cur_state;
442 	struct bpf_verifier_stack_elem *elem, *head = env->head;
443 	int err;
444 
445 	if (env->head == NULL)
446 		return -ENOENT;
447 
448 	if (cur) {
449 		err = copy_verifier_state(cur, &head->st);
450 		if (err)
451 			return err;
452 	}
453 	if (insn_idx)
454 		*insn_idx = head->insn_idx;
455 	if (prev_insn_idx)
456 		*prev_insn_idx = head->prev_insn_idx;
457 	elem = head->next;
458 	free_verifier_state(&head->st, false);
459 	kfree(head);
460 	env->head = elem;
461 	env->stack_size--;
462 	return 0;
463 }
464 
465 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
466 					     int insn_idx, int prev_insn_idx)
467 {
468 	struct bpf_verifier_state *cur = env->cur_state;
469 	struct bpf_verifier_stack_elem *elem;
470 	int err;
471 
472 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
473 	if (!elem)
474 		goto err;
475 
476 	elem->insn_idx = insn_idx;
477 	elem->prev_insn_idx = prev_insn_idx;
478 	elem->next = env->head;
479 	env->head = elem;
480 	env->stack_size++;
481 	err = copy_verifier_state(&elem->st, cur);
482 	if (err)
483 		goto err;
484 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
485 		verbose(env, "BPF program is too complex\n");
486 		goto err;
487 	}
488 	return &elem->st;
489 err:
490 	/* pop all elements and return */
491 	while (!pop_stack(env, NULL, NULL));
492 	return NULL;
493 }
494 
495 #define CALLER_SAVED_REGS 6
496 static const int caller_saved[CALLER_SAVED_REGS] = {
497 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
498 };
499 #define CALLEE_SAVED_REGS 5
500 static const int callee_saved[CALLEE_SAVED_REGS] = {
501 	BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
502 };
503 
504 static void __mark_reg_not_init(struct bpf_reg_state *reg);
505 
506 /* Mark the unknown part of a register (variable offset or scalar value) as
507  * known to have the value @imm.
508  */
509 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
510 {
511 	reg->id = 0;
512 	reg->var_off = tnum_const(imm);
513 	reg->smin_value = (s64)imm;
514 	reg->smax_value = (s64)imm;
515 	reg->umin_value = imm;
516 	reg->umax_value = imm;
517 }
518 
519 /* Mark the 'variable offset' part of a register as zero.  This should be
520  * used only on registers holding a pointer type.
521  */
522 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
523 {
524 	__mark_reg_known(reg, 0);
525 }
526 
527 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
528 {
529 	__mark_reg_known(reg, 0);
530 	reg->off = 0;
531 	reg->type = SCALAR_VALUE;
532 }
533 
534 static void mark_reg_known_zero(struct bpf_verifier_env *env,
535 				struct bpf_reg_state *regs, u32 regno)
536 {
537 	if (WARN_ON(regno >= MAX_BPF_REG)) {
538 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
539 		/* Something bad happened, let's kill all regs */
540 		for (regno = 0; regno < MAX_BPF_REG; regno++)
541 			__mark_reg_not_init(regs + regno);
542 		return;
543 	}
544 	__mark_reg_known_zero(regs + regno);
545 }
546 
547 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
548 {
549 	return type_is_pkt_pointer(reg->type);
550 }
551 
552 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
553 {
554 	return reg_is_pkt_pointer(reg) ||
555 	       reg->type == PTR_TO_PACKET_END;
556 }
557 
558 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
559 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
560 				    enum bpf_reg_type which)
561 {
562 	/* The register can already have a range from prior markings.
563 	 * This is fine as long as it hasn't been advanced from its
564 	 * origin.
565 	 */
566 	return reg->type == which &&
567 	       reg->id == 0 &&
568 	       reg->off == 0 &&
569 	       tnum_equals_const(reg->var_off, 0);
570 }
571 
572 /* Attempts to improve min/max values based on var_off information */
573 static void __update_reg_bounds(struct bpf_reg_state *reg)
574 {
575 	/* min signed is max(sign bit) | min(other bits) */
576 	reg->smin_value = max_t(s64, reg->smin_value,
577 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
578 	/* max signed is min(sign bit) | max(other bits) */
579 	reg->smax_value = min_t(s64, reg->smax_value,
580 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
581 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
582 	reg->umax_value = min(reg->umax_value,
583 			      reg->var_off.value | reg->var_off.mask);
584 }
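
/* A worked example of the tightening above, with hypothetical values: if a
 * previously unbounded scalar has var_off = (value = 0x4, mask = 0x3), i.e.
 * bit 2 is known to be set and only bits 0-1 are unknown, then afterwards
 *   umin_value = 0x4, umax_value = 0x4 | 0x3 = 0x7
 *   smin_value = 0x4, smax_value = 0x7
 * since the sign bit is known to be clear (it is not part of the mask).
 */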
585 
586 /* Uses signed min/max values to inform unsigned, and vice-versa */
587 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
588 {
589 	/* Learn sign from signed bounds.
590 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
591 	 * are the same, so combine.  This works even in the negative case, e.g.
592 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
593 	 */
594 	if (reg->smin_value >= 0 || reg->smax_value < 0) {
595 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
596 							  reg->umin_value);
597 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
598 							  reg->umax_value);
599 		return;
600 	}
601 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
602 	 * boundary, so we must be careful.
603 	 */
604 	if ((s64)reg->umax_value >= 0) {
605 		/* Positive.  We can't learn anything from the smin, but smax
606 		 * is positive, hence safe.
607 		 */
608 		reg->smin_value = reg->umin_value;
609 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
610 							  reg->umax_value);
611 	} else if ((s64)reg->umin_value < 0) {
612 		/* Negative.  We can't learn anything from the smax, but smin
613 		 * is negative, hence safe.
614 		 */
615 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
616 							  reg->umin_value);
617 		reg->smax_value = reg->umax_value;
618 	}
619 }
620 
621 /* Attempts to improve var_off based on unsigned min/max information */
622 static void __reg_bound_offset(struct bpf_reg_state *reg)
623 {
624 	reg->var_off = tnum_intersect(reg->var_off,
625 				      tnum_range(reg->umin_value,
626 						 reg->umax_value));
627 }
628 
629 /* Reset the min/max bounds of a register */
630 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
631 {
632 	reg->smin_value = S64_MIN;
633 	reg->smax_value = S64_MAX;
634 	reg->umin_value = 0;
635 	reg->umax_value = U64_MAX;
636 }
637 
638 /* Mark a register as having a completely unknown (scalar) value. */
639 static void __mark_reg_unknown(struct bpf_reg_state *reg)
640 {
641 	reg->type = SCALAR_VALUE;
642 	reg->id = 0;
643 	reg->off = 0;
644 	reg->var_off = tnum_unknown;
645 	reg->frameno = 0;
646 	__mark_reg_unbounded(reg);
647 }
648 
649 static void mark_reg_unknown(struct bpf_verifier_env *env,
650 			     struct bpf_reg_state *regs, u32 regno)
651 {
652 	if (WARN_ON(regno >= MAX_BPF_REG)) {
653 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
654 		/* Something bad happened, let's kill all regs except FP */
655 		for (regno = 0; regno < BPF_REG_FP; regno++)
656 			__mark_reg_not_init(regs + regno);
657 		return;
658 	}
659 	__mark_reg_unknown(regs + regno);
660 }
661 
662 static void __mark_reg_not_init(struct bpf_reg_state *reg)
663 {
664 	__mark_reg_unknown(reg);
665 	reg->type = NOT_INIT;
666 }
667 
668 static void mark_reg_not_init(struct bpf_verifier_env *env,
669 			      struct bpf_reg_state *regs, u32 regno)
670 {
671 	if (WARN_ON(regno >= MAX_BPF_REG)) {
672 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
673 		/* Something bad happened, let's kill all regs except FP */
674 		for (regno = 0; regno < BPF_REG_FP; regno++)
675 			__mark_reg_not_init(regs + regno);
676 		return;
677 	}
678 	__mark_reg_not_init(regs + regno);
679 }
680 
681 static void init_reg_state(struct bpf_verifier_env *env,
682 			   struct bpf_func_state *state)
683 {
684 	struct bpf_reg_state *regs = state->regs;
685 	int i;
686 
687 	for (i = 0; i < MAX_BPF_REG; i++) {
688 		mark_reg_not_init(env, regs, i);
689 		regs[i].live = REG_LIVE_NONE;
690 	}
691 
692 	/* frame pointer */
693 	regs[BPF_REG_FP].type = PTR_TO_STACK;
694 	mark_reg_known_zero(env, regs, BPF_REG_FP);
695 	regs[BPF_REG_FP].frameno = state->frameno;
696 
697 	/* 1st arg to a function */
698 	regs[BPF_REG_1].type = PTR_TO_CTX;
699 	mark_reg_known_zero(env, regs, BPF_REG_1);
700 }
701 
702 #define BPF_MAIN_FUNC (-1)
703 static void init_func_state(struct bpf_verifier_env *env,
704 			    struct bpf_func_state *state,
705 			    int callsite, int frameno, int subprogno)
706 {
707 	state->callsite = callsite;
708 	state->frameno = frameno;
709 	state->subprogno = subprogno;
710 	init_reg_state(env, state);
711 }
712 
713 enum reg_arg_type {
714 	SRC_OP,		/* register is used as source operand */
715 	DST_OP,		/* register is used as destination operand */
716 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
717 };
718 
719 static int cmp_subprogs(const void *a, const void *b)
720 {
721 	return *(int *)a - *(int *)b;
722 }
723 
724 static int find_subprog(struct bpf_verifier_env *env, int off)
725 {
726 	u32 *p;
727 
728 	p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
729 		    sizeof(env->subprog_starts[0]), cmp_subprogs);
730 	if (!p)
731 		return -ENOENT;
732 	return p - env->subprog_starts;
733 
734 }
735 
736 static int add_subprog(struct bpf_verifier_env *env, int off)
737 {
738 	int insn_cnt = env->prog->len;
739 	int ret;
740 
741 	if (off >= insn_cnt || off < 0) {
742 		verbose(env, "call to invalid destination\n");
743 		return -EINVAL;
744 	}
745 	ret = find_subprog(env, off);
746 	if (ret >= 0)
747 		return 0;
748 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
749 		verbose(env, "too many subprograms\n");
750 		return -E2BIG;
751 	}
752 	env->subprog_starts[env->subprog_cnt++] = off;
753 	sort(env->subprog_starts, env->subprog_cnt,
754 	     sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
755 	return 0;
756 }
757 
758 static int check_subprogs(struct bpf_verifier_env *env)
759 {
760 	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
761 	struct bpf_insn *insn = env->prog->insnsi;
762 	int insn_cnt = env->prog->len;
763 
764 	/* determine subprog starts. The end is one before the next starts */
765 	for (i = 0; i < insn_cnt; i++) {
766 		if (insn[i].code != (BPF_JMP | BPF_CALL))
767 			continue;
768 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
769 			continue;
770 		if (!env->allow_ptr_leaks) {
771 			verbose(env, "function calls to other bpf functions are allowed for root only\n");
772 			return -EPERM;
773 		}
774 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
775 			verbose(env, "function calls in offloaded programs are not supported yet\n");
776 			return -EINVAL;
777 		}
778 		ret = add_subprog(env, i + insn[i].imm + 1);
779 		if (ret < 0)
780 			return ret;
781 	}
782 
783 	if (env->log.level > 1)
784 		for (i = 0; i < env->subprog_cnt; i++)
785 			verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
786 
787 	/* now check that all jumps are within the same subprog */
788 	subprog_start = 0;
789 	if (env->subprog_cnt == cur_subprog)
790 		subprog_end = insn_cnt;
791 	else
792 		subprog_end = env->subprog_starts[cur_subprog++];
793 	for (i = 0; i < insn_cnt; i++) {
794 		u8 code = insn[i].code;
795 
796 		if (BPF_CLASS(code) != BPF_JMP)
797 			goto next;
798 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
799 			goto next;
800 		off = i + insn[i].off + 1;
801 		if (off < subprog_start || off >= subprog_end) {
802 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
803 			return -EINVAL;
804 		}
805 next:
806 		if (i == subprog_end - 1) {
807 			/* to avoid fall-through from one subprog into another
808 			 * the last insn of the subprog should be either exit
809 			 * or unconditional jump back
810 			 */
811 			if (code != (BPF_JMP | BPF_EXIT) &&
812 			    code != (BPF_JMP | BPF_JA)) {
813 				verbose(env, "last insn is not an exit or jmp\n");
814 				return -EINVAL;
815 			}
816 			subprog_start = subprog_end;
817 			if (env->subprog_cnt == cur_subprog)
818 				subprog_end = insn_cnt;
819 			else
820 				subprog_end = env->subprog_starts[cur_subprog++];
821 		}
822 	}
823 	return 0;
824 }
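
/* An illustrative layout that check_subprogs() accepts (a sketch; the insn
 * indexes and register choices are hypothetical):
 *
 *   0: r1 = 1
 *   1: call pc+2        // BPF_PSEUDO_CALL, so insn 4 becomes a subprog start
 *   2: r0 = r1
 *   3: exit             // last insn of subprog 0
 *   4: r0 = 42          // first insn of subprog 1
 *   5: exit             // last insn of subprog 1
 *
 * A jump from insn 2 to insn 4 would be reported as "jump out of range", and
 * replacing insn 3 with anything but an exit or an unconditional jump would
 * trigger "last insn is not an exit or jmp".
 */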
825 
826 static
827 struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
828 				       const struct bpf_verifier_state *state,
829 				       struct bpf_verifier_state *parent,
830 				       u32 regno)
831 {
832 	struct bpf_verifier_state *tmp = NULL;
833 
834 	/* 'parent' could be a state of caller and
835 	 * 'state' could be a state of callee. In such case
836 	 * parent->curframe < state->curframe
837 	 * and it's ok for r1 - r5 registers
838 	 *
839 	 * 'parent' could be a callee's state after it bpf_exit-ed.
840 	 * In such case parent->curframe > state->curframe
841 	 * and it's ok for r0 only
842 	 */
843 	if (parent->curframe == state->curframe ||
844 	    (parent->curframe < state->curframe &&
845 	     regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
846 	    (parent->curframe > state->curframe &&
847 	       regno == BPF_REG_0))
848 		return parent;
849 
850 	if (parent->curframe > state->curframe &&
851 	    regno >= BPF_REG_6) {
852 		/* for callee saved regs we have to skip the whole chain
853 		 * of states that belong to callee and mark as LIVE_READ
854 		 * the registers before the call
855 		 */
856 		tmp = parent;
857 		while (tmp && tmp->curframe != state->curframe) {
858 			tmp = tmp->parent;
859 		}
860 		if (!tmp)
861 			goto bug;
862 		parent = tmp;
863 	} else {
864 		goto bug;
865 	}
866 	return parent;
867 bug:
868 	verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
869 	verbose(env, "regno %d parent frame %d current frame %d\n",
870 		regno, parent->curframe, state->curframe);
871 	return NULL;
872 }
873 
874 static int mark_reg_read(struct bpf_verifier_env *env,
875 			 const struct bpf_verifier_state *state,
876 			 struct bpf_verifier_state *parent,
877 			 u32 regno)
878 {
879 	bool writes = parent == state->parent; /* Observe write marks */
880 
881 	if (regno == BPF_REG_FP)
882 		/* We don't need to worry about FP liveness because it's read-only */
883 		return 0;
884 
885 	while (parent) {
886 		/* if read wasn't screened by an earlier write ... */
887 		if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
888 			break;
889 		parent = skip_callee(env, state, parent, regno);
890 		if (!parent)
891 			return -EFAULT;
892 		/* ... then we depend on parent's value */
893 		parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
894 		state = parent;
895 		parent = state->parent;
896 		writes = true;
897 	}
898 	return 0;
899 }
900 
901 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
902 			 enum reg_arg_type t)
903 {
904 	struct bpf_verifier_state *vstate = env->cur_state;
905 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
906 	struct bpf_reg_state *regs = state->regs;
907 
908 	if (regno >= MAX_BPF_REG) {
909 		verbose(env, "R%d is invalid\n", regno);
910 		return -EINVAL;
911 	}
912 
913 	if (t == SRC_OP) {
914 		/* check whether register used as source operand can be read */
915 		if (regs[regno].type == NOT_INIT) {
916 			verbose(env, "R%d !read_ok\n", regno);
917 			return -EACCES;
918 		}
919 		return mark_reg_read(env, vstate, vstate->parent, regno);
920 	} else {
921 		/* check whether register used as dest operand can be written to */
922 		if (regno == BPF_REG_FP) {
923 			verbose(env, "frame pointer is read only\n");
924 			return -EACCES;
925 		}
926 		regs[regno].live |= REG_LIVE_WRITTEN;
927 		if (t == DST_OP)
928 			mark_reg_unknown(env, regs, regno);
929 	}
930 	return 0;
931 }
932 
933 static bool is_spillable_regtype(enum bpf_reg_type type)
934 {
935 	switch (type) {
936 	case PTR_TO_MAP_VALUE:
937 	case PTR_TO_MAP_VALUE_OR_NULL:
938 	case PTR_TO_STACK:
939 	case PTR_TO_CTX:
940 	case PTR_TO_PACKET:
941 	case PTR_TO_PACKET_META:
942 	case PTR_TO_PACKET_END:
943 	case CONST_PTR_TO_MAP:
944 		return true;
945 	default:
946 		return false;
947 	}
948 }
949 
950 /* Does this register contain a constant zero? */
951 static bool register_is_null(struct bpf_reg_state *reg)
952 {
953 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
954 }
955 
956 /* check_stack_read/write functions track spill/fill of registers;
957  * stack boundary and alignment are checked in check_mem_access()
958  */
959 static int check_stack_write(struct bpf_verifier_env *env,
960 			     struct bpf_func_state *state, /* func where register points to */
961 			     int off, int size, int value_regno)
962 {
963 	struct bpf_func_state *cur; /* state of the current function */
964 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
965 	enum bpf_reg_type type;
966 
967 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
968 				 true);
969 	if (err)
970 		return err;
971 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
972 	 * so it's aligned access and [off, off + size) are within stack limits
973 	 */
974 	if (!env->allow_ptr_leaks &&
975 	    state->stack[spi].slot_type[0] == STACK_SPILL &&
976 	    size != BPF_REG_SIZE) {
977 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
978 		return -EACCES;
979 	}
980 
981 	cur = env->cur_state->frame[env->cur_state->curframe];
982 	if (value_regno >= 0 &&
983 	    is_spillable_regtype((type = cur->regs[value_regno].type))) {
984 
985 		/* register containing pointer is being spilled into stack */
986 		if (size != BPF_REG_SIZE) {
987 			verbose(env, "invalid size of register spill\n");
988 			return -EACCES;
989 		}
990 
991 		if (state != cur && type == PTR_TO_STACK) {
992 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
993 			return -EINVAL;
994 		}
995 
996 		/* save register state */
997 		state->stack[spi].spilled_ptr = cur->regs[value_regno];
998 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
999 
1000 		for (i = 0; i < BPF_REG_SIZE; i++)
1001 			state->stack[spi].slot_type[i] = STACK_SPILL;
1002 	} else {
1003 		u8 type = STACK_MISC;
1004 
1005 		/* regular write of data into stack */
1006 		state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
1007 
1008 		/* only mark the slot as written if all 8 bytes were written,
1009 		 * otherwise read propagation may incorrectly stop too soon
1010 		 * when stack slots are partially written.
1011 		 * This heuristic means that read propagation will be
1012 		 * conservative, since it will add REG_LIVE_READ marks
1013 		 * to stack slots all the way to the first state when a program
1014 		 * writes+reads less than 8 bytes.
1015 		 */
1016 		if (size == BPF_REG_SIZE)
1017 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1018 
1019 		/* when we zero initialize stack slots mark them as such */
1020 		if (value_regno >= 0 &&
1021 		    register_is_null(&cur->regs[value_regno]))
1022 			type = STACK_ZERO;
1023 
1024 		for (i = 0; i < size; i++)
1025 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
1026 				type;
1027 	}
1028 	return 0;
1029 }
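
/* For example, with r1 holding a pointer (a hypothetical fragment):
 *   *(u64 *)(r10 - 8) = r1   // full 8-byte spill, slots marked STACK_SPILL
 * preserves r1's register state in the stack slot, whereas
 *   *(u32 *)(r10 - 8) = r1   // partial register spill
 * is rejected above with "invalid size of register spill".
 */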
1030 
1031 /* registers of every function are unique and mark_reg_read() propagates
1032  * the liveness in the following cases:
1033  * - from callee into caller for R1 - R5 that were used as arguments
1034  * - from caller into callee for R0 that is used as the result of the call
1035  * - from caller to the same caller skipping states of the callee for R6 - R9,
1036  *   since R6 - R9 are callee saved by implicit function prologue and
1037  *   caller's R6 != callee's R6, so when we propagate liveness up to
1038  *   parent states we need to skip callee states for R6 - R9.
1039  *
1040  * stack slot marking is different, since stacks of caller and callee are
1041  * accessible in both (since caller can pass a pointer to caller's stack to
1042  * callee which can pass it to another function), hence mark_stack_slot_read()
1043  * has to propagate the stack liveness to all parent states at the given frame number.
1044  * Consider code:
1045  * f1() {
1046  *   ptr = fp - 8;
1047  *   *ptr = ctx;
1048  *   call f2 {
1049  *      .. = *ptr;
1050  *   }
1051  *   .. = *ptr;
1052  * }
1053  * First *ptr is reading from f1's stack and mark_stack_slot_read() has
1054  * to mark liveness at the f1's frame and not f2's frame.
1055  * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
1056  * to propagate liveness to f2 states at f1's frame level and further into
1057  * f1 states at f1's frame level until write into that stack slot
1058  */
1059 static void mark_stack_slot_read(struct bpf_verifier_env *env,
1060 				 const struct bpf_verifier_state *state,
1061 				 struct bpf_verifier_state *parent,
1062 				 int slot, int frameno)
1063 {
1064 	bool writes = parent == state->parent; /* Observe write marks */
1065 
1066 	while (parent) {
1067 		if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
1068 			/* since the LIVE_WRITTEN mark is only done for a full 8-byte
1069 			 * write, the read marks are conservative and the parent
1070 			 * state may not even have the stack allocated. In that case
1071 			 * end the propagation, since the loop has reached the
1072 			 * beginning of the function
1073 			 */
1074 			break;
1075 		/* if read wasn't screened by an earlier write ... */
1076 		if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
1077 			break;
1078 		/* ... then we depend on parent's value */
1079 		parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
1080 		state = parent;
1081 		parent = state->parent;
1082 		writes = true;
1083 	}
1084 }
1085 
1086 static int check_stack_read(struct bpf_verifier_env *env,
1087 			    struct bpf_func_state *reg_state /* func where register points to */,
1088 			    int off, int size, int value_regno)
1089 {
1090 	struct bpf_verifier_state *vstate = env->cur_state;
1091 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1092 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1093 	u8 *stype;
1094 
1095 	if (reg_state->allocated_stack <= slot) {
1096 		verbose(env, "invalid read from stack off %d+0 size %d\n",
1097 			off, size);
1098 		return -EACCES;
1099 	}
1100 	stype = reg_state->stack[spi].slot_type;
1101 
1102 	if (stype[0] == STACK_SPILL) {
1103 		if (size != BPF_REG_SIZE) {
1104 			verbose(env, "invalid size of register spill\n");
1105 			return -EACCES;
1106 		}
1107 		for (i = 1; i < BPF_REG_SIZE; i++) {
1108 			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
1109 				verbose(env, "corrupted spill memory\n");
1110 				return -EACCES;
1111 			}
1112 		}
1113 
1114 		if (value_regno >= 0) {
1115 			/* restore register state from stack */
1116 			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
1117 			/* mark reg as written since spilled pointer state likely
1118 			 * has its liveness marks cleared by is_state_visited()
1119 			 * which resets stack/reg liveness for state transitions
1120 			 */
1121 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1122 		}
1123 		mark_stack_slot_read(env, vstate, vstate->parent, spi,
1124 				     reg_state->frameno);
1125 		return 0;
1126 	} else {
1127 		int zeros = 0;
1128 
1129 		for (i = 0; i < size; i++) {
1130 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1131 				continue;
1132 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1133 				zeros++;
1134 				continue;
1135 			}
1136 			verbose(env, "invalid read from stack off %d+%d size %d\n",
1137 				off, i, size);
1138 			return -EACCES;
1139 		}
1140 		mark_stack_slot_read(env, vstate, vstate->parent, spi,
1141 				     reg_state->frameno);
1142 		if (value_regno >= 0) {
1143 			if (zeros == size) {
1144 				/* any size read into register is zero extended,
1145 				 * so the whole register == const_zero
1146 				 */
1147 				__mark_reg_const_zero(&state->regs[value_regno]);
1148 			} else {
1149 				/* have read misc data from the stack */
1150 				mark_reg_unknown(env, state->regs, value_regno);
1151 			}
1152 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1153 		}
1154 		return 0;
1155 	}
1156 }
1157 
1158 /* check read/write into map element returned by bpf_map_lookup_elem() */
1159 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1160 			      int size, bool zero_size_allowed)
1161 {
1162 	struct bpf_reg_state *regs = cur_regs(env);
1163 	struct bpf_map *map = regs[regno].map_ptr;
1164 
1165 	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1166 	    off + size > map->value_size) {
1167 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
1168 			map->value_size, off, size);
1169 		return -EACCES;
1170 	}
1171 	return 0;
1172 }
1173 
1174 /* check read/write into a map element with possible variable offset */
1175 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1176 			    int off, int size, bool zero_size_allowed)
1177 {
1178 	struct bpf_verifier_state *vstate = env->cur_state;
1179 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1180 	struct bpf_reg_state *reg = &state->regs[regno];
1181 	int err;
1182 
1183 	/* We may have adjusted the register to this map value, so we
1184 	 * need to try adding each of min_value and max_value to off
1185 	 * to make sure our theoretical access will be safe.
1186 	 */
1187 	if (env->log.level)
1188 		print_verifier_state(env, state);
1189 	/* The minimum value is only important with signed
1190 	 * comparisons where we can't assume the floor of a
1191 	 * value is 0.  If we are using signed variables for our
1192  * indexes we need to make sure that whatever we use
1193 	 * will have a set floor within our range.
1194 	 */
1195 	if (reg->smin_value < 0) {
1196 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1197 			regno);
1198 		return -EACCES;
1199 	}
1200 	err = __check_map_access(env, regno, reg->smin_value + off, size,
1201 				 zero_size_allowed);
1202 	if (err) {
1203 		verbose(env, "R%d min value is outside of the array range\n",
1204 			regno);
1205 		return err;
1206 	}
1207 
1208 	/* If we haven't set a max value then we need to bail since we can't be
1209 	 * sure we won't do bad things.
1210 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
1211 	 */
1212 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
1213 		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
1214 			regno);
1215 		return -EACCES;
1216 	}
1217 	err = __check_map_access(env, regno, reg->umax_value + off, size,
1218 				 zero_size_allowed);
1219 	if (err)
1220 		verbose(env, "R%d max value is outside of the array range\n",
1221 			regno);
1222 	return err;
1223 }
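
/* A worked example with assumed numbers: for a map with value_size = 64, an
 * 8-byte access at a variable offset whose bounds are [0, 60] fails the
 * second check above, since 60 + 8 > 64, and is reported as
 * "max value is outside of the array range".
 */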
1224 
1225 #define MAX_PACKET_OFF 0xffff
1226 
1227 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
1228 				       const struct bpf_call_arg_meta *meta,
1229 				       enum bpf_access_type t)
1230 {
1231 	switch (env->prog->type) {
1232 	case BPF_PROG_TYPE_LWT_IN:
1233 	case BPF_PROG_TYPE_LWT_OUT:
1234 		/* dst_input() and dst_output() can't write for now */
1235 		if (t == BPF_WRITE)
1236 			return false;
1237 		/* fallthrough */
1238 	case BPF_PROG_TYPE_SCHED_CLS:
1239 	case BPF_PROG_TYPE_SCHED_ACT:
1240 	case BPF_PROG_TYPE_XDP:
1241 	case BPF_PROG_TYPE_LWT_XMIT:
1242 	case BPF_PROG_TYPE_SK_SKB:
1243 		if (meta)
1244 			return meta->pkt_access;
1245 
1246 		env->seen_direct_write = true;
1247 		return true;
1248 	default:
1249 		return false;
1250 	}
1251 }
1252 
1253 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
1254 				 int off, int size, bool zero_size_allowed)
1255 {
1256 	struct bpf_reg_state *regs = cur_regs(env);
1257 	struct bpf_reg_state *reg = &regs[regno];
1258 
1259 	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1260 	    (u64)off + size > reg->range) {
1261 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
1262 			off, size, regno, reg->id, reg->off, reg->range);
1263 		return -EACCES;
1264 	}
1265 	return 0;
1266 }
1267 
1268 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
1269 			       int size, bool zero_size_allowed)
1270 {
1271 	struct bpf_reg_state *regs = cur_regs(env);
1272 	struct bpf_reg_state *reg = &regs[regno];
1273 	int err;
1274 
1275 	/* We may have added a variable offset to the packet pointer; but any
1276 	 * reg->range we have comes after that.  We are only checking the fixed
1277 	 * offset.
1278 	 */
1279 
1280 	/* We don't allow negative numbers, because we aren't tracking enough
1281 	 * detail to prove they're safe.
1282 	 */
1283 	if (reg->smin_value < 0) {
1284 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1285 			regno);
1286 		return -EACCES;
1287 	}
1288 	err = __check_packet_access(env, regno, off, size, zero_size_allowed);
1289 	if (err) {
1290 		verbose(env, "R%d offset is outside of the packet\n", regno);
1291 		return err;
1292 	}
1293 	return err;
1294 }
1295 
1296 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
1297 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
1298 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
1299 {
1300 	struct bpf_insn_access_aux info = {
1301 		.reg_type = *reg_type,
1302 	};
1303 
1304 	if (env->ops->is_valid_access &&
1305 	    env->ops->is_valid_access(off, size, t, &info)) {
1306 		/* A non zero info.ctx_field_size indicates that this field is a
1307 		 * candidate for later verifier transformation to load the whole
1308 		 * field and then apply a mask when accessed with a narrower
1309 		 * access than actual ctx access size. A zero info.ctx_field_size
1310 		 * will only allow for whole field access and rejects any other
1311 		 * type of narrower access.
1312 		 */
1313 		*reg_type = info.reg_type;
1314 
1315 		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
1316 		/* remember the offset of last byte accessed in ctx */
1317 		if (env->prog->aux->max_ctx_offset < off + size)
1318 			env->prog->aux->max_ctx_offset = off + size;
1319 		return 0;
1320 	}
1321 
1322 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
1323 	return -EACCES;
1324 }
1325 
1326 static bool __is_pointer_value(bool allow_ptr_leaks,
1327 			       const struct bpf_reg_state *reg)
1328 {
1329 	if (allow_ptr_leaks)
1330 		return false;
1331 
1332 	return reg->type != SCALAR_VALUE;
1333 }
1334 
1335 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1336 {
1337 	return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
1338 }
1339 
1340 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1341 				   const struct bpf_reg_state *reg,
1342 				   int off, int size, bool strict)
1343 {
1344 	struct tnum reg_off;
1345 	int ip_align;
1346 
1347 	/* Byte size accesses are always allowed. */
1348 	if (!strict || size == 1)
1349 		return 0;
1350 
1351 	/* For platforms that do not have a Kconfig enabling
1352 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1353 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
1354 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1355 	 * to this code only in strict mode where we want to emulate
1356 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
1357 	 * unconditional IP align value of '2'.
1358 	 */
1359 	ip_align = 2;
1360 
1361 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1362 	if (!tnum_is_aligned(reg_off, size)) {
1363 		char tn_buf[48];
1364 
1365 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1366 		verbose(env,
1367 			"misaligned packet access off %d+%s+%d+%d size %d\n",
1368 			ip_align, tn_buf, reg->off, off, size);
1369 		return -EACCES;
1370 	}
1371 
1372 	return 0;
1373 }
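
/* Example of the check above with assumed numbers: with the emulated
 * NET_IP_ALIGN of 2, a 4-byte load at a constant packet offset of 14 (just
 * past an Ethernet header) computes reg_off = 2 + 0 + 14 = 16, which is
 * 4-byte aligned and allowed, while the same load at offset 12 computes
 * 2 + 12 = 14 and is rejected in strict mode as misaligned.
 */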
1374 
1375 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1376 				       const struct bpf_reg_state *reg,
1377 				       const char *pointer_desc,
1378 				       int off, int size, bool strict)
1379 {
1380 	struct tnum reg_off;
1381 
1382 	/* Byte size accesses are always allowed. */
1383 	if (!strict || size == 1)
1384 		return 0;
1385 
1386 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1387 	if (!tnum_is_aligned(reg_off, size)) {
1388 		char tn_buf[48];
1389 
1390 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1391 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
1392 			pointer_desc, tn_buf, reg->off, off, size);
1393 		return -EACCES;
1394 	}
1395 
1396 	return 0;
1397 }
1398 
1399 static int check_ptr_alignment(struct bpf_verifier_env *env,
1400 			       const struct bpf_reg_state *reg,
1401 			       int off, int size)
1402 {
1403 	bool strict = env->strict_alignment;
1404 	const char *pointer_desc = "";
1405 
1406 	switch (reg->type) {
1407 	case PTR_TO_PACKET:
1408 	case PTR_TO_PACKET_META:
1409 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
1410 		 * right in front, treat it the very same way.
1411 		 */
1412 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
1413 	case PTR_TO_MAP_VALUE:
1414 		pointer_desc = "value ";
1415 		break;
1416 	case PTR_TO_CTX:
1417 		pointer_desc = "context ";
1418 		break;
1419 	case PTR_TO_STACK:
1420 		pointer_desc = "stack ";
1421 		/* The stack spill tracking logic in check_stack_write()
1422 		 * and check_stack_read() relies on stack accesses being
1423 		 * aligned.
1424 		 */
1425 		strict = true;
1426 		break;
1427 	default:
1428 		break;
1429 	}
1430 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1431 					   strict);
1432 }
1433 
1434 static int update_stack_depth(struct bpf_verifier_env *env,
1435 			      const struct bpf_func_state *func,
1436 			      int off)
1437 {
1438 	u16 stack = env->subprog_stack_depth[func->subprogno];
1439 
1440 	if (stack >= -off)
1441 		return 0;
1442 
1443 	/* update known max for given subprogram */
1444 	env->subprog_stack_depth[func->subprogno] = -off;
1445 	return 0;
1446 }
1447 
1448 /* starting from the main bpf function, walk all instructions of the function
1449  * and recursively walk all callees that the given function can call.
1450  * Ignore jump and exit insns.
1451  * Since recursion is prevented by check_cfg(), this algorithm
1452  * only needs a local stack of MAX_CALL_FRAMES to remember callsites.
1453  */
1454 static int check_max_stack_depth(struct bpf_verifier_env *env)
1455 {
1456 	int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
1457 	struct bpf_insn *insn = env->prog->insnsi;
1458 	int insn_cnt = env->prog->len;
1459 	int ret_insn[MAX_CALL_FRAMES];
1460 	int ret_prog[MAX_CALL_FRAMES];
1461 
1462 process_func:
1463 	/* round up to 32 bytes, since this is the granularity
1464 	 * of the interpreter stack size
1465 	 */
1466 	depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
1467 	if (depth > MAX_BPF_STACK) {
1468 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
1469 			frame + 1, depth);
1470 		return -EACCES;
1471 	}
1472 continue_func:
1473 	if (env->subprog_cnt == subprog)
1474 		subprog_end = insn_cnt;
1475 	else
1476 		subprog_end = env->subprog_starts[subprog];
1477 	for (; i < subprog_end; i++) {
1478 		if (insn[i].code != (BPF_JMP | BPF_CALL))
1479 			continue;
1480 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
1481 			continue;
1482 		/* remember insn and function to return to */
1483 		ret_insn[frame] = i + 1;
1484 		ret_prog[frame] = subprog;
1485 
1486 		/* find the callee */
1487 		i = i + insn[i].imm + 1;
1488 		subprog = find_subprog(env, i);
1489 		if (subprog < 0) {
1490 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1491 				  i);
1492 			return -EFAULT;
1493 		}
1494 		subprog++;
1495 		frame++;
1496 		if (frame >= MAX_CALL_FRAMES) {
1497 			WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1498 			return -EFAULT;
1499 		}
1500 		goto process_func;
1501 	}
1502 	/* end of for() loop means the last insn of the 'subprog'
1503 	 * was reached. Doesn't matter whether it was JA or EXIT
1504 	 */
1505 	if (frame == 0)
1506 		return 0;
1507 	depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
1508 	frame--;
1509 	i = ret_insn[frame];
1510 	subprog = ret_prog[frame];
1511 	goto continue_func;
1512 }
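
/* A worked example with assumed per-function stack sizes: if the main
 * function uses 40 bytes of stack and calls a subprog that uses 24 bytes,
 * the walk above accumulates round_up(40, 32) + round_up(24, 32) = 64 + 32
 * = 96 bytes on the deepest call chain, well under MAX_BPF_STACK, so the
 * program is accepted.
 */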
1513 
1514 static int get_callee_stack_depth(struct bpf_verifier_env *env,
1515 				  const struct bpf_insn *insn, int idx)
1516 {
1517 	int start = idx + insn->imm + 1, subprog;
1518 
1519 	subprog = find_subprog(env, start);
1520 	if (subprog < 0) {
1521 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1522 			  start);
1523 		return -EFAULT;
1524 	}
1525 	subprog++;
1526 	return env->subprog_stack_depth[subprog];
1527 }
1528 
1529 /* truncate register to smaller size (in bytes)
1530  * must be called with size < BPF_REG_SIZE
1531  */
1532 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1533 {
1534 	u64 mask;
1535 
1536 	/* clear high bits in bit representation */
1537 	reg->var_off = tnum_cast(reg->var_off, size);
1538 
1539 	/* fix arithmetic bounds */
1540 	mask = ((u64)1 << (size * 8)) - 1;
1541 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1542 		reg->umin_value &= mask;
1543 		reg->umax_value &= mask;
1544 	} else {
1545 		reg->umin_value = 0;
1546 		reg->umax_value = mask;
1547 	}
1548 	reg->smin_value = reg->umin_value;
1549 	reg->smax_value = reg->umax_value;
1550 }
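
/* Example with assumed values: truncating to size == 2 (a BPF_H load) a
 * register whose bounds were [0, 0x1ffff] gives mask == 0xffff; since
 * 0 & ~mask != 0x1ffff & ~mask, the old bounds cannot be kept, and the
 * register ends up with umin = 0, umax = 0xffff, smin = 0, smax = 0xffff
 * and var_off truncated to the low 16 bits.
 */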
1551 
1552 /* check whether memory at (regno + off) is accessible for t = (read | write)
1553  * if t==write, value_regno is a register whose value is stored into memory
1554  * if t==read, value_regno is a register which will receive the value from memory
1555  * if t==write && value_regno==-1, some unknown value is stored into memory
1556  * if t==read && value_regno==-1, don't care what we read from memory
1557  */
1558 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
1559 			    int bpf_size, enum bpf_access_type t,
1560 			    int value_regno)
1561 {
1562 	struct bpf_reg_state *regs = cur_regs(env);
1563 	struct bpf_reg_state *reg = regs + regno;
1564 	struct bpf_func_state *state;
1565 	int size, err = 0;
1566 
1567 	size = bpf_size_to_bytes(bpf_size);
1568 	if (size < 0)
1569 		return size;
1570 
1571 	/* alignment checks will add in reg->off themselves */
1572 	err = check_ptr_alignment(env, reg, off, size);
1573 	if (err)
1574 		return err;
1575 
1576 	/* for access checks, reg->off is just part of off */
1577 	off += reg->off;
1578 
1579 	if (reg->type == PTR_TO_MAP_VALUE) {
1580 		if (t == BPF_WRITE && value_regno >= 0 &&
1581 		    is_pointer_value(env, value_regno)) {
1582 			verbose(env, "R%d leaks addr into map\n", value_regno);
1583 			return -EACCES;
1584 		}
1585 
1586 		err = check_map_access(env, regno, off, size, false);
1587 		if (!err && t == BPF_READ && value_regno >= 0)
1588 			mark_reg_unknown(env, regs, value_regno);
1589 
1590 	} else if (reg->type == PTR_TO_CTX) {
1591 		enum bpf_reg_type reg_type = SCALAR_VALUE;
1592 
1593 		if (t == BPF_WRITE && value_regno >= 0 &&
1594 		    is_pointer_value(env, value_regno)) {
1595 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
1596 			return -EACCES;
1597 		}
1598 		/* ctx accesses must be at a fixed offset, so that we can
1599 		 * determine what type of data were returned.
1600 		 */
1601 		if (reg->off) {
1602 			verbose(env,
1603 				"dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
1604 				regno, reg->off, off - reg->off);
1605 			return -EACCES;
1606 		}
1607 		if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1608 			char tn_buf[48];
1609 
1610 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1611 			verbose(env,
1612 				"variable ctx access var_off=%s off=%d size=%d",
1613 				tn_buf, off, size);
1614 			return -EACCES;
1615 		}
1616 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
1617 		if (!err && t == BPF_READ && value_regno >= 0) {
1618 			/* ctx access returns either a scalar, or a
1619 			 * PTR_TO_PACKET[_META,_END]. In the latter
1620 			 * case, we know the offset is zero.
1621 			 */
1622 			if (reg_type == SCALAR_VALUE)
1623 				mark_reg_unknown(env, regs, value_regno);
1624 			else
1625 				mark_reg_known_zero(env, regs,
1626 						    value_regno);
1627 			regs[value_regno].id = 0;
1628 			regs[value_regno].off = 0;
1629 			regs[value_regno].range = 0;
1630 			regs[value_regno].type = reg_type;
1631 		}
1632 
1633 	} else if (reg->type == PTR_TO_STACK) {
1634 		/* stack accesses must be at a fixed offset, so that we can
1635 		 * determine what type of data is returned.
1636 		 * See check_stack_read().
1637 		 */
1638 		if (!tnum_is_const(reg->var_off)) {
1639 			char tn_buf[48];
1640 
1641 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1642 		verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
1643 				tn_buf, off, size);
1644 			return -EACCES;
1645 		}
1646 		off += reg->var_off.value;
1647 		if (off >= 0 || off < -MAX_BPF_STACK) {
1648 			verbose(env, "invalid stack off=%d size=%d\n", off,
1649 				size);
1650 			return -EACCES;
1651 		}
1652 
1653 		state = func(env, reg);
1654 		err = update_stack_depth(env, state, off);
1655 		if (err)
1656 			return err;
1657 
1658 		if (t == BPF_WRITE)
1659 			err = check_stack_write(env, state, off, size,
1660 						value_regno);
1661 		else
1662 			err = check_stack_read(env, state, off, size,
1663 					       value_regno);
1664 	} else if (reg_is_pkt_pointer(reg)) {
1665 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
1666 			verbose(env, "cannot write into packet\n");
1667 			return -EACCES;
1668 		}
1669 		if (t == BPF_WRITE && value_regno >= 0 &&
1670 		    is_pointer_value(env, value_regno)) {
1671 			verbose(env, "R%d leaks addr into packet\n",
1672 				value_regno);
1673 			return -EACCES;
1674 		}
1675 		err = check_packet_access(env, regno, off, size, false);
1676 		if (!err && t == BPF_READ && value_regno >= 0)
1677 			mark_reg_unknown(env, regs, value_regno);
1678 	} else {
1679 		verbose(env, "R%d invalid mem access '%s'\n", regno,
1680 			reg_type_str[reg->type]);
1681 		return -EACCES;
1682 	}
1683 
1684 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1685 	    regs[value_regno].type == SCALAR_VALUE) {
1686 		/* b/h/w load zero-extends, mark upper bits as known 0 */
1687 		coerce_reg_to_size(&regs[value_regno], size);
1688 	}
1689 	return err;
1690 }
1691 
1692 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
1693 {
1694 	int err;
1695 
1696 	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
1697 	    insn->imm != 0) {
1698 		verbose(env, "BPF_XADD uses reserved fields\n");
1699 		return -EINVAL;
1700 	}
1701 
1702 	/* check src1 operand */
1703 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
1704 	if (err)
1705 		return err;
1706 
1707 	/* check src2 operand */
1708 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
1709 	if (err)
1710 		return err;
1711 
1712 	if (is_pointer_value(env, insn->src_reg)) {
1713 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
1714 		return -EACCES;
1715 	}
1716 
1717 	/* check whether atomic_add can read the memory */
1718 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1719 			       BPF_SIZE(insn->code), BPF_READ, -1);
1720 	if (err)
1721 		return err;
1722 
1723 	/* check whether atomic_add can write into the same memory */
1724 	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1725 				BPF_SIZE(insn->code), BPF_WRITE, -1);
1726 }
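
/* For example, an atomic add emitted as (a sketch, assuming the standard
 * BPF_STX_XADD() insn macro)
 *
 *     BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, 8)
 *
 * is validated above as a read followed by a write of the same 8 bytes at
 * R1 + 8, with value_regno == -1 since the old memory contents are not
 * loaded into any register.
 */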
1727 
1728 /* when register 'regno' is passed into a function that will read 'access_size'
1729  * bytes from that pointer, make sure that it's within the stack boundary
1730  * and that all accessed elements of the stack are initialized.
1731  * Unlike most pointer bounds-checking functions, this one doesn't take an
1732  * 'off' argument, so it has to add in reg->off itself.
1733  */
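/* As an illustrative sketch: a helper argument pointing at fp-16 with
 * access_size == 8 requires the eight stack bytes at offsets -16..-9 to
 * have been written before the call (slot_type STACK_MISC or STACK_ZERO),
 * unless the helper declared the argument as ARG_PTR_TO_UNINIT_MEM (raw
 * mode), in which case only the stack-bounds check is performed here.
 */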
1734 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1735 				int access_size, bool zero_size_allowed,
1736 				struct bpf_call_arg_meta *meta)
1737 {
1738 	struct bpf_reg_state *reg = cur_regs(env) + regno;
1739 	struct bpf_func_state *state = func(env, reg);
1740 	int off, i, slot, spi;
1741 
1742 	if (reg->type != PTR_TO_STACK) {
1743 		/* Allow zero-byte read from NULL, regardless of pointer type */
1744 		if (zero_size_allowed && access_size == 0 &&
1745 		    register_is_null(reg))
1746 			return 0;
1747 
1748 		verbose(env, "R%d type=%s expected=%s\n", regno,
1749 			reg_type_str[reg->type],
1750 			reg_type_str[PTR_TO_STACK]);
1751 		return -EACCES;
1752 	}
1753 
1754 	/* Only allow fixed-offset stack reads */
1755 	if (!tnum_is_const(reg->var_off)) {
1756 		char tn_buf[48];
1757 
1758 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1759 		verbose(env, "invalid variable stack read R%d var_off=%s\n",
1760 			regno, tn_buf);
1761 		return -EACCES;
1762 	}
1763 	off = reg->off + reg->var_off.value;
1764 	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
1765 	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
1766 		verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
1767 			regno, off, access_size);
1768 		return -EACCES;
1769 	}
1770 
1771 	if (meta && meta->raw_mode) {
1772 		meta->access_size = access_size;
1773 		meta->regno = regno;
1774 		return 0;
1775 	}
1776 
1777 	for (i = 0; i < access_size; i++) {
1778 		u8 *stype;
1779 
1780 		slot = -(off + i) - 1;
1781 		spi = slot / BPF_REG_SIZE;
1782 		if (state->allocated_stack <= slot)
1783 			goto err;
1784 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
1785 		if (*stype == STACK_MISC)
1786 			goto mark;
1787 		if (*stype == STACK_ZERO) {
1788 			/* helper can write anything into the stack */
1789 			*stype = STACK_MISC;
1790 			goto mark;
1791 		}
1792 err:
1793 		verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
1794 			off, i, access_size);
1795 		return -EACCES;
1796 mark:
1797 		/* reading any byte out of 8-byte 'spill_slot' will cause
1798 		 * the whole slot to be marked as 'read'
1799 		 */
1800 		mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
1801 				     spi, state->frameno);
1802 	}
1803 	return update_stack_depth(env, state, off);
1804 }
1805 
1806 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
1807 				   int access_size, bool zero_size_allowed,
1808 				   struct bpf_call_arg_meta *meta)
1809 {
1810 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1811 
1812 	switch (reg->type) {
1813 	case PTR_TO_PACKET:
1814 	case PTR_TO_PACKET_META:
1815 		return check_packet_access(env, regno, reg->off, access_size,
1816 					   zero_size_allowed);
1817 	case PTR_TO_MAP_VALUE:
1818 		return check_map_access(env, regno, reg->off, access_size,
1819 					zero_size_allowed);
1820 	default: /* scalar_value|ptr_to_stack or invalid ptr */
1821 		return check_stack_boundary(env, regno, access_size,
1822 					    zero_size_allowed, meta);
1823 	}
1824 }
1825 
1826 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
1827 			  enum bpf_arg_type arg_type,
1828 			  struct bpf_call_arg_meta *meta)
1829 {
1830 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1831 	enum bpf_reg_type expected_type, type = reg->type;
1832 	int err = 0;
1833 
1834 	if (arg_type == ARG_DONTCARE)
1835 		return 0;
1836 
1837 	err = check_reg_arg(env, regno, SRC_OP);
1838 	if (err)
1839 		return err;
1840 
1841 	if (arg_type == ARG_ANYTHING) {
1842 		if (is_pointer_value(env, regno)) {
1843 			verbose(env, "R%d leaks addr into helper function\n",
1844 				regno);
1845 			return -EACCES;
1846 		}
1847 		return 0;
1848 	}
1849 
1850 	if (type_is_pkt_pointer(type) &&
1851 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
1852 		verbose(env, "helper access to the packet is not allowed\n");
1853 		return -EACCES;
1854 	}
1855 
1856 	if (arg_type == ARG_PTR_TO_MAP_KEY ||
1857 	    arg_type == ARG_PTR_TO_MAP_VALUE) {
1858 		expected_type = PTR_TO_STACK;
1859 		if (!type_is_pkt_pointer(type) &&
1860 		    type != expected_type)
1861 			goto err_type;
1862 	} else if (arg_type == ARG_CONST_SIZE ||
1863 		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
1864 		expected_type = SCALAR_VALUE;
1865 		if (type != expected_type)
1866 			goto err_type;
1867 	} else if (arg_type == ARG_CONST_MAP_PTR) {
1868 		expected_type = CONST_PTR_TO_MAP;
1869 		if (type != expected_type)
1870 			goto err_type;
1871 	} else if (arg_type == ARG_PTR_TO_CTX) {
1872 		expected_type = PTR_TO_CTX;
1873 		if (type != expected_type)
1874 			goto err_type;
1875 	} else if (arg_type == ARG_PTR_TO_MEM ||
1876 		   arg_type == ARG_PTR_TO_MEM_OR_NULL ||
1877 		   arg_type == ARG_PTR_TO_UNINIT_MEM) {
1878 		expected_type = PTR_TO_STACK;
1879 		/* One exception here: if the function allows NULL to be
1880 		 * passed in as this argument, the register is a SCALAR_VALUE
1881 		 * type. The final test happens during stack boundary checking.
1882 		 */
1883 		if (register_is_null(reg) &&
1884 		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
1885 			/* final test in check_stack_boundary() */;
1886 		else if (!type_is_pkt_pointer(type) &&
1887 			 type != PTR_TO_MAP_VALUE &&
1888 			 type != expected_type)
1889 			goto err_type;
1890 		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
1891 	} else {
1892 		verbose(env, "unsupported arg_type %d\n", arg_type);
1893 		return -EFAULT;
1894 	}
1895 
1896 	if (arg_type == ARG_CONST_MAP_PTR) {
1897 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
1898 		meta->map_ptr = reg->map_ptr;
1899 	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
1900 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
1901 		 * check that [key, key + map->key_size) are within
1902 		 * stack limits and initialized
1903 		 */
1904 		if (!meta->map_ptr) {
1905 			/* in function declaration map_ptr must come before
1906 			 * map_key, so that it's verified and known before
1907 			 * we have to check map_key here. Otherwise it means
1908 			 * that kernel subsystem misconfigured verifier
1909 			 */
1910 			verbose(env, "invalid map_ptr to access map->key\n");
1911 			return -EACCES;
1912 		}
1913 		if (type_is_pkt_pointer(type))
1914 			err = check_packet_access(env, regno, reg->off,
1915 						  meta->map_ptr->key_size,
1916 						  false);
1917 		else
1918 			err = check_stack_boundary(env, regno,
1919 						   meta->map_ptr->key_size,
1920 						   false, NULL);
1921 	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
1922 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
1923 		 * check [value, value + map->value_size) validity
1924 		 */
1925 		if (!meta->map_ptr) {
1926 			/* kernel subsystem misconfigured verifier */
1927 			verbose(env, "invalid map_ptr to access map->value\n");
1928 			return -EACCES;
1929 		}
1930 		if (type_is_pkt_pointer(type))
1931 			err = check_packet_access(env, regno, reg->off,
1932 						  meta->map_ptr->value_size,
1933 						  false);
1934 		else
1935 			err = check_stack_boundary(env, regno,
1936 						   meta->map_ptr->value_size,
1937 						   false, NULL);
1938 	} else if (arg_type == ARG_CONST_SIZE ||
1939 		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
1940 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
1941 
1942 		/* bpf_xxx(..., buf, len) call will access 'len' bytes
1943 		 * from stack pointer 'buf'. Check it.
1944 		 * Note: regno == len, regno - 1 == buf
1945 		 */
1946 		if (regno == 0) {
1947 			/* kernel subsystem misconfigured verifier */
1948 			verbose(env,
1949 				"ARG_CONST_SIZE cannot be first argument\n");
1950 			return -EACCES;
1951 		}
1952 
1953 		/* The register is SCALAR_VALUE; the access check
1954 		 * happens using its boundaries.
1955 		 */
1956 
1957 		if (!tnum_is_const(reg->var_off))
1958 			/* For unprivileged variable accesses, disable raw
1959 			 * mode so that the program is required to
1960 			 * initialize all the memory that the helper could
1961 			 * just partially fill up.
1962 			 */
1963 			meta = NULL;
1964 
1965 		if (reg->smin_value < 0) {
1966 			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
1967 				regno);
1968 			return -EACCES;
1969 		}
1970 
1971 		if (reg->umin_value == 0) {
1972 			err = check_helper_mem_access(env, regno - 1, 0,
1973 						      zero_size_allowed,
1974 						      meta);
1975 			if (err)
1976 				return err;
1977 		}
1978 
1979 		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
1980 			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
1981 				regno);
1982 			return -EACCES;
1983 		}
1984 		err = check_helper_mem_access(env, regno - 1,
1985 					      reg->umax_value,
1986 					      zero_size_allowed, meta);
1987 	}
1988 
1989 	return err;
1990 err_type:
1991 	verbose(env, "R%d type=%s expected=%s\n", regno,
1992 		reg_type_str[type], reg_type_str[expected_type]);
1993 	return -EACCES;
1994 }
1995 
1996 static int check_map_func_compatibility(struct bpf_verifier_env *env,
1997 					struct bpf_map *map, int func_id)
1998 {
1999 	if (!map)
2000 		return 0;
2001 
2002 	/* We need a two-way check, first is from the map perspective ... */
2003 	switch (map->map_type) {
2004 	case BPF_MAP_TYPE_PROG_ARRAY:
2005 		if (func_id != BPF_FUNC_tail_call)
2006 			goto error;
2007 		break;
2008 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2009 		if (func_id != BPF_FUNC_perf_event_read &&
2010 		    func_id != BPF_FUNC_perf_event_output &&
2011 		    func_id != BPF_FUNC_perf_event_read_value)
2012 			goto error;
2013 		break;
2014 	case BPF_MAP_TYPE_STACK_TRACE:
2015 		if (func_id != BPF_FUNC_get_stackid)
2016 			goto error;
2017 		break;
2018 	case BPF_MAP_TYPE_CGROUP_ARRAY:
2019 		if (func_id != BPF_FUNC_skb_under_cgroup &&
2020 		    func_id != BPF_FUNC_current_task_under_cgroup)
2021 			goto error;
2022 		break;
2023 	/* devmap returns a pointer to a live net_device ifindex that we cannot
2024 	 * allow to be modified from the bpf side. So do not allow element
2025 	 * lookups for now.
2026 	 */
2027 	case BPF_MAP_TYPE_DEVMAP:
2028 		if (func_id != BPF_FUNC_redirect_map)
2029 			goto error;
2030 		break;
2031 	/* Restrict bpf side of cpumap, open when use-cases appear */
2032 	case BPF_MAP_TYPE_CPUMAP:
2033 		if (func_id != BPF_FUNC_redirect_map)
2034 			goto error;
2035 		break;
2036 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
2037 	case BPF_MAP_TYPE_HASH_OF_MAPS:
2038 		if (func_id != BPF_FUNC_map_lookup_elem)
2039 			goto error;
2040 		break;
2041 	case BPF_MAP_TYPE_SOCKMAP:
2042 		if (func_id != BPF_FUNC_sk_redirect_map &&
2043 		    func_id != BPF_FUNC_sock_map_update &&
2044 		    func_id != BPF_FUNC_map_delete_elem)
2045 			goto error;
2046 		break;
2047 	default:
2048 		break;
2049 	}
2050 
2051 	/* ... and second from the function itself. */
2052 	switch (func_id) {
2053 	case BPF_FUNC_tail_call:
2054 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2055 			goto error;
2056 		if (env->subprog_cnt) {
2057 			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2058 			return -EINVAL;
2059 		}
2060 		break;
2061 	case BPF_FUNC_perf_event_read:
2062 	case BPF_FUNC_perf_event_output:
2063 	case BPF_FUNC_perf_event_read_value:
2064 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2065 			goto error;
2066 		break;
2067 	case BPF_FUNC_get_stackid:
2068 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2069 			goto error;
2070 		break;
2071 	case BPF_FUNC_current_task_under_cgroup:
2072 	case BPF_FUNC_skb_under_cgroup:
2073 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2074 			goto error;
2075 		break;
2076 	case BPF_FUNC_redirect_map:
2077 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
2078 		    map->map_type != BPF_MAP_TYPE_CPUMAP)
2079 			goto error;
2080 		break;
2081 	case BPF_FUNC_sk_redirect_map:
2082 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2083 			goto error;
2084 		break;
2085 	case BPF_FUNC_sock_map_update:
2086 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2087 			goto error;
2088 		break;
2089 	default:
2090 		break;
2091 	}
2092 
2093 	return 0;
2094 error:
2095 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
2096 		map->map_type, func_id_name(func_id), func_id);
2097 	return -EINVAL;
2098 }
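
/* Two quick examples of the two-way check above: bpf_map_lookup_elem() on
 * a BPF_MAP_TYPE_PROG_ARRAY is rejected by the map-side switch, while
 * bpf_tail_call() on, say, a plain hash map is rejected by the
 * function-side switch.
 */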
2099 
2100 static int check_raw_mode(const struct bpf_func_proto *fn)
2101 {
2102 	int count = 0;
2103 
2104 	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
2105 		count++;
2106 	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
2107 		count++;
2108 	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
2109 		count++;
2110 	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
2111 		count++;
2112 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
2113 		count++;
2114 
2115 	return count > 1 ? -EINVAL : 0;
2116 }
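
/* For example, a helper such as bpf_probe_read() is expected to declare its
 * destination buffer as ARG_PTR_TO_UNINIT_MEM and its length as
 * ARG_CONST_SIZE; check_raw_mode() only insists that at most one such
 * uninitialized-memory argument exists, because bpf_call_arg_meta can track
 * a single (regno, access_size) pair.
 */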
2117 
2118 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2119  * are now invalid, so turn them into unknown SCALAR_VALUE.
2120  */
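/* E.g. once a helper for which bpf_helper_changes_pkt_data() returns true
 * has run (bpf_skb_store_bytes() is one such helper), pointers previously
 * derived from pkt/pkt_meta/pkt_end may point into reallocated or moved
 * packet memory, so they are downgraded to unknown scalars in the registers
 * and in spilled stack slots.
 */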
2121 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2122 				     struct bpf_func_state *state)
2123 {
2124 	struct bpf_reg_state *regs = state->regs, *reg;
2125 	int i;
2126 
2127 	for (i = 0; i < MAX_BPF_REG; i++)
2128 		if (reg_is_pkt_pointer_any(&regs[i]))
2129 			mark_reg_unknown(env, regs, i);
2130 
2131 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2132 		if (state->stack[i].slot_type[0] != STACK_SPILL)
2133 			continue;
2134 		reg = &state->stack[i].spilled_ptr;
2135 		if (reg_is_pkt_pointer_any(reg))
2136 			__mark_reg_unknown(reg);
2137 	}
2138 }
2139 
2140 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2141 {
2142 	struct bpf_verifier_state *vstate = env->cur_state;
2143 	int i;
2144 
2145 	for (i = 0; i <= vstate->curframe; i++)
2146 		__clear_all_pkt_pointers(env, vstate->frame[i]);
2147 }
2148 
2149 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2150 			   int *insn_idx)
2151 {
2152 	struct bpf_verifier_state *state = env->cur_state;
2153 	struct bpf_func_state *caller, *callee;
2154 	int i, subprog, target_insn;
2155 
2156 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
2157 		verbose(env, "the call stack of %d frames is too deep\n",
2158 			state->curframe + 2);
2159 		return -E2BIG;
2160 	}
2161 
2162 	target_insn = *insn_idx + insn->imm;
2163 	subprog = find_subprog(env, target_insn + 1);
2164 	if (subprog < 0) {
2165 		verbose(env, "verifier bug. No program starts at insn %d\n",
2166 			target_insn + 1);
2167 		return -EFAULT;
2168 	}
2169 
2170 	caller = state->frame[state->curframe];
2171 	if (state->frame[state->curframe + 1]) {
2172 		verbose(env, "verifier bug. Frame %d already allocated\n",
2173 			state->curframe + 1);
2174 		return -EFAULT;
2175 	}
2176 
2177 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2178 	if (!callee)
2179 		return -ENOMEM;
2180 	state->frame[state->curframe + 1] = callee;
2181 
2182 	/* callee cannot access r0, r6 - r9 for reading and has to write
2183 	 * into its own stack before reading from it.
2184 	 * callee can read/write into caller's stack
2185 	 */
2186 	init_func_state(env, callee,
2187 			/* remember the callsite, it will be used by bpf_exit */
2188 			*insn_idx /* callsite */,
2189 			state->curframe + 1 /* frameno within this callchain */,
2190 			subprog + 1 /* subprog number within this prog */);
2191 
2192 	/* copy r1 - r5 args that callee can access */
2193 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2194 		callee->regs[i] = caller->regs[i];
2195 
2196 	/* after the call, registers r0 - r5 are scratched */
2197 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
2198 		mark_reg_not_init(env, caller->regs, caller_saved[i]);
2199 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2200 	}
2201 
2202 	/* only increment it after check_reg_arg() finished */
2203 	state->curframe++;
2204 
2205 	/* and go analyze first insn of the callee */
2206 	*insn_idx = target_insn;
2207 
2208 	if (env->log.level) {
2209 		verbose(env, "caller:\n");
2210 		print_verifier_state(env, caller);
2211 		verbose(env, "callee:\n");
2212 		print_verifier_state(env, callee);
2213 	}
2214 	return 0;
2215 }
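
/* Sketch of the bookkeeping above: for a pseudo call at insn N with
 * offset imm, the callee's first insn is N + imm + 1. A fresh
 * bpf_func_state becomes frame curframe + 1, r1-r5 are copied from the
 * caller, and the caller's r0-r5 are scratched; r0 only becomes readable
 * again once the callee's bpf_exit is handled by prepare_func_exit().
 */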
2216 
2217 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2218 {
2219 	struct bpf_verifier_state *state = env->cur_state;
2220 	struct bpf_func_state *caller, *callee;
2221 	struct bpf_reg_state *r0;
2222 
2223 	callee = state->frame[state->curframe];
2224 	r0 = &callee->regs[BPF_REG_0];
2225 	if (r0->type == PTR_TO_STACK) {
2226 		/* technically it's ok to return caller's stack pointer
2227 		 * (or caller's caller's pointer) back to the caller,
2228 		 * since these pointers are valid. Only current stack
2229 		 * pointer will be invalid as soon as function exits,
2230 		 * but let's be conservative
2231 		 */
2232 		verbose(env, "cannot return stack pointer to the caller\n");
2233 		return -EINVAL;
2234 	}
2235 
2236 	state->curframe--;
2237 	caller = state->frame[state->curframe];
2238 	/* return to the caller whatever r0 had in the callee */
2239 	caller->regs[BPF_REG_0] = *r0;
2240 
2241 	*insn_idx = callee->callsite + 1;
2242 	if (env->log.level) {
2243 		verbose(env, "returning from callee:\n");
2244 		print_verifier_state(env, callee);
2245 		verbose(env, "to caller at %d:\n", *insn_idx);
2246 		print_verifier_state(env, caller);
2247 	}
2248 	/* clear everything in the callee */
2249 	free_func_state(callee);
2250 	state->frame[state->curframe + 1] = NULL;
2251 	return 0;
2252 }
2253 
2254 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
2255 {
2256 	const struct bpf_func_proto *fn = NULL;
2257 	struct bpf_reg_state *regs;
2258 	struct bpf_call_arg_meta meta;
2259 	bool changes_data;
2260 	int i, err;
2261 
2262 	/* find function prototype */
2263 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
2264 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
2265 			func_id);
2266 		return -EINVAL;
2267 	}
2268 
2269 	if (env->ops->get_func_proto)
2270 		fn = env->ops->get_func_proto(func_id);
2271 
2272 	if (!fn) {
2273 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2274 			func_id);
2275 		return -EINVAL;
2276 	}
2277 
2278 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
2279 	if (!env->prog->gpl_compatible && fn->gpl_only) {
2280 		verbose(env, "cannot call GPL only function from proprietary program\n");
2281 		return -EINVAL;
2282 	}
2283 
2284 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
2285 	changes_data = bpf_helper_changes_pkt_data(fn->func);
2286 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
2287 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
2288 			func_id_name(func_id), func_id);
2289 		return -EINVAL;
2290 	}
2291 
2292 	memset(&meta, 0, sizeof(meta));
2293 	meta.pkt_access = fn->pkt_access;
2294 
2295 	/* We only support one arg being in raw mode at the moment, which
2296 	 * is sufficient for the helper functions we have right now.
2297 	 */
2298 	err = check_raw_mode(fn);
2299 	if (err) {
2300 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
2301 			func_id_name(func_id), func_id);
2302 		return err;
2303 	}
2304 
2305 	/* check args */
2306 	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
2307 	if (err)
2308 		return err;
2309 	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
2310 	if (err)
2311 		return err;
2312 	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
2313 	if (err)
2314 		return err;
2315 	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
2316 	if (err)
2317 		return err;
2318 	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
2319 	if (err)
2320 		return err;
2321 
2322 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
2323 	 * is inferred from register state.
2324 	 */
2325 	for (i = 0; i < meta.access_size; i++) {
2326 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
2327 		if (err)
2328 			return err;
2329 	}
2330 
2331 	regs = cur_regs(env);
2332 	/* reset caller saved regs */
2333 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
2334 		mark_reg_not_init(env, regs, caller_saved[i]);
2335 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2336 	}
2337 
2338 	/* update return register (already marked as written above) */
2339 	if (fn->ret_type == RET_INTEGER) {
2340 		/* sets type to SCALAR_VALUE */
2341 		mark_reg_unknown(env, regs, BPF_REG_0);
2342 	} else if (fn->ret_type == RET_VOID) {
2343 		regs[BPF_REG_0].type = NOT_INIT;
2344 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
2345 		struct bpf_insn_aux_data *insn_aux;
2346 
2347 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
2348 		/* There is no offset yet applied, variable or fixed */
2349 		mark_reg_known_zero(env, regs, BPF_REG_0);
2350 		regs[BPF_REG_0].off = 0;
2351 		/* remember map_ptr, so that check_map_access()
2352 		 * can check 'value_size' boundary of memory access
2353 		 * to map element returned from bpf_map_lookup_elem()
2354 		 */
2355 		if (meta.map_ptr == NULL) {
2356 			verbose(env,
2357 				"kernel subsystem misconfigured verifier\n");
2358 			return -EINVAL;
2359 		}
2360 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
2361 		regs[BPF_REG_0].id = ++env->id_gen;
2362 		insn_aux = &env->insn_aux_data[insn_idx];
2363 		if (!insn_aux->map_ptr)
2364 			insn_aux->map_ptr = meta.map_ptr;
2365 		else if (insn_aux->map_ptr != meta.map_ptr)
2366 			insn_aux->map_ptr = BPF_MAP_PTR_POISON;
2367 	} else {
2368 		verbose(env, "unknown return type %d of func %s#%d\n",
2369 			fn->ret_type, func_id_name(func_id), func_id);
2370 		return -EINVAL;
2371 	}
2372 
2373 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
2374 	if (err)
2375 		return err;
2376 
2377 	if (changes_data)
2378 		clear_all_pkt_pointers(env);
2379 	return 0;
2380 }
2381 
2382 static bool signed_add_overflows(s64 a, s64 b)
2383 {
2384 	/* Do the add in u64, where overflow is well-defined */
2385 	s64 res = (s64)((u64)a + (u64)b);
2386 
2387 	if (b < 0)
2388 		return res > a;
2389 	return res < a;
2390 }
2391 
2392 static bool signed_sub_overflows(s64 a, s64 b)
2393 {
2394 	/* Do the sub in u64, where overflow is well-defined */
2395 	s64 res = (s64)((u64)a - (u64)b);
2396 
2397 	if (b < 0)
2398 		return res < a;
2399 	return res > a;
2400 }
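
/* A quick sanity check of the two helpers above:
 *   signed_add_overflows(S64_MAX, 1)  -> true  (wraps to S64_MIN, res < a)
 *   signed_add_overflows(-1, -1)      -> false (-2 is representable)
 *   signed_sub_overflows(S64_MIN, 1)  -> true  (wraps to S64_MAX, res > a)
 */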
2401 
2402 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
2403 				  const struct bpf_reg_state *reg,
2404 				  enum bpf_reg_type type)
2405 {
2406 	bool known = tnum_is_const(reg->var_off);
2407 	s64 val = reg->var_off.value;
2408 	s64 smin = reg->smin_value;
2409 
2410 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
2411 		verbose(env, "math between %s pointer and %lld is not allowed\n",
2412 			reg_type_str[type], val);
2413 		return false;
2414 	}
2415 
2416 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
2417 		verbose(env, "%s pointer offset %d is not allowed\n",
2418 			reg_type_str[type], reg->off);
2419 		return false;
2420 	}
2421 
2422 	if (smin == S64_MIN) {
2423 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
2424 			reg_type_str[type]);
2425 		return false;
2426 	}
2427 
2428 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
2429 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
2430 			smin, reg_type_str[type]);
2431 		return false;
2432 	}
2433 
2434 	return true;
2435 }
2436 
2437 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
2438  * Caller should also handle BPF_MOV case separately.
2439  * If we return -EACCES, caller may want to try again treating pointer as a
2440  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
2441  */
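/* A minimal sketch of the common case handled here: with r0 of type
 * PTR_TO_MAP_VALUE (off=0, known zero value) and r1 a scalar bounded to
 * [0, 16],
 *
 *     BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1)
 *
 * leaves r0 as PTR_TO_MAP_VALUE with fixed off still 0 and variable bounds
 * umin=0, umax=16; a later check_map_access() then has to prove that the
 * whole [0, 16] range stays inside the map's value_size.
 */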
2442 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2443 				   struct bpf_insn *insn,
2444 				   const struct bpf_reg_state *ptr_reg,
2445 				   const struct bpf_reg_state *off_reg)
2446 {
2447 	struct bpf_verifier_state *vstate = env->cur_state;
2448 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2449 	struct bpf_reg_state *regs = state->regs, *dst_reg;
2450 	bool known = tnum_is_const(off_reg->var_off);
2451 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
2452 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
2453 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
2454 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
2455 	u8 opcode = BPF_OP(insn->code);
2456 	u32 dst = insn->dst_reg;
2457 
2458 	dst_reg = &regs[dst];
2459 
2460 	if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
2461 		print_verifier_state(env, state);
2462 		verbose(env,
2463 			"verifier internal error: known but bad sbounds\n");
2464 		return -EINVAL;
2465 	}
2466 	if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
2467 		print_verifier_state(env, state);
2468 		verbose(env,
2469 			"verifier internal error: known but bad ubounds\n");
2470 		return -EINVAL;
2471 	}
2472 
2473 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
2474 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
2475 		verbose(env,
2476 			"R%d 32-bit pointer arithmetic prohibited\n",
2477 			dst);
2478 		return -EACCES;
2479 	}
2480 
2481 	if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
2482 		verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
2483 			dst);
2484 		return -EACCES;
2485 	}
2486 	if (ptr_reg->type == CONST_PTR_TO_MAP) {
2487 		verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
2488 			dst);
2489 		return -EACCES;
2490 	}
2491 	if (ptr_reg->type == PTR_TO_PACKET_END) {
2492 		verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
2493 			dst);
2494 		return -EACCES;
2495 	}
2496 
2497 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
2498 	 * The id may be overwritten later if we create a new variable offset.
2499 	 */
2500 	dst_reg->type = ptr_reg->type;
2501 	dst_reg->id = ptr_reg->id;
2502 
2503 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
2504 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
2505 		return -EINVAL;
2506 
2507 	switch (opcode) {
2508 	case BPF_ADD:
2509 		/* We can take a fixed offset as long as it doesn't overflow
2510 		 * the s32 'off' field
2511 		 */
2512 		if (known && (ptr_reg->off + smin_val ==
2513 			      (s64)(s32)(ptr_reg->off + smin_val))) {
2514 			/* pointer += K.  Accumulate it into fixed offset */
2515 			dst_reg->smin_value = smin_ptr;
2516 			dst_reg->smax_value = smax_ptr;
2517 			dst_reg->umin_value = umin_ptr;
2518 			dst_reg->umax_value = umax_ptr;
2519 			dst_reg->var_off = ptr_reg->var_off;
2520 			dst_reg->off = ptr_reg->off + smin_val;
2521 			dst_reg->range = ptr_reg->range;
2522 			break;
2523 		}
2524 		/* A new variable offset is created.  Note that off_reg->off
2525 		 * == 0, since it's a scalar.
2526 		 * dst_reg gets the pointer type and since some positive
2527 		 * integer value was added to the pointer, give it a new 'id'
2528 		 * if it's a PTR_TO_PACKET.
2529 		 * this creates a new 'base' pointer, off_reg (variable) gets
2530 		 * added into the variable offset, and we copy the fixed offset
2531 		 * from ptr_reg.
2532 		 */
2533 		if (signed_add_overflows(smin_ptr, smin_val) ||
2534 		    signed_add_overflows(smax_ptr, smax_val)) {
2535 			dst_reg->smin_value = S64_MIN;
2536 			dst_reg->smax_value = S64_MAX;
2537 		} else {
2538 			dst_reg->smin_value = smin_ptr + smin_val;
2539 			dst_reg->smax_value = smax_ptr + smax_val;
2540 		}
2541 		if (umin_ptr + umin_val < umin_ptr ||
2542 		    umax_ptr + umax_val < umax_ptr) {
2543 			dst_reg->umin_value = 0;
2544 			dst_reg->umax_value = U64_MAX;
2545 		} else {
2546 			dst_reg->umin_value = umin_ptr + umin_val;
2547 			dst_reg->umax_value = umax_ptr + umax_val;
2548 		}
2549 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
2550 		dst_reg->off = ptr_reg->off;
2551 		if (reg_is_pkt_pointer(ptr_reg)) {
2552 			dst_reg->id = ++env->id_gen;
2553 			/* something was added to pkt_ptr, set range to zero */
2554 			dst_reg->range = 0;
2555 		}
2556 		break;
2557 	case BPF_SUB:
2558 		if (dst_reg == off_reg) {
2559 			/* scalar -= pointer.  Creates an unknown scalar */
2560 			verbose(env, "R%d tried to subtract pointer from scalar\n",
2561 				dst);
2562 			return -EACCES;
2563 		}
2564 		/* We don't allow subtraction from FP, because (according to
2565 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
2566 		 * be able to deal with it.
2567 		 */
2568 		if (ptr_reg->type == PTR_TO_STACK) {
2569 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
2570 				dst);
2571 			return -EACCES;
2572 		}
2573 		if (known && (ptr_reg->off - smin_val ==
2574 			      (s64)(s32)(ptr_reg->off - smin_val))) {
2575 			/* pointer -= K.  Subtract it from fixed offset */
2576 			dst_reg->smin_value = smin_ptr;
2577 			dst_reg->smax_value = smax_ptr;
2578 			dst_reg->umin_value = umin_ptr;
2579 			dst_reg->umax_value = umax_ptr;
2580 			dst_reg->var_off = ptr_reg->var_off;
2581 			dst_reg->id = ptr_reg->id;
2582 			dst_reg->off = ptr_reg->off - smin_val;
2583 			dst_reg->range = ptr_reg->range;
2584 			break;
2585 		}
2586 		/* A new variable offset is created.  If the subtrahend is known
2587 		 * nonnegative, then any reg->range we had before is still good.
2588 		 */
2589 		if (signed_sub_overflows(smin_ptr, smax_val) ||
2590 		    signed_sub_overflows(smax_ptr, smin_val)) {
2591 			/* Overflow possible, we know nothing */
2592 			dst_reg->smin_value = S64_MIN;
2593 			dst_reg->smax_value = S64_MAX;
2594 		} else {
2595 			dst_reg->smin_value = smin_ptr - smax_val;
2596 			dst_reg->smax_value = smax_ptr - smin_val;
2597 		}
2598 		if (umin_ptr < umax_val) {
2599 			/* Overflow possible, we know nothing */
2600 			dst_reg->umin_value = 0;
2601 			dst_reg->umax_value = U64_MAX;
2602 		} else {
2603 			/* Cannot overflow (as long as bounds are consistent) */
2604 			dst_reg->umin_value = umin_ptr - umax_val;
2605 			dst_reg->umax_value = umax_ptr - umin_val;
2606 		}
2607 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
2608 		dst_reg->off = ptr_reg->off;
2609 		if (reg_is_pkt_pointer(ptr_reg)) {
2610 			dst_reg->id = ++env->id_gen;
2611 			/* pkt_ptr may have moved forward (negative subtrahend), reset range */
2612 			if (smin_val < 0)
2613 				dst_reg->range = 0;
2614 		}
2615 		break;
2616 	case BPF_AND:
2617 	case BPF_OR:
2618 	case BPF_XOR:
2619 		/* bitwise ops on pointers are troublesome, prohibit. */
2620 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
2621 			dst, bpf_alu_string[opcode >> 4]);
2622 		return -EACCES;
2623 	default:
2624 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
2625 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
2626 			dst, bpf_alu_string[opcode >> 4]);
2627 		return -EACCES;
2628 	}
2629 
2630 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2631 		return -EINVAL;
2632 
2633 	__update_reg_bounds(dst_reg);
2634 	__reg_deduce_bounds(dst_reg);
2635 	__reg_bound_offset(dst_reg);
2636 	return 0;
2637 }
2638 
2639 /* WARNING: This function does calculations on 64-bit values, but the actual
2640  * execution may occur on 32-bit values. Therefore, things like bitshifts
2641  * need extra checks in the 32-bit case.
2642  */
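/* Worked example for the BPF_ADD case below: if dst_reg is known to lie in
 * [0, 10] and src_reg is the constant 5, neither bound overflows, so the
 * signed and unsigned bounds both become [5, 15] and var_off is refined via
 * tnum_add().
 */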
2643 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2644 				      struct bpf_insn *insn,
2645 				      struct bpf_reg_state *dst_reg,
2646 				      struct bpf_reg_state src_reg)
2647 {
2648 	struct bpf_reg_state *regs = cur_regs(env);
2649 	u8 opcode = BPF_OP(insn->code);
2650 	bool src_known, dst_known;
2651 	s64 smin_val, smax_val;
2652 	u64 umin_val, umax_val;
2653 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2654 
2655 	smin_val = src_reg.smin_value;
2656 	smax_val = src_reg.smax_value;
2657 	umin_val = src_reg.umin_value;
2658 	umax_val = src_reg.umax_value;
2659 	src_known = tnum_is_const(src_reg.var_off);
2660 	dst_known = tnum_is_const(dst_reg->var_off);
2661 
2662 	if (!src_known &&
2663 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2664 		__mark_reg_unknown(dst_reg);
2665 		return 0;
2666 	}
2667 
2668 	switch (opcode) {
2669 	case BPF_ADD:
2670 		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
2671 		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
2672 			dst_reg->smin_value = S64_MIN;
2673 			dst_reg->smax_value = S64_MAX;
2674 		} else {
2675 			dst_reg->smin_value += smin_val;
2676 			dst_reg->smax_value += smax_val;
2677 		}
2678 		if (dst_reg->umin_value + umin_val < umin_val ||
2679 		    dst_reg->umax_value + umax_val < umax_val) {
2680 			dst_reg->umin_value = 0;
2681 			dst_reg->umax_value = U64_MAX;
2682 		} else {
2683 			dst_reg->umin_value += umin_val;
2684 			dst_reg->umax_value += umax_val;
2685 		}
2686 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
2687 		break;
2688 	case BPF_SUB:
2689 		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
2690 		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
2691 			/* Overflow possible, we know nothing */
2692 			dst_reg->smin_value = S64_MIN;
2693 			dst_reg->smax_value = S64_MAX;
2694 		} else {
2695 			dst_reg->smin_value -= smax_val;
2696 			dst_reg->smax_value -= smin_val;
2697 		}
2698 		if (dst_reg->umin_value < umax_val) {
2699 			/* Overflow possible, we know nothing */
2700 			dst_reg->umin_value = 0;
2701 			dst_reg->umax_value = U64_MAX;
2702 		} else {
2703 			/* Cannot overflow (as long as bounds are consistent) */
2704 			dst_reg->umin_value -= umax_val;
2705 			dst_reg->umax_value -= umin_val;
2706 		}
2707 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
2708 		break;
2709 	case BPF_MUL:
2710 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
2711 		if (smin_val < 0 || dst_reg->smin_value < 0) {
2712 			/* Ain't nobody got time to multiply that sign */
2713 			__mark_reg_unbounded(dst_reg);
2714 			__update_reg_bounds(dst_reg);
2715 			break;
2716 		}
2717 		/* Both values are positive, so we can work with unsigned and
2718 		 * copy the result to signed (unless it exceeds S64_MAX).
2719 		 */
2720 		if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
2721 			/* Potential overflow, we know nothing */
2722 			__mark_reg_unbounded(dst_reg);
2723 			/* (except what we can learn from the var_off) */
2724 			__update_reg_bounds(dst_reg);
2725 			break;
2726 		}
2727 		dst_reg->umin_value *= umin_val;
2728 		dst_reg->umax_value *= umax_val;
2729 		if (dst_reg->umax_value > S64_MAX) {
2730 			/* Overflow possible, we know nothing */
2731 			dst_reg->smin_value = S64_MIN;
2732 			dst_reg->smax_value = S64_MAX;
2733 		} else {
2734 			dst_reg->smin_value = dst_reg->umin_value;
2735 			dst_reg->smax_value = dst_reg->umax_value;
2736 		}
2737 		break;
2738 	case BPF_AND:
2739 		if (src_known && dst_known) {
2740 			__mark_reg_known(dst_reg, dst_reg->var_off.value &
2741 						  src_reg.var_off.value);
2742 			break;
2743 		}
2744 		/* We get our minimum from the var_off, since that's inherently
2745 		 * bitwise.  Our maximum is the minimum of the operands' maxima.
2746 		 */
2747 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
2748 		dst_reg->umin_value = dst_reg->var_off.value;
2749 		dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
2750 		if (dst_reg->smin_value < 0 || smin_val < 0) {
2751 			/* Lose signed bounds when ANDing negative numbers,
2752 			 * ain't nobody got time for that.
2753 			 */
2754 			dst_reg->smin_value = S64_MIN;
2755 			dst_reg->smax_value = S64_MAX;
2756 		} else {
2757 			/* ANDing two positives gives a positive, so safe to
2758 			 * cast result into s64.
2759 			 */
2760 			dst_reg->smin_value = dst_reg->umin_value;
2761 			dst_reg->smax_value = dst_reg->umax_value;
2762 		}
2763 		/* We may learn something more from the var_off */
2764 		__update_reg_bounds(dst_reg);
2765 		break;
2766 	case BPF_OR:
2767 		if (src_known && dst_known) {
2768 			__mark_reg_known(dst_reg, dst_reg->var_off.value |
2769 						  src_reg.var_off.value);
2770 			break;
2771 		}
2772 		/* We get our maximum from the var_off, and our minimum is the
2773 		 * maximum of the operands' minima
2774 		 */
2775 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
2776 		dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
2777 		dst_reg->umax_value = dst_reg->var_off.value |
2778 				      dst_reg->var_off.mask;
2779 		if (dst_reg->smin_value < 0 || smin_val < 0) {
2780 			/* Lose signed bounds when ORing negative numbers,
2781 			 * ain't nobody got time for that.
2782 			 */
2783 			dst_reg->smin_value = S64_MIN;
2784 			dst_reg->smax_value = S64_MAX;
2785 		} else {
2786 			/* ORing two positives gives a positive, so safe to
2787 			 * cast result into s64.
2788 			 */
2789 			dst_reg->smin_value = dst_reg->umin_value;
2790 			dst_reg->smax_value = dst_reg->umax_value;
2791 		}
2792 		/* We may learn something more from the var_off */
2793 		__update_reg_bounds(dst_reg);
2794 		break;
2795 	case BPF_LSH:
2796 		if (umax_val >= insn_bitness) {
2797 			/* Shifts greater than 31 or 63 are undefined.
2798 			 * This includes shifts by a negative number.
2799 			 */
2800 			mark_reg_unknown(env, regs, insn->dst_reg);
2801 			break;
2802 		}
2803 		/* We lose all sign bit information (except what we can pick
2804 		 * up from var_off)
2805 		 */
2806 		dst_reg->smin_value = S64_MIN;
2807 		dst_reg->smax_value = S64_MAX;
2808 		/* If we might shift our top bit out, then we know nothing */
2809 		if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
2810 			dst_reg->umin_value = 0;
2811 			dst_reg->umax_value = U64_MAX;
2812 		} else {
2813 			dst_reg->umin_value <<= umin_val;
2814 			dst_reg->umax_value <<= umax_val;
2815 		}
2816 		if (src_known)
2817 			dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
2818 		else
2819 			dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
2820 		/* We may learn something more from the var_off */
2821 		__update_reg_bounds(dst_reg);
2822 		break;
2823 	case BPF_RSH:
2824 		if (umax_val >= insn_bitness) {
2825 			/* Shifts greater than 31 or 63 are undefined.
2826 			 * This includes shifts by a negative number.
2827 			 */
2828 			mark_reg_unknown(env, regs, insn->dst_reg);
2829 			break;
2830 		}
2831 		/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
2832 		 * be negative, then either:
2833 		 * 1) src_reg might be zero, so the sign bit of the result is
2834 		 *    unknown, so we lose our signed bounds
2835 		 * 2) it's known negative, thus the unsigned bounds capture the
2836 		 *    signed bounds
2837 		 * 3) the signed bounds cross zero, so they tell us nothing
2838 		 *    about the result
2839 		 * If the value in dst_reg is known nonnegative, then again the
2840 		 * unsigned bounds capture the signed bounds.
2841 		 * Thus, in all cases it suffices to blow away our signed bounds
2842 		 * and rely on inferring new ones from the unsigned bounds and
2843 		 * var_off of the result.
2844 		 */
2845 		dst_reg->smin_value = S64_MIN;
2846 		dst_reg->smax_value = S64_MAX;
2847 		if (src_known)
2848 			dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2849 						       umin_val);
2850 		else
2851 			dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
2852 		dst_reg->umin_value >>= umax_val;
2853 		dst_reg->umax_value >>= umin_val;
2854 		/* We may learn something more from the var_off */
2855 		__update_reg_bounds(dst_reg);
2856 		break;
2857 	default:
2858 		mark_reg_unknown(env, regs, insn->dst_reg);
2859 		break;
2860 	}
2861 
2862 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
2863 		/* 32-bit ALU ops are (32,32)->32 */
2864 		coerce_reg_to_size(dst_reg, 4);
2865 		coerce_reg_to_size(&src_reg, 4);
2866 	}
2867 
2868 	__reg_deduce_bounds(dst_reg);
2869 	__reg_bound_offset(dst_reg);
2870 	return 0;
2871 }
2872 
2873 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
2874  * and var_off.
2875  */
2876 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2877 				   struct bpf_insn *insn)
2878 {
2879 	struct bpf_verifier_state *vstate = env->cur_state;
2880 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2881 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
2882 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2883 	u8 opcode = BPF_OP(insn->code);
2884 
2885 	dst_reg = &regs[insn->dst_reg];
2886 	src_reg = NULL;
2887 	if (dst_reg->type != SCALAR_VALUE)
2888 		ptr_reg = dst_reg;
2889 	if (BPF_SRC(insn->code) == BPF_X) {
2890 		src_reg = &regs[insn->src_reg];
2891 		if (src_reg->type != SCALAR_VALUE) {
2892 			if (dst_reg->type != SCALAR_VALUE) {
2893 				/* Combining two pointers by any ALU op yields
2894 				 * an arbitrary scalar. Disallow all math except
2895 				 * pointer subtraction
2896 				 */
2897 				if (opcode == BPF_SUB) {
2898 					mark_reg_unknown(env, regs, insn->dst_reg);
2899 					return 0;
2900 				}
2901 				verbose(env, "R%d pointer %s pointer prohibited\n",
2902 					insn->dst_reg,
2903 					bpf_alu_string[opcode >> 4]);
2904 				return -EACCES;
2905 			} else {
2906 				/* scalar += pointer
2907 				 * This is legal, but we have to reverse our
2908 				 * src/dest handling in computing the range
2909 				 */
2910 				return adjust_ptr_min_max_vals(env, insn,
2911 							       src_reg, dst_reg);
2912 			}
2913 		} else if (ptr_reg) {
2914 			/* pointer += scalar */
2915 			return adjust_ptr_min_max_vals(env, insn,
2916 						       dst_reg, src_reg);
2917 		}
2918 	} else {
2919 		/* Pretend the src is a reg with a known value, since we only
2920 		 * need to be able to read from this state.
2921 		 */
2922 		off_reg.type = SCALAR_VALUE;
2923 		__mark_reg_known(&off_reg, insn->imm);
2924 		src_reg = &off_reg;
2925 		if (ptr_reg) /* pointer += K */
2926 			return adjust_ptr_min_max_vals(env, insn,
2927 						       ptr_reg, src_reg);
2928 	}
2929 
2930 	/* Got here implies adding two SCALAR_VALUEs */
2931 	if (WARN_ON_ONCE(ptr_reg)) {
2932 		print_verifier_state(env, state);
2933 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
2934 		return -EINVAL;
2935 	}
2936 	if (WARN_ON(!src_reg)) {
2937 		print_verifier_state(env, state);
2938 		verbose(env, "verifier internal error: no src_reg\n");
2939 		return -EINVAL;
2940 	}
2941 	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
2942 }
2943 
2944 /* check validity of 32-bit and 64-bit arithmetic operations */
2945 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2946 {
2947 	struct bpf_reg_state *regs = cur_regs(env);
2948 	u8 opcode = BPF_OP(insn->code);
2949 	int err;
2950 
2951 	if (opcode == BPF_END || opcode == BPF_NEG) {
2952 		if (opcode == BPF_NEG) {
2953 			if (BPF_SRC(insn->code) != 0 ||
2954 			    insn->src_reg != BPF_REG_0 ||
2955 			    insn->off != 0 || insn->imm != 0) {
2956 				verbose(env, "BPF_NEG uses reserved fields\n");
2957 				return -EINVAL;
2958 			}
2959 		} else {
2960 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
2961 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
2962 			    BPF_CLASS(insn->code) == BPF_ALU64) {
2963 				verbose(env, "BPF_END uses reserved fields\n");
2964 				return -EINVAL;
2965 			}
2966 		}
2967 
2968 		/* check src operand */
2969 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2970 		if (err)
2971 			return err;
2972 
2973 		if (is_pointer_value(env, insn->dst_reg)) {
2974 			verbose(env, "R%d pointer arithmetic prohibited\n",
2975 				insn->dst_reg);
2976 			return -EACCES;
2977 		}
2978 
2979 		/* check dest operand */
2980 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
2981 		if (err)
2982 			return err;
2983 
2984 	} else if (opcode == BPF_MOV) {
2985 
2986 		if (BPF_SRC(insn->code) == BPF_X) {
2987 			if (insn->imm != 0 || insn->off != 0) {
2988 				verbose(env, "BPF_MOV uses reserved fields\n");
2989 				return -EINVAL;
2990 			}
2991 
2992 			/* check src operand */
2993 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
2994 			if (err)
2995 				return err;
2996 		} else {
2997 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
2998 				verbose(env, "BPF_MOV uses reserved fields\n");
2999 				return -EINVAL;
3000 			}
3001 		}
3002 
3003 		/* check dest operand */
3004 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
3005 		if (err)
3006 			return err;
3007 
3008 		if (BPF_SRC(insn->code) == BPF_X) {
3009 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
3010 				/* case: R1 = R2
3011 				 * copy register state to dest reg
3012 				 */
3013 				regs[insn->dst_reg] = regs[insn->src_reg];
3014 				regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
3015 			} else {
3016 				/* R1 = (u32) R2 */
3017 				if (is_pointer_value(env, insn->src_reg)) {
3018 					verbose(env,
3019 						"R%d partial copy of pointer\n",
3020 						insn->src_reg);
3021 					return -EACCES;
3022 				}
3023 				mark_reg_unknown(env, regs, insn->dst_reg);
3024 				coerce_reg_to_size(&regs[insn->dst_reg], 4);
3025 			}
3026 		} else {
3027 			/* case: R = imm
3028 			 * remember the value we stored into this reg
3029 			 */
3030 			regs[insn->dst_reg].type = SCALAR_VALUE;
3031 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
3032 				__mark_reg_known(regs + insn->dst_reg,
3033 						 insn->imm);
3034 			} else {
3035 				__mark_reg_known(regs + insn->dst_reg,
3036 						 (u32)insn->imm);
3037 			}
3038 		}
3039 
3040 	} else if (opcode > BPF_END) {
3041 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
3042 		return -EINVAL;
3043 
3044 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
3045 
3046 		if (BPF_SRC(insn->code) == BPF_X) {
3047 			if (insn->imm != 0 || insn->off != 0) {
3048 				verbose(env, "BPF_ALU uses reserved fields\n");
3049 				return -EINVAL;
3050 			}
3051 			/* check src1 operand */
3052 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
3053 			if (err)
3054 				return err;
3055 		} else {
3056 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
3057 				verbose(env, "BPF_ALU uses reserved fields\n");
3058 				return -EINVAL;
3059 			}
3060 		}
3061 
3062 		/* check src2 operand */
3063 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3064 		if (err)
3065 			return err;
3066 
3067 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
3068 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
3069 			verbose(env, "div by zero\n");
3070 			return -EINVAL;
3071 		}
3072 
3073 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
3074 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
3075 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
3076 
3077 			if (insn->imm < 0 || insn->imm >= size) {
3078 				verbose(env, "invalid shift %d\n", insn->imm);
3079 				return -EINVAL;
3080 			}
3081 		}
3082 
3083 		/* check dest operand */
3084 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
3085 		if (err)
3086 			return err;
3087 
3088 		return adjust_reg_min_max_vals(env, insn);
3089 	}
3090 
3091 	return 0;
3092 }
3093 
3094 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
3095 				   struct bpf_reg_state *dst_reg,
3096 				   enum bpf_reg_type type,
3097 				   bool range_right_open)
3098 {
3099 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3100 	struct bpf_reg_state *regs = state->regs, *reg;
3101 	u16 new_range;
3102 	int i, j;
3103 
3104 	if (dst_reg->off < 0 ||
3105 	    (dst_reg->off == 0 && range_right_open))
3106 		/* This doesn't give us any range */
3107 		return;
3108 
3109 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
3110 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
3111 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
3112 		 * than pkt_end, but that's because it's also less than pkt.
3113 		 */
3114 		return;
3115 
3116 	new_range = dst_reg->off;
3117 	if (range_right_open)
3118 		new_range--;
3119 
3120 	/* Examples for register markings:
3121 	 *
3122 	 * pkt_data in dst register:
3123 	 *
3124 	 *   r2 = r3;
3125 	 *   r2 += 8;
3126 	 *   if (r2 > pkt_end) goto <handle exception>
3127 	 *   <access okay>
3128 	 *
3129 	 *   r2 = r3;
3130 	 *   r2 += 8;
3131 	 *   if (r2 < pkt_end) goto <access okay>
3132 	 *   <handle exception>
3133 	 *
3134 	 *   Where:
3135 	 *     r2 == dst_reg, pkt_end == src_reg
3136 	 *     r2=pkt(id=n,off=8,r=0)
3137 	 *     r3=pkt(id=n,off=0,r=0)
3138 	 *
3139 	 * pkt_data in src register:
3140 	 *
3141 	 *   r2 = r3;
3142 	 *   r2 += 8;
3143 	 *   if (pkt_end >= r2) goto <access okay>
3144 	 *   <handle exception>
3145 	 *
3146 	 *   r2 = r3;
3147 	 *   r2 += 8;
3148 	 *   if (pkt_end <= r2) goto <handle exception>
3149 	 *   <access okay>
3150 	 *
3151 	 *   Where:
3152 	 *     pkt_end == dst_reg, r2 == src_reg
3153 	 *     r2=pkt(id=n,off=8,r=0)
3154 	 *     r3=pkt(id=n,off=0,r=0)
3155 	 *
3156 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
3157 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
3158 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
3159 	 * the check.
3160 	 */
3161 
3162 	/* If our ids match, then we must have the same max_value.  And we
3163 	 * don't care about the other reg's fixed offset, since if it's too big
3164 	 * the range won't allow anything.
3165 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
3166 	 */
3167 	for (i = 0; i < MAX_BPF_REG; i++)
3168 		if (regs[i].type == type && regs[i].id == dst_reg->id)
3169 			/* keep the maximum range already checked */
3170 			regs[i].range = max(regs[i].range, new_range);
3171 
3172 	for (j = 0; j <= vstate->curframe; j++) {
3173 		state = vstate->frame[j];
3174 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
3175 			if (state->stack[i].slot_type[0] != STACK_SPILL)
3176 				continue;
3177 			reg = &state->stack[i].spilled_ptr;
3178 			if (reg->type == type && reg->id == dst_reg->id)
3179 				reg->range = max(reg->range, new_range);
3180 		}
3181 	}
3182 }
3183 
3184 /* Adjusts the register min/max values in the case that the dst_reg is the
3185  * variable register that we are working on, and src_reg is a constant or we're
3186  * simply doing a BPF_K check.
3187  * In JEQ/JNE cases we also adjust the var_off values.
3188  */
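/* For instance, with an unknown scalar in r1, a branch like
 *
 *   if (r1 > 10) goto <target>
 *
 * lets the taken ('true') state learn umin_value >= 11 while the
 * fall-through ('false') state learns umax_value <= 10; the signed and
 * var_off refinements below follow the same pattern.
 */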
3189 static void reg_set_min_max(struct bpf_reg_state *true_reg,
3190 			    struct bpf_reg_state *false_reg, u64 val,
3191 			    u8 opcode)
3192 {
3193 	/* If the dst_reg is a pointer, we can't learn anything about its
3194 	 * variable offset from the compare (unless src_reg were a pointer into
3195 	 * the same object, but we don't bother with that).
3196 	 * Since false_reg and true_reg have the same type by construction, we
3197 	 * only need to check one of them for pointerness.
3198 	 */
3199 	if (__is_pointer_value(false, false_reg))
3200 		return;
3201 
3202 	switch (opcode) {
3203 	case BPF_JEQ:
3204 		/* If this is false then we know nothing Jon Snow, but if it is
3205 		 * true then we know for sure.
3206 		 */
3207 		__mark_reg_known(true_reg, val);
3208 		break;
3209 	case BPF_JNE:
3210 		/* If this is true we know nothing Jon Snow, but if it is false
3211 		 * we know the value for sure;
3212 		 */
3213 		__mark_reg_known(false_reg, val);
3214 		break;
3215 	case BPF_JGT:
3216 		false_reg->umax_value = min(false_reg->umax_value, val);
3217 		true_reg->umin_value = max(true_reg->umin_value, val + 1);
3218 		break;
3219 	case BPF_JSGT:
3220 		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3221 		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3222 		break;
3223 	case BPF_JLT:
3224 		false_reg->umin_value = max(false_reg->umin_value, val);
3225 		true_reg->umax_value = min(true_reg->umax_value, val - 1);
3226 		break;
3227 	case BPF_JSLT:
3228 		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3229 		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3230 		break;
3231 	case BPF_JGE:
3232 		false_reg->umax_value = min(false_reg->umax_value, val - 1);
3233 		true_reg->umin_value = max(true_reg->umin_value, val);
3234 		break;
3235 	case BPF_JSGE:
3236 		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3237 		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3238 		break;
3239 	case BPF_JLE:
3240 		false_reg->umin_value = max(false_reg->umin_value, val + 1);
3241 		true_reg->umax_value = min(true_reg->umax_value, val);
3242 		break;
3243 	case BPF_JSLE:
3244 		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3245 		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3246 		break;
3247 	default:
3248 		break;
3249 	}
3250 
3251 	__reg_deduce_bounds(false_reg);
3252 	__reg_deduce_bounds(true_reg);
3253 	/* We might have learned some bits from the bounds. */
3254 	__reg_bound_offset(false_reg);
3255 	__reg_bound_offset(true_reg);
3256 	/* Intersecting with the old var_off might have improved our bounds
3257 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3258 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
3259 	 */
3260 	__update_reg_bounds(false_reg);
3261 	__update_reg_bounds(true_reg);
3262 }
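
/* Worked example for reg_set_min_max() above (illustrative, values made up):
 * for "if r1 > 7 goto ..." with r1 a scalar currently bounded to [0, 255],
 * BPF_JGT leaves the fall-through copy (false_reg) with
 *   umax_value = min(255, 7) = 7
 * and the branch-taken copy (true_reg) with
 *   umin_value = max(0, 7 + 1) = 8,
 * after which the deduce/bound/update calls at the end of the function
 * tighten var_off and the signed bounds to match.
 */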
3263 
3264 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
3265  * the variable reg.
3266  */
3267 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3268 				struct bpf_reg_state *false_reg, u64 val,
3269 				u8 opcode)
3270 {
3271 	if (__is_pointer_value(false, false_reg))
3272 		return;
3273 
3274 	switch (opcode) {
3275 	case BPF_JEQ:
3276 		/* If this is false then we know nothing Jon Snow, but if it is
3277 		 * true then we know for sure.
3278 		 */
3279 		__mark_reg_known(true_reg, val);
3280 		break;
3281 	case BPF_JNE:
3282 		/* If this is true we know nothing Jon Snow, but if it is false
3283 		 * we know the value for sure.
3284 		 */
3285 		__mark_reg_known(false_reg, val);
3286 		break;
3287 	case BPF_JGT:
3288 		true_reg->umax_value = min(true_reg->umax_value, val - 1);
3289 		false_reg->umin_value = max(false_reg->umin_value, val);
3290 		break;
3291 	case BPF_JSGT:
3292 		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3293 		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3294 		break;
3295 	case BPF_JLT:
3296 		true_reg->umin_value = max(true_reg->umin_value, val + 1);
3297 		false_reg->umax_value = min(false_reg->umax_value, val);
3298 		break;
3299 	case BPF_JSLT:
3300 		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3301 		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3302 		break;
3303 	case BPF_JGE:
3304 		true_reg->umax_value = min(true_reg->umax_value, val);
3305 		false_reg->umin_value = max(false_reg->umin_value, val + 1);
3306 		break;
3307 	case BPF_JSGE:
3308 		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3309 		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3310 		break;
3311 	case BPF_JLE:
3312 		true_reg->umin_value = max(true_reg->umin_value, val);
3313 		false_reg->umax_value = min(false_reg->umax_value, val - 1);
3314 		break;
3315 	case BPF_JSLE:
3316 		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3317 		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3318 		break;
3319 	default:
3320 		break;
3321 	}
3322 
3323 	__reg_deduce_bounds(false_reg);
3324 	__reg_deduce_bounds(true_reg);
3325 	/* We might have learned some bits from the bounds. */
3326 	__reg_bound_offset(false_reg);
3327 	__reg_bound_offset(true_reg);
3328 	/* Intersecting with the old var_off might have improved our bounds
3329 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3330 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
3331 	 */
3332 	__update_reg_bounds(false_reg);
3333 	__update_reg_bounds(true_reg);
3334 }
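
/* Mirror-image example for reg_set_min_max_inv() above (illustrative):
 * for "if r2 > r1 goto ..." where r2 is a known constant 7 and r1 is a
 * scalar in [0, 255], the taken branch learns r1 <= 6 (umax_value = 7 - 1)
 * and the fall-through learns r1 >= 7 (umin_value = 7).
 */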
3335 
3336 /* Regs are known to be equal, so intersect their min/max/var_off */
3337 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
3338 				  struct bpf_reg_state *dst_reg)
3339 {
3340 	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
3341 							dst_reg->umin_value);
3342 	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
3343 							dst_reg->umax_value);
3344 	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
3345 							dst_reg->smin_value);
3346 	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
3347 							dst_reg->smax_value);
3348 	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
3349 							     dst_reg->var_off);
3350 	/* We might have learned new bounds from the var_off. */
3351 	__update_reg_bounds(src_reg);
3352 	__update_reg_bounds(dst_reg);
3353 	/* We might have learned something about the sign bit. */
3354 	__reg_deduce_bounds(src_reg);
3355 	__reg_deduce_bounds(dst_reg);
3356 	/* We might have learned some bits from the bounds. */
3357 	__reg_bound_offset(src_reg);
3358 	__reg_bound_offset(dst_reg);
3359 	/* Intersecting with the old var_off might have improved our bounds
3360 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3361 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
3362 	 */
3363 	__update_reg_bounds(src_reg);
3364 	__update_reg_bounds(dst_reg);
3365 }
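
/* Illustrative example for __reg_combine_min_max() above: if r1 is known to
 * lie in [0, 100] and r2 in [50, 200], learning r1 == r2 narrows both to
 * [50, 100] (max of the umin/smin values, min of the umax/smax values) and
 * intersects their var_off tnums.
 */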
3366 
3367 static void reg_combine_min_max(struct bpf_reg_state *true_src,
3368 				struct bpf_reg_state *true_dst,
3369 				struct bpf_reg_state *false_src,
3370 				struct bpf_reg_state *false_dst,
3371 				u8 opcode)
3372 {
3373 	switch (opcode) {
3374 	case BPF_JEQ:
3375 		__reg_combine_min_max(true_src, true_dst);
3376 		break;
3377 	case BPF_JNE:
3378 		__reg_combine_min_max(false_src, false_dst);
3379 		break;
3380 	}
3381 }
3382 
3383 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
3384 			 bool is_null)
3385 {
3386 	struct bpf_reg_state *reg = &regs[regno];
3387 
3388 	if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
3389 		/* Old offset (both fixed and variable parts) should
3390 		 * have been known-zero, because we don't allow pointer
3391 		 * arithmetic on pointers that might be NULL.
3392 		 */
3393 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
3394 				 !tnum_equals_const(reg->var_off, 0) ||
3395 				 reg->off)) {
3396 			__mark_reg_known_zero(reg);
3397 			reg->off = 0;
3398 		}
3399 		if (is_null) {
3400 			reg->type = SCALAR_VALUE;
3401 		} else if (reg->map_ptr->inner_map_meta) {
3402 			reg->type = CONST_PTR_TO_MAP;
3403 			reg->map_ptr = reg->map_ptr->inner_map_meta;
3404 		} else {
3405 			reg->type = PTR_TO_MAP_VALUE;
3406 		}
3407 		/* We don't need the id from this point onwards anymore, thus
3408 		 * we'd better reset it, so that state pruning has a chance
3409 		 * to take effect.
3410 		 */
3411 		reg->id = 0;
3412 	}
3413 }
3414 
3415 /* The logic is similar to find_good_pkt_pointers(), both could eventually
3416  * be folded together at some point.
3417  */
3418 static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
3419 			  bool is_null)
3420 {
3421 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3422 	struct bpf_reg_state *regs = state->regs;
3423 	u32 id = regs[regno].id;
3424 	int i, j;
3425 
3426 	for (i = 0; i < MAX_BPF_REG; i++)
3427 		mark_map_reg(regs, i, id, is_null);
3428 
3429 	for (j = 0; j <= vstate->curframe; j++) {
3430 		state = vstate->frame[j];
3431 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
3432 			if (state->stack[i].slot_type[0] != STACK_SPILL)
3433 				continue;
3434 			mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
3435 		}
3436 	}
3437 }
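
/* Typical pattern handled by mark_map_regs() above (illustrative sketch,
 * jump offset made up):
 *   BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
 * On the fall-through (R0 != 0) path every register sharing R0's id is
 * upgraded from PTR_TO_MAP_VALUE_OR_NULL to PTR_TO_MAP_VALUE; on the taken
 * (R0 == 0) path those registers become SCALAR_VALUE, since a known-NULL
 * pointer must never be dereferenced.
 */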
3438 
3439 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
3440 				   struct bpf_reg_state *dst_reg,
3441 				   struct bpf_reg_state *src_reg,
3442 				   struct bpf_verifier_state *this_branch,
3443 				   struct bpf_verifier_state *other_branch)
3444 {
3445 	if (BPF_SRC(insn->code) != BPF_X)
3446 		return false;
3447 
3448 	switch (BPF_OP(insn->code)) {
3449 	case BPF_JGT:
3450 		if ((dst_reg->type == PTR_TO_PACKET &&
3451 		     src_reg->type == PTR_TO_PACKET_END) ||
3452 		    (dst_reg->type == PTR_TO_PACKET_META &&
3453 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3454 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
3455 			find_good_pkt_pointers(this_branch, dst_reg,
3456 					       dst_reg->type, false);
3457 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
3458 			    src_reg->type == PTR_TO_PACKET) ||
3459 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3460 			    src_reg->type == PTR_TO_PACKET_META)) {
3461 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
3462 			find_good_pkt_pointers(other_branch, src_reg,
3463 					       src_reg->type, true);
3464 		} else {
3465 			return false;
3466 		}
3467 		break;
3468 	case BPF_JLT:
3469 		if ((dst_reg->type == PTR_TO_PACKET &&
3470 		     src_reg->type == PTR_TO_PACKET_END) ||
3471 		    (dst_reg->type == PTR_TO_PACKET_META &&
3472 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3473 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
3474 			find_good_pkt_pointers(other_branch, dst_reg,
3475 					       dst_reg->type, true);
3476 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
3477 			    src_reg->type == PTR_TO_PACKET) ||
3478 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3479 			    src_reg->type == PTR_TO_PACKET_META)) {
3480 			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
3481 			find_good_pkt_pointers(this_branch, src_reg,
3482 					       src_reg->type, false);
3483 		} else {
3484 			return false;
3485 		}
3486 		break;
3487 	case BPF_JGE:
3488 		if ((dst_reg->type == PTR_TO_PACKET &&
3489 		     src_reg->type == PTR_TO_PACKET_END) ||
3490 		    (dst_reg->type == PTR_TO_PACKET_META &&
3491 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3492 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
3493 			find_good_pkt_pointers(this_branch, dst_reg,
3494 					       dst_reg->type, true);
3495 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
3496 			    src_reg->type == PTR_TO_PACKET) ||
3497 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3498 			    src_reg->type == PTR_TO_PACKET_META)) {
3499 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
3500 			find_good_pkt_pointers(other_branch, src_reg,
3501 					       src_reg->type, false);
3502 		} else {
3503 			return false;
3504 		}
3505 		break;
3506 	case BPF_JLE:
3507 		if ((dst_reg->type == PTR_TO_PACKET &&
3508 		     src_reg->type == PTR_TO_PACKET_END) ||
3509 		    (dst_reg->type == PTR_TO_PACKET_META &&
3510 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3511 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
3512 			find_good_pkt_pointers(other_branch, dst_reg,
3513 					       dst_reg->type, false);
3514 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
3515 			    src_reg->type == PTR_TO_PACKET) ||
3516 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3517 			    src_reg->type == PTR_TO_PACKET_META)) {
3518 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
3519 			find_good_pkt_pointers(this_branch, src_reg,
3520 					       src_reg->type, true);
3521 		} else {
3522 			return false;
3523 		}
3524 		break;
3525 	default:
3526 		return false;
3527 	}
3528 
3529 	return true;
3530 }
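
/* Illustrative sketch of the C-level idiom try_match_pkt_pointers() above
 * is meant to recognize (as typically emitted by clang):
 *   void *data     = (void *)(long)ctx->data;
 *   void *data_end = (void *)(long)ctx->data_end;
 *   if (data + 14 > data_end)
 *           return XDP_DROP;
 *   ... accesses to the first 14 bytes are now allowed ...
 * On the fall-through branch find_good_pkt_pointers() records a range of 14
 * on every PTR_TO_PACKET register sharing the id of "data + 14".
 */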
3531 
3532 static int check_cond_jmp_op(struct bpf_verifier_env *env,
3533 			     struct bpf_insn *insn, int *insn_idx)
3534 {
3535 	struct bpf_verifier_state *this_branch = env->cur_state;
3536 	struct bpf_verifier_state *other_branch;
3537 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
3538 	struct bpf_reg_state *dst_reg, *other_branch_regs;
3539 	u8 opcode = BPF_OP(insn->code);
3540 	int err;
3541 
3542 	if (opcode > BPF_JSLE) {
3543 		verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
3544 		return -EINVAL;
3545 	}
3546 
3547 	if (BPF_SRC(insn->code) == BPF_X) {
3548 		if (insn->imm != 0) {
3549 			verbose(env, "BPF_JMP uses reserved fields\n");
3550 			return -EINVAL;
3551 		}
3552 
3553 		/* check src1 operand */
3554 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
3555 		if (err)
3556 			return err;
3557 
3558 		if (is_pointer_value(env, insn->src_reg)) {
3559 			verbose(env, "R%d pointer comparison prohibited\n",
3560 				insn->src_reg);
3561 			return -EACCES;
3562 		}
3563 	} else {
3564 		if (insn->src_reg != BPF_REG_0) {
3565 			verbose(env, "BPF_JMP uses reserved fields\n");
3566 			return -EINVAL;
3567 		}
3568 	}
3569 
3570 	/* check src2 operand */
3571 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3572 	if (err)
3573 		return err;
3574 
3575 	dst_reg = &regs[insn->dst_reg];
3576 
3577 	/* detect if R == 0 where R was initialized to zero earlier */
3578 	if (BPF_SRC(insn->code) == BPF_K &&
3579 	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
3580 	    dst_reg->type == SCALAR_VALUE &&
3581 	    tnum_is_const(dst_reg->var_off)) {
3582 		if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
3583 		    (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
3584 			/* if (imm == imm) goto pc+off;
3585 			 * only follow the goto, ignore fall-through
3586 			 */
3587 			*insn_idx += insn->off;
3588 			return 0;
3589 		} else {
3590 			/* if (imm != imm) goto pc+off;
3591 			 * only follow fall-through branch, since
3592 			 * that's where the program will go
3593 			 */
3594 			return 0;
3595 		}
3596 	}
3597 
3598 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
3599 	if (!other_branch)
3600 		return -EFAULT;
3601 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
3602 
3603 	/* detect if we are comparing against a constant value so we can adjust
3604 	 * our min/max values for our dst register.
3605 	 * this is only legit if both are scalars (or pointers to the same
3606 	 * object, I suppose, but we don't support that right now), because
3607 	 * otherwise the different base pointers mean the offsets aren't
3608 	 * comparable.
3609 	 */
3610 	if (BPF_SRC(insn->code) == BPF_X) {
3611 		if (dst_reg->type == SCALAR_VALUE &&
3612 		    regs[insn->src_reg].type == SCALAR_VALUE) {
3613 			if (tnum_is_const(regs[insn->src_reg].var_off))
3614 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
3615 						dst_reg, regs[insn->src_reg].var_off.value,
3616 						opcode);
3617 			else if (tnum_is_const(dst_reg->var_off))
3618 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
3619 						    &regs[insn->src_reg],
3620 						    dst_reg->var_off.value, opcode);
3621 			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
3622 				/* Comparing for equality, we can combine knowledge */
3623 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
3624 						    &other_branch_regs[insn->dst_reg],
3625 						    &regs[insn->src_reg],
3626 						    &regs[insn->dst_reg], opcode);
3627 		}
3628 	} else if (dst_reg->type == SCALAR_VALUE) {
3629 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
3630 					dst_reg, insn->imm, opcode);
3631 	}
3632 
3633 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
3634 	if (BPF_SRC(insn->code) == BPF_K &&
3635 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
3636 	    dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
3637 		/* Mark all identical map registers in each branch as either
3638 		 * safe or unknown, depending on the R == 0 or R != 0 conditional.
3639 		 */
3640 		mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
3641 		mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
3642 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
3643 					   this_branch, other_branch) &&
3644 		   is_pointer_value(env, insn->dst_reg)) {
3645 		verbose(env, "R%d pointer comparison prohibited\n",
3646 			insn->dst_reg);
3647 		return -EACCES;
3648 	}
3649 	if (env->log.level)
3650 		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
3651 	return 0;
3652 }
3653 
3654 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
3655 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
3656 {
3657 	u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
3658 
3659 	return (struct bpf_map *) (unsigned long) imm64;
3660 }
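
/* Encoding reminder (illustrative): BPF_LD_IMM64 spans two struct bpf_insn
 * slots, with the low 32 bits of the constant in insn[0].imm and the high
 * 32 bits in insn[1].imm, so e.g. a map pointer 0x1122334455667788 is
 * stored as insn[0].imm = 0x55667788 and insn[1].imm = 0x11223344.
 */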
3661 
3662 /* verify BPF_LD_IMM64 instruction */
3663 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
3664 {
3665 	struct bpf_reg_state *regs = cur_regs(env);
3666 	int err;
3667 
3668 	if (BPF_SIZE(insn->code) != BPF_DW) {
3669 		verbose(env, "invalid BPF_LD_IMM insn\n");
3670 		return -EINVAL;
3671 	}
3672 	if (insn->off != 0) {
3673 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
3674 		return -EINVAL;
3675 	}
3676 
3677 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
3678 	if (err)
3679 		return err;
3680 
3681 	if (insn->src_reg == 0) {
3682 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
3683 
3684 		regs[insn->dst_reg].type = SCALAR_VALUE;
3685 		__mark_reg_known(&regs[insn->dst_reg], imm);
3686 		return 0;
3687 	}
3688 
3689 	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
3690 	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
3691 
3692 	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
3693 	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
3694 	return 0;
3695 }
3696 
3697 static bool may_access_skb(enum bpf_prog_type type)
3698 {
3699 	switch (type) {
3700 	case BPF_PROG_TYPE_SOCKET_FILTER:
3701 	case BPF_PROG_TYPE_SCHED_CLS:
3702 	case BPF_PROG_TYPE_SCHED_ACT:
3703 		return true;
3704 	default:
3705 		return false;
3706 	}
3707 }
3708 
3709 /* verify safety of LD_ABS|LD_IND instructions:
3710  * - they can only appear in the programs where ctx == skb
3711  * - since they are wrappers of function calls, they scratch R1-R5 registers,
3712  *   preserve R6-R9, and store return value into R0
3713  *
3714  * Implicit input:
3715  *   ctx == skb == R6 == CTX
3716  *
3717  * Explicit input:
3718  *   SRC == any register
3719  *   IMM == 32-bit immediate
3720  *
3721  * Output:
3722  *   R0 - 8/16/32-bit skb data converted to cpu endianness
3723  */
3724 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
3725 {
3726 	struct bpf_reg_state *regs = cur_regs(env);
3727 	u8 mode = BPF_MODE(insn->code);
3728 	int i, err;
3729 
3730 	if (!may_access_skb(env->prog->type)) {
3731 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
3732 		return -EINVAL;
3733 	}
3734 
3735 	if (env->subprog_cnt) {
3736 		/* when a program has an LD_ABS insn, JITs and the interpreter
3737 		 * assume that r1 == ctx == skb, which is not the case for
3738 		 * callees that can have arbitrary arguments. It's problematic
3739 		 * for the main prog as well, since JITs would need to analyze
3740 		 * all functions in order to make proper register save/restore
3741 		 * decisions in the main prog. Hence disallow LD_ABS with calls.
3742 		 */
3743 		verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
3744 		return -EINVAL;
3745 	}
3746 
3747 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
3748 	    BPF_SIZE(insn->code) == BPF_DW ||
3749 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
3750 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
3751 		return -EINVAL;
3752 	}
3753 
3754 	/* check whether implicit source operand (register R6) is readable */
3755 	err = check_reg_arg(env, BPF_REG_6, SRC_OP);
3756 	if (err)
3757 		return err;
3758 
3759 	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
3760 		verbose(env,
3761 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
3762 		return -EINVAL;
3763 	}
3764 
3765 	if (mode == BPF_IND) {
3766 		/* check explicit source operand */
3767 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
3768 		if (err)
3769 			return err;
3770 	}
3771 
3772 	/* reset caller saved regs to unreadable */
3773 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
3774 		mark_reg_not_init(env, regs, caller_saved[i]);
3775 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3776 	}
3777 
3778 	/* mark destination R0 register as readable, since it contains
3779 	 * the value fetched from the packet.
3780 	 * Already marked as written above.
3781 	 */
3782 	mark_reg_unknown(env, regs, BPF_REG_0);
3783 	return 0;
3784 }
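
/* Example of an instruction accepted by check_ld_abs() above (illustrative):
 *   BPF_LD_ABS(BPF_H, 12)
 * loads the halfword at packet offset 12 (the ethertype of an untagged
 * Ethernet frame as seen by a socket filter) into R0 in cpu endianness,
 * scratching R1-R5 exactly like a helper call would.
 */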
3785 
3786 static int check_return_code(struct bpf_verifier_env *env)
3787 {
3788 	struct bpf_reg_state *reg;
3789 	struct tnum range = tnum_range(0, 1);
3790 
3791 	switch (env->prog->type) {
3792 	case BPF_PROG_TYPE_CGROUP_SKB:
3793 	case BPF_PROG_TYPE_CGROUP_SOCK:
3794 	case BPF_PROG_TYPE_SOCK_OPS:
3795 	case BPF_PROG_TYPE_CGROUP_DEVICE:
3796 		break;
3797 	default:
3798 		return 0;
3799 	}
3800 
3801 	reg = cur_regs(env) + BPF_REG_0;
3802 	if (reg->type != SCALAR_VALUE) {
3803 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
3804 			reg_type_str[reg->type]);
3805 		return -EINVAL;
3806 	}
3807 
3808 	if (!tnum_in(range, reg->var_off)) {
3809 		verbose(env, "At program exit the register R0 ");
3810 		if (!tnum_is_unknown(reg->var_off)) {
3811 			char tn_buf[48];
3812 
3813 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3814 			verbose(env, "has value %s", tn_buf);
3815 		} else {
3816 			verbose(env, "has unknown scalar value");
3817 		}
3818 		verbose(env, " should have been 0 or 1\n");
3819 		return -EINVAL;
3820 	}
3821 	return 0;
3822 }
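
/* Illustrative example for check_return_code() above: a
 * BPF_PROG_TYPE_CGROUP_SKB program ending in
 *   BPF_MOV64_IMM(BPF_REG_0, 1),
 *   BPF_EXIT_INSN(),
 * passes, since R0's var_off is the constant 1 and lies inside
 * tnum_range(0, 1); ending with "r0 = 2" would be rejected here.
 */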
3823 
3824 /* non-recursive DFS pseudo code
3825  * 1  procedure DFS-iterative(G,v):
3826  * 2      label v as discovered
3827  * 3      let S be a stack
3828  * 4      S.push(v)
3829  * 5      while S is not empty
3830  * 6            t <- S.pop()
3831  * 7            if t is what we're looking for:
3832  * 8                return t
3833  * 9            for all edges e in G.adjacentEdges(t) do
3834  * 10               if edge e is already labelled
3835  * 11                   continue with the next edge
3836  * 12               w <- G.adjacentVertex(t,e)
3837  * 13               if vertex w is not discovered and not explored
3838  * 14                   label e as tree-edge
3839  * 15                   label w as discovered
3840  * 16                   S.push(w)
3841  * 17                   continue at 5
3842  * 18               else if vertex w is discovered
3843  * 19                   label e as back-edge
3844  * 20               else
3845  * 21                   // vertex w is explored
3846  * 22                   label e as forward- or cross-edge
3847  * 23           label t as explored
3848  * 24           S.pop()
3849  *
3850  * convention:
3851  * 0x10 - discovered
3852  * 0x11 - discovered and fall-through edge labelled
3853  * 0x12 - discovered and fall-through and branch edges labelled
3854  * 0x20 - explored
3855  */
3856 
3857 enum {
3858 	DISCOVERED = 0x10,
3859 	EXPLORED = 0x20,
3860 	FALLTHROUGH = 1,
3861 	BRANCH = 2,
3862 };
3863 
3864 #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
3865 
3866 static int *insn_stack;	/* stack of insns to process */
3867 static int cur_stack;	/* current stack index */
3868 static int *insn_state;
3869 
3870 /* t, w, e - match pseudo-code above:
3871  * t - index of current instruction
3872  * w - next instruction
3873  * e - edge
3874  */
3875 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
3876 {
3877 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
3878 		return 0;
3879 
3880 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
3881 		return 0;
3882 
3883 	if (w < 0 || w >= env->prog->len) {
3884 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
3885 		return -EINVAL;
3886 	}
3887 
3888 	if (e == BRANCH)
3889 		/* mark branch target for state pruning */
3890 		env->explored_states[w] = STATE_LIST_MARK;
3891 
3892 	if (insn_state[w] == 0) {
3893 		/* tree-edge */
3894 		insn_state[t] = DISCOVERED | e;
3895 		insn_state[w] = DISCOVERED;
3896 		if (cur_stack >= env->prog->len)
3897 			return -E2BIG;
3898 		insn_stack[cur_stack++] = w;
3899 		return 1;
3900 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
3901 		verbose(env, "back-edge from insn %d to %d\n", t, w);
3902 		return -EINVAL;
3903 	} else if (insn_state[w] == EXPLORED) {
3904 		/* forward- or cross-edge */
3905 		insn_state[t] = DISCOVERED | e;
3906 	} else {
3907 		verbose(env, "insn state internal bug\n");
3908 		return -EFAULT;
3909 	}
3910 	return 0;
3911 }
3912 
3913 /* non-recursive depth-first-search to detect loops in BPF program
3914  * loop == back-edge in directed graph
3915  */
3916 static int check_cfg(struct bpf_verifier_env *env)
3917 {
3918 	struct bpf_insn *insns = env->prog->insnsi;
3919 	int insn_cnt = env->prog->len;
3920 	int ret = 0;
3921 	int i, t;
3922 
3923 	ret = check_subprogs(env);
3924 	if (ret < 0)
3925 		return ret;
3926 
3927 	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
3928 	if (!insn_state)
3929 		return -ENOMEM;
3930 
3931 	insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
3932 	if (!insn_stack) {
3933 		kfree(insn_state);
3934 		return -ENOMEM;
3935 	}
3936 
3937 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
3938 	insn_stack[0] = 0; /* 0 is the first instruction */
3939 	cur_stack = 1;
3940 
3941 peek_stack:
3942 	if (cur_stack == 0)
3943 		goto check_state;
3944 	t = insn_stack[cur_stack - 1];
3945 
3946 	if (BPF_CLASS(insns[t].code) == BPF_JMP) {
3947 		u8 opcode = BPF_OP(insns[t].code);
3948 
3949 		if (opcode == BPF_EXIT) {
3950 			goto mark_explored;
3951 		} else if (opcode == BPF_CALL) {
3952 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
3953 			if (ret == 1)
3954 				goto peek_stack;
3955 			else if (ret < 0)
3956 				goto err_free;
3957 			if (t + 1 < insn_cnt)
3958 				env->explored_states[t + 1] = STATE_LIST_MARK;
3959 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
3960 				env->explored_states[t] = STATE_LIST_MARK;
3961 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
3962 				if (ret == 1)
3963 					goto peek_stack;
3964 				else if (ret < 0)
3965 					goto err_free;
3966 			}
3967 		} else if (opcode == BPF_JA) {
3968 			if (BPF_SRC(insns[t].code) != BPF_K) {
3969 				ret = -EINVAL;
3970 				goto err_free;
3971 			}
3972 			/* unconditional jump with single edge */
3973 			ret = push_insn(t, t + insns[t].off + 1,
3974 					FALLTHROUGH, env);
3975 			if (ret == 1)
3976 				goto peek_stack;
3977 			else if (ret < 0)
3978 				goto err_free;
3979 			/* tell verifier to check for equivalent states
3980 			 * after every call and jump
3981 			 */
3982 			if (t + 1 < insn_cnt)
3983 				env->explored_states[t + 1] = STATE_LIST_MARK;
3984 		} else {
3985 			/* conditional jump with two edges */
3986 			env->explored_states[t] = STATE_LIST_MARK;
3987 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
3988 			if (ret == 1)
3989 				goto peek_stack;
3990 			else if (ret < 0)
3991 				goto err_free;
3992 
3993 			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
3994 			if (ret == 1)
3995 				goto peek_stack;
3996 			else if (ret < 0)
3997 				goto err_free;
3998 		}
3999 	} else {
4000 		/* all other non-branch instructions with single
4001 		 * fall-through edge
4002 		 */
4003 		ret = push_insn(t, t + 1, FALLTHROUGH, env);
4004 		if (ret == 1)
4005 			goto peek_stack;
4006 		else if (ret < 0)
4007 			goto err_free;
4008 	}
4009 
4010 mark_explored:
4011 	insn_state[t] = EXPLORED;
4012 	if (cur_stack-- <= 0) {
4013 		verbose(env, "pop stack internal bug\n");
4014 		ret = -EFAULT;
4015 		goto err_free;
4016 	}
4017 	goto peek_stack;
4018 
4019 check_state:
4020 	for (i = 0; i < insn_cnt; i++) {
4021 		if (insn_state[i] != EXPLORED) {
4022 			verbose(env, "unreachable insn %d\n", i);
4023 			ret = -EINVAL;
4024 			goto err_free;
4025 		}
4026 	}
4027 	ret = 0; /* cfg looks good */
4028 
4029 err_free:
4030 	kfree(insn_state);
4031 	kfree(insn_stack);
4032 	return ret;
4033 }
4034 
4035 /* check %cur's range satisfies %old's */
4036 static bool range_within(struct bpf_reg_state *old,
4037 			 struct bpf_reg_state *cur)
4038 {
4039 	return old->umin_value <= cur->umin_value &&
4040 	       old->umax_value >= cur->umax_value &&
4041 	       old->smin_value <= cur->smin_value &&
4042 	       old->smax_value >= cur->smax_value;
4043 }
4044 
4045 /* Maximum number of register states that can exist at once */
4046 #define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
4047 struct idpair {
4048 	u32 old;
4049 	u32 cur;
4050 };
4051 
4052 /* If in the old state two registers had the same id, then they need to have
4053  * the same id in the new state as well.  But that id could be different from
4054  * the old state, so we need to track the mapping from old to new ids.
4055  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
4056  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
4057  * regs with a different old id could still have new id 9, we don't care about
4058  * that.
4059  * So we look through our idmap to see if this old id has been seen before.  If
4060  * so, we require the new id to match; otherwise, we add the id pair to the map.
4061  */
4062 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
4063 {
4064 	unsigned int i;
4065 
4066 	for (i = 0; i < ID_MAP_SIZE; i++) {
4067 		if (!idmap[i].old) {
4068 			/* Reached an empty slot; haven't seen this id before */
4069 			idmap[i].old = old_id;
4070 			idmap[i].cur = cur_id;
4071 			return true;
4072 		}
4073 		if (idmap[i].old == old_id)
4074 			return idmap[i].cur == cur_id;
4075 	}
4076 	/* We ran out of idmap slots, which should be impossible */
4077 	WARN_ON_ONCE(1);
4078 	return false;
4079 }
4080 
4081 /* Returns true if (rold safe implies rcur safe) */
4082 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
4083 		    struct idpair *idmap)
4084 {
4085 	bool equal;
4086 
4087 	if (!(rold->live & REG_LIVE_READ))
4088 		/* explored state didn't use this */
4089 		return true;
4090 
4091 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
4092 
4093 	if (rold->type == PTR_TO_STACK)
4094 		/* two stack pointers are equal only if they're pointing to
4095 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
4096 		 */
4097 		return equal && rold->frameno == rcur->frameno;
4098 
4099 	if (equal)
4100 		return true;
4101 
4102 	if (rold->type == NOT_INIT)
4103 		/* explored state can't have used this */
4104 		return true;
4105 	if (rcur->type == NOT_INIT)
4106 		return false;
4107 	switch (rold->type) {
4108 	case SCALAR_VALUE:
4109 		if (rcur->type == SCALAR_VALUE) {
4110 			/* new val must satisfy old val knowledge */
4111 			return range_within(rold, rcur) &&
4112 			       tnum_in(rold->var_off, rcur->var_off);
4113 		} else {
4114 			/* We're trying to use a pointer in place of a scalar.
4115 			 * Even if the scalar was unbounded, this could lead to
4116 			 * pointer leaks because scalars are allowed to leak
4117 			 * while pointers are not. We could make this safe in
4118 			 * special cases if root is calling us, but it's
4119 			 * probably not worth the hassle.
4120 			 */
4121 			return false;
4122 		}
4123 	case PTR_TO_MAP_VALUE:
4124 		/* If the new min/max/var_off satisfy the old ones and
4125 		 * everything else matches, we are OK.
4126 		 * We don't care about the 'id' value, because nothing
4127 		 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
4128 		 */
4129 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
4130 		       range_within(rold, rcur) &&
4131 		       tnum_in(rold->var_off, rcur->var_off);
4132 	case PTR_TO_MAP_VALUE_OR_NULL:
4133 		/* a PTR_TO_MAP_VALUE could be safe to use as a
4134 		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
4135 		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
4136 		 * checked, doing so could have affected others with the same
4137 		 * id, and we can't check for that because we lost the id when
4138 		 * we converted to a PTR_TO_MAP_VALUE.
4139 		 */
4140 		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
4141 			return false;
4142 		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
4143 			return false;
4144 		/* Check our ids match any regs they're supposed to */
4145 		return check_ids(rold->id, rcur->id, idmap);
4146 	case PTR_TO_PACKET_META:
4147 	case PTR_TO_PACKET:
4148 		if (rcur->type != rold->type)
4149 			return false;
4150 		/* We must have at least as much range as the old ptr
4151 		 * did, so that any accesses which were safe before are
4152 		 * still safe.  This is true even if old range < old off,
4153 		 * since someone could have accessed through (ptr - k), or
4154 		 * even done ptr -= k in a register, to get a safe access.
4155 		 */
4156 		if (rold->range > rcur->range)
4157 			return false;
4158 		/* If the offsets don't match, we can't trust our alignment;
4159 		 * nor can we be sure that we won't fall out of range.
4160 		 */
4161 		if (rold->off != rcur->off)
4162 			return false;
4163 		/* id relations must be preserved */
4164 		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
4165 			return false;
4166 		/* new val must satisfy old val knowledge */
4167 		return range_within(rold, rcur) &&
4168 		       tnum_in(rold->var_off, rcur->var_off);
4169 	case PTR_TO_CTX:
4170 	case CONST_PTR_TO_MAP:
4171 	case PTR_TO_PACKET_END:
4172 		/* Only valid matches are exact, which memcmp() above
4173 		 * would have accepted
4174 		 */
4175 	default:
4176 		/* Don't know what's going on, just say it's not safe */
4177 		return false;
4178 	}
4179 
4180 	/* Shouldn't get here; if we do, say it's not safe */
4181 	WARN_ON_ONCE(1);
4182 	return false;
4183 }
4184 
4185 static bool stacksafe(struct bpf_func_state *old,
4186 		      struct bpf_func_state *cur,
4187 		      struct idpair *idmap)
4188 {
4189 	int i, spi;
4190 
4191 	/* if the explored stack has more populated slots than the current
4192 	 * stack, such stacks are not equivalent
4193 	 */
4194 	if (old->allocated_stack > cur->allocated_stack)
4195 		return false;
4196 
4197 	/* walk slots of the explored stack and ignore any additional
4198 	 * slots in the current stack, since the explored (safe) state
4199 	 * didn't use them
4200 	 */
4201 	for (i = 0; i < old->allocated_stack; i++) {
4202 		spi = i / BPF_REG_SIZE;
4203 
4204 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
4205 			/* explored state didn't use this */
4206 			continue;
4207 
4208 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
4209 			continue;
4210 		/* if old state was safe with misc data in the stack
4211 		 * it will be safe with zero-initialized stack.
4212 		 * The opposite is not true
4213 		 */
4214 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
4215 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
4216 			continue;
4217 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
4218 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
4219 			/* Ex: old explored (safe) state has STACK_SPILL in
4220 			 * this stack slot, but the current one has STACK_MISC ->
4221 			 * these verifier states are not equivalent,
4222 			 * return false to continue verification of this path
4223 			 */
4224 			return false;
4225 		if (i % BPF_REG_SIZE)
4226 			continue;
4227 		if (old->stack[spi].slot_type[0] != STACK_SPILL)
4228 			continue;
4229 		if (!regsafe(&old->stack[spi].spilled_ptr,
4230 			     &cur->stack[spi].spilled_ptr,
4231 			     idmap))
4232 			/* when explored and current stack slot are both storing
4233 			 * spilled registers, check that the stored pointer types
4234 			 * are the same as well.
4235 			 * Ex: explored safe path could have stored
4236 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
4237 			 * but current path has stored:
4238 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
4239 			 * such verifier states are not equivalent.
4240 			 * return false to continue verification of this path
4241 			 */
4242 			return false;
4243 	}
4244 	return true;
4245 }
4246 
4247 /* compare two verifier states
4248  *
4249  * all states stored in state_list are known to be valid, since
4250  * verifier reached 'bpf_exit' instruction through them
4251  *
4252  * this function is called when verifier exploring different branches of
4253  * execution popped from the state stack. If it sees an old state that has
4254  * more strict register state and more strict stack state then this execution
4255  * branch doesn't need to be explored further, since verifier already
4256  * concluded that more strict state leads to valid finish.
4257  *
4258  * Therefore two states are equivalent if register state is more conservative
4259  * and explored stack state is more conservative than the current one.
4260  * Example:
4261  *       explored                   current
4262  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
4263  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
4264  *
4265  * In other words if current stack state (one being explored) has more
4266  * valid slots than old one that already passed validation, it means
4267  * the verifier can stop exploring and conclude that current state is valid too
4268  *
4269  * Similarly with registers. If explored state has register type as invalid
4270  * whereas register type in current state is meaningful, it means that
4271  * the current state will reach 'bpf_exit' instruction safely
4272  */
4273 static bool func_states_equal(struct bpf_func_state *old,
4274 			      struct bpf_func_state *cur)
4275 {
4276 	struct idpair *idmap;
4277 	bool ret = false;
4278 	int i;
4279 
4280 	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
4281 	/* If we failed to allocate the idmap, just say it's not safe */
4282 	if (!idmap)
4283 		return false;
4284 
4285 	for (i = 0; i < MAX_BPF_REG; i++) {
4286 		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
4287 			goto out_free;
4288 	}
4289 
4290 	if (!stacksafe(old, cur, idmap))
4291 		goto out_free;
4292 	ret = true;
4293 out_free:
4294 	kfree(idmap);
4295 	return ret;
4296 }
4297 
4298 static bool states_equal(struct bpf_verifier_env *env,
4299 			 struct bpf_verifier_state *old,
4300 			 struct bpf_verifier_state *cur)
4301 {
4302 	int i;
4303 
4304 	if (old->curframe != cur->curframe)
4305 		return false;
4306 
4307 	/* for states to be equal callsites have to be the same
4308 	 * and all frame states need to be equivalent
4309 	 */
4310 	for (i = 0; i <= old->curframe; i++) {
4311 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
4312 			return false;
4313 		if (!func_states_equal(old->frame[i], cur->frame[i]))
4314 			return false;
4315 	}
4316 	return true;
4317 }
4318 
4319 /* A write screens off any subsequent reads; but write marks come from the
4320  * straight-line code between a state and its parent.  When we arrive at an
4321  * equivalent state (jump target or such) we didn't arrive by the straight-line
4322  * code, so read marks in the state must propagate to the parent regardless
4323  * of the state's write marks. That's what 'parent == state->parent' comparison
4324  * in mark_reg_read() and mark_stack_slot_read() is for.
4325  */
4326 static int propagate_liveness(struct bpf_verifier_env *env,
4327 			      const struct bpf_verifier_state *vstate,
4328 			      struct bpf_verifier_state *vparent)
4329 {
4330 	int i, frame, err = 0;
4331 	struct bpf_func_state *state, *parent;
4332 
4333 	if (vparent->curframe != vstate->curframe) {
4334 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
4335 		     vparent->curframe, vstate->curframe);
4336 		return -EFAULT;
4337 	}
4338 	/* Propagate read liveness of registers... */
4339 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
4340 	/* We don't need to worry about FP liveness because it's read-only */
4341 	for (i = 0; i < BPF_REG_FP; i++) {
4342 		if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
4343 			continue;
4344 		if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
4345 			err = mark_reg_read(env, vstate, vparent, i);
4346 			if (err)
4347 				return err;
4348 		}
4349 	}
4350 
4351 	/* ... and stack slots */
4352 	for (frame = 0; frame <= vstate->curframe; frame++) {
4353 		state = vstate->frame[frame];
4354 		parent = vparent->frame[frame];
4355 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
4356 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
4357 			if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
4358 				continue;
4359 			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
4360 				mark_stack_slot_read(env, vstate, vparent, i, frame);
4361 		}
4362 	}
4363 	return err;
4364 }
4365 
4366 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
4367 {
4368 	struct bpf_verifier_state_list *new_sl;
4369 	struct bpf_verifier_state_list *sl;
4370 	struct bpf_verifier_state *cur = env->cur_state;
4371 	int i, j, err;
4372 
4373 	sl = env->explored_states[insn_idx];
4374 	if (!sl)
4375 		/* this 'insn_idx' instruction wasn't marked, so we will not
4376 		 * be doing state search here
4377 		 */
4378 		return 0;
4379 
4380 	while (sl != STATE_LIST_MARK) {
4381 		if (states_equal(env, &sl->state, cur)) {
4382 			/* reached equivalent register/stack state,
4383 			 * prune the search.
4384 			 * Registers read by the continuation are read by us.
4385 			 * If we have any write marks in env->cur_state, they
4386 			 * will prevent corresponding reads in the continuation
4387 			 * from reaching our parent (an explored_state).  Our
4388 			 * own state will get the read marks recorded, but
4389 			 * they'll be immediately forgotten as we're pruning
4390 			 * this state and will pop a new one.
4391 			 */
4392 			err = propagate_liveness(env, &sl->state, cur);
4393 			if (err)
4394 				return err;
4395 			return 1;
4396 		}
4397 		sl = sl->next;
4398 	}
4399 
4400 	/* there were no equivalent states, remember current one.
4401 	 * technically the current state is not proven to be safe yet,
4402 	 * but it will either reach the outermost bpf_exit (which means it's safe)
4403 	 * or it will be rejected. Since there are no loops, we won't be
4404 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
4405 	 * again on the way to bpf_exit
4406 	 */
4407 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
4408 	if (!new_sl)
4409 		return -ENOMEM;
4410 
4411 	/* add new state to the head of linked list */
4412 	err = copy_verifier_state(&new_sl->state, cur);
4413 	if (err) {
4414 		free_verifier_state(&new_sl->state, false);
4415 		kfree(new_sl);
4416 		return err;
4417 	}
4418 	new_sl->next = env->explored_states[insn_idx];
4419 	env->explored_states[insn_idx] = new_sl;
4420 	/* connect new state to parentage chain */
4421 	cur->parent = &new_sl->state;
4422 	/* clear write marks in current state: the writes we did are not writes
4423 	 * our child did, so they don't screen off its reads from us.
4424 	 * (There are no read marks in current state, because reads always mark
4425 	 * their parent and current state never has children yet.  Only
4426 	 * explored_states can get read marks.)
4427 	 */
4428 	for (i = 0; i < BPF_REG_FP; i++)
4429 		cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
4430 
4431 	/* all stack frames are accessible from callee, clear them all */
4432 	for (j = 0; j <= cur->curframe; j++) {
4433 		struct bpf_func_state *frame = cur->frame[j];
4434 
4435 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
4436 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
4437 	}
4438 	return 0;
4439 }
4440 
4441 static int do_check(struct bpf_verifier_env *env)
4442 {
4443 	struct bpf_verifier_state *state;
4444 	struct bpf_insn *insns = env->prog->insnsi;
4445 	struct bpf_reg_state *regs;
4446 	int insn_cnt = env->prog->len, i;
4447 	int insn_idx, prev_insn_idx = 0;
4448 	int insn_processed = 0;
4449 	bool do_print_state = false;
4450 
4451 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
4452 	if (!state)
4453 		return -ENOMEM;
4454 	state->curframe = 0;
4455 	state->parent = NULL;
4456 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
4457 	if (!state->frame[0]) {
4458 		kfree(state);
4459 		return -ENOMEM;
4460 	}
4461 	env->cur_state = state;
4462 	init_func_state(env, state->frame[0],
4463 			BPF_MAIN_FUNC /* callsite */,
4464 			0 /* frameno */,
4465 			0 /* subprogno, zero == main subprog */);
4466 	insn_idx = 0;
4467 	for (;;) {
4468 		struct bpf_insn *insn;
4469 		u8 class;
4470 		int err;
4471 
4472 		if (insn_idx >= insn_cnt) {
4473 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
4474 				insn_idx, insn_cnt);
4475 			return -EFAULT;
4476 		}
4477 
4478 		insn = &insns[insn_idx];
4479 		class = BPF_CLASS(insn->code);
4480 
4481 		if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
4482 			verbose(env,
4483 				"BPF program is too large. Processed %d insn\n",
4484 				insn_processed);
4485 			return -E2BIG;
4486 		}
4487 
4488 		err = is_state_visited(env, insn_idx);
4489 		if (err < 0)
4490 			return err;
4491 		if (err == 1) {
4492 			/* found equivalent state, can prune the search */
4493 			if (env->log.level) {
4494 				if (do_print_state)
4495 					verbose(env, "\nfrom %d to %d: safe\n",
4496 						prev_insn_idx, insn_idx);
4497 				else
4498 					verbose(env, "%d: safe\n", insn_idx);
4499 			}
4500 			goto process_bpf_exit;
4501 		}
4502 
4503 		if (need_resched())
4504 			cond_resched();
4505 
4506 		if (env->log.level > 1 || (env->log.level && do_print_state)) {
4507 			if (env->log.level > 1)
4508 				verbose(env, "%d:", insn_idx);
4509 			else
4510 				verbose(env, "\nfrom %d to %d:",
4511 					prev_insn_idx, insn_idx);
4512 			print_verifier_state(env, state->frame[state->curframe]);
4513 			do_print_state = false;
4514 		}
4515 
4516 		if (env->log.level) {
4517 			const struct bpf_insn_cbs cbs = {
4518 				.cb_print	= verbose,
4519 			};
4520 
4521 			verbose(env, "%d: ", insn_idx);
4522 			print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
4523 		}
4524 
4525 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
4526 			err = bpf_prog_offload_verify_insn(env, insn_idx,
4527 							   prev_insn_idx);
4528 			if (err)
4529 				return err;
4530 		}
4531 
4532 		regs = cur_regs(env);
4533 		env->insn_aux_data[insn_idx].seen = true;
4534 		if (class == BPF_ALU || class == BPF_ALU64) {
4535 			err = check_alu_op(env, insn);
4536 			if (err)
4537 				return err;
4538 
4539 		} else if (class == BPF_LDX) {
4540 			enum bpf_reg_type *prev_src_type, src_reg_type;
4541 
4542 			/* check for reserved fields is already done */
4543 
4544 			/* check src operand */
4545 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
4546 			if (err)
4547 				return err;
4548 
4549 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4550 			if (err)
4551 				return err;
4552 
4553 			src_reg_type = regs[insn->src_reg].type;
4554 
4555 			/* check that memory (src_reg + off) is readable,
4556 			 * the state of dst_reg will be updated by this func
4557 			 */
4558 			err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
4559 					       BPF_SIZE(insn->code), BPF_READ,
4560 					       insn->dst_reg);
4561 			if (err)
4562 				return err;
4563 
4564 			prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
4565 
4566 			if (*prev_src_type == NOT_INIT) {
4567 				/* saw a valid insn
4568 				 * dst_reg = *(u32 *)(src_reg + off)
4569 				 * save type to validate intersecting paths
4570 				 */
4571 				*prev_src_type = src_reg_type;
4572 
4573 			} else if (src_reg_type != *prev_src_type &&
4574 				   (src_reg_type == PTR_TO_CTX ||
4575 				    *prev_src_type == PTR_TO_CTX)) {
4576 				/* A user program is trying to use the same insn
4577 				 * dst_reg = *(u32*) (src_reg + off)
4578 				 * with different pointer types:
4579 				 * src_reg == ctx in one branch and
4580 				 * src_reg == stack|map in some other branch.
4581 				 * Reject it.
4582 				 */
4583 				verbose(env, "same insn cannot be used with different pointers\n");
4584 				return -EINVAL;
4585 			}
4586 
4587 		} else if (class == BPF_STX) {
4588 			enum bpf_reg_type *prev_dst_type, dst_reg_type;
4589 
4590 			if (BPF_MODE(insn->code) == BPF_XADD) {
4591 				err = check_xadd(env, insn_idx, insn);
4592 				if (err)
4593 					return err;
4594 				insn_idx++;
4595 				continue;
4596 			}
4597 
4598 			/* check src1 operand */
4599 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
4600 			if (err)
4601 				return err;
4602 			/* check src2 operand */
4603 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4604 			if (err)
4605 				return err;
4606 
4607 			dst_reg_type = regs[insn->dst_reg].type;
4608 
4609 			/* check that memory (dst_reg + off) is writeable */
4610 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4611 					       BPF_SIZE(insn->code), BPF_WRITE,
4612 					       insn->src_reg);
4613 			if (err)
4614 				return err;
4615 
4616 			prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
4617 
4618 			if (*prev_dst_type == NOT_INIT) {
4619 				*prev_dst_type = dst_reg_type;
4620 			} else if (dst_reg_type != *prev_dst_type &&
4621 				   (dst_reg_type == PTR_TO_CTX ||
4622 				    *prev_dst_type == PTR_TO_CTX)) {
4623 				verbose(env, "same insn cannot be used with different pointers\n");
4624 				return -EINVAL;
4625 			}
4626 
4627 		} else if (class == BPF_ST) {
4628 			if (BPF_MODE(insn->code) != BPF_MEM ||
4629 			    insn->src_reg != BPF_REG_0) {
4630 				verbose(env, "BPF_ST uses reserved fields\n");
4631 				return -EINVAL;
4632 			}
4633 			/* check src operand */
4634 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4635 			if (err)
4636 				return err;
4637 
4638 			/* check that memory (dst_reg + off) is writeable */
4639 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4640 					       BPF_SIZE(insn->code), BPF_WRITE,
4641 					       -1);
4642 			if (err)
4643 				return err;
4644 
4645 		} else if (class == BPF_JMP) {
4646 			u8 opcode = BPF_OP(insn->code);
4647 
4648 			if (opcode == BPF_CALL) {
4649 				if (BPF_SRC(insn->code) != BPF_K ||
4650 				    insn->off != 0 ||
4651 				    (insn->src_reg != BPF_REG_0 &&
4652 				     insn->src_reg != BPF_PSEUDO_CALL) ||
4653 				    insn->dst_reg != BPF_REG_0) {
4654 					verbose(env, "BPF_CALL uses reserved fields\n");
4655 					return -EINVAL;
4656 				}
4657 
4658 				if (insn->src_reg == BPF_PSEUDO_CALL)
4659 					err = check_func_call(env, insn, &insn_idx);
4660 				else
4661 					err = check_helper_call(env, insn->imm, insn_idx);
4662 				if (err)
4663 					return err;
4664 
4665 			} else if (opcode == BPF_JA) {
4666 				if (BPF_SRC(insn->code) != BPF_K ||
4667 				    insn->imm != 0 ||
4668 				    insn->src_reg != BPF_REG_0 ||
4669 				    insn->dst_reg != BPF_REG_0) {
4670 					verbose(env, "BPF_JA uses reserved fields\n");
4671 					return -EINVAL;
4672 				}
4673 
4674 				insn_idx += insn->off + 1;
4675 				continue;
4676 
4677 			} else if (opcode == BPF_EXIT) {
4678 				if (BPF_SRC(insn->code) != BPF_K ||
4679 				    insn->imm != 0 ||
4680 				    insn->src_reg != BPF_REG_0 ||
4681 				    insn->dst_reg != BPF_REG_0) {
4682 					verbose(env, "BPF_EXIT uses reserved fields\n");
4683 					return -EINVAL;
4684 				}
4685 
4686 				if (state->curframe) {
4687 					/* exit from nested function */
4688 					prev_insn_idx = insn_idx;
4689 					err = prepare_func_exit(env, &insn_idx);
4690 					if (err)
4691 						return err;
4692 					do_print_state = true;
4693 					continue;
4694 				}
4695 
4696 				/* eBPF calling convention is such that R0 is used
4697 				 * to return the value from the eBPF program.
4698 				 * Make sure that it's readable at the time
4699 				 * of bpf_exit, which means that the program wrote
4700 				 * something into it earlier
4701 				 */
4702 				err = check_reg_arg(env, BPF_REG_0, SRC_OP);
4703 				if (err)
4704 					return err;
4705 
4706 				if (is_pointer_value(env, BPF_REG_0)) {
4707 					verbose(env, "R0 leaks addr as return value\n");
4708 					return -EACCES;
4709 				}
4710 
4711 				err = check_return_code(env);
4712 				if (err)
4713 					return err;
4714 process_bpf_exit:
4715 				err = pop_stack(env, &prev_insn_idx, &insn_idx);
4716 				if (err < 0) {
4717 					if (err != -ENOENT)
4718 						return err;
4719 					break;
4720 				} else {
4721 					do_print_state = true;
4722 					continue;
4723 				}
4724 			} else {
4725 				err = check_cond_jmp_op(env, insn, &insn_idx);
4726 				if (err)
4727 					return err;
4728 			}
4729 		} else if (class == BPF_LD) {
4730 			u8 mode = BPF_MODE(insn->code);
4731 
4732 			if (mode == BPF_ABS || mode == BPF_IND) {
4733 				err = check_ld_abs(env, insn);
4734 				if (err)
4735 					return err;
4736 
4737 			} else if (mode == BPF_IMM) {
4738 				err = check_ld_imm(env, insn);
4739 				if (err)
4740 					return err;
4741 
4742 				insn_idx++;
4743 				env->insn_aux_data[insn_idx].seen = true;
4744 			} else {
4745 				verbose(env, "invalid BPF_LD mode\n");
4746 				return -EINVAL;
4747 			}
4748 		} else {
4749 			verbose(env, "unknown insn class %d\n", class);
4750 			return -EINVAL;
4751 		}
4752 
4753 		insn_idx++;
4754 	}
4755 
4756 	verbose(env, "processed %d insns, stack depth ", insn_processed);
4757 	for (i = 0; i < env->subprog_cnt + 1; i++) {
4758 		u32 depth = env->subprog_stack_depth[i];
4759 
4760 		verbose(env, "%d", depth);
4761 		if (i + 1 < env->subprog_cnt + 1)
4762 			verbose(env, "+");
4763 	}
4764 	verbose(env, "\n");
4765 	env->prog->aux->stack_depth = env->subprog_stack_depth[0];
4766 	return 0;
4767 }
4768 
4769 static int check_map_prealloc(struct bpf_map *map)
4770 {
4771 	return (map->map_type != BPF_MAP_TYPE_HASH &&
4772 		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
4773 		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
4774 		!(map->map_flags & BPF_F_NO_PREALLOC);
4775 }
4776 
4777 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
4778 					struct bpf_map *map,
4779 					struct bpf_prog *prog)
4780 
4781 {
4782 	/* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
4783 	 * preallocated hash maps, since doing memory allocation
4784 	 * in overflow_handler can crash depending on where nmi got
4785 	 * triggered.
4786 	 */
4787 	if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
4788 		if (!check_map_prealloc(map)) {
4789 			verbose(env, "perf_event programs can only use preallocated hash map\n");
4790 			return -EINVAL;
4791 		}
4792 		if (map->inner_map_meta &&
4793 		    !check_map_prealloc(map->inner_map_meta)) {
4794 			verbose(env, "perf_event programs can only use preallocated inner hash map\n");
4795 			return -EINVAL;
4796 		}
4797 	}
4798 	return 0;
4799 }
4800 
4801 /* look for pseudo eBPF instructions that access map FDs and
4802  * replace them with actual map pointers
4803  */
4804 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
4805 {
4806 	struct bpf_insn *insn = env->prog->insnsi;
4807 	int insn_cnt = env->prog->len;
4808 	int i, j, err;
4809 
4810 	err = bpf_prog_calc_tag(env->prog);
4811 	if (err)
4812 		return err;
4813 
4814 	for (i = 0; i < insn_cnt; i++, insn++) {
4815 		if (BPF_CLASS(insn->code) == BPF_LDX &&
4816 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
4817 			verbose(env, "BPF_LDX uses reserved fields\n");
4818 			return -EINVAL;
4819 		}
4820 
4821 		if (BPF_CLASS(insn->code) == BPF_STX &&
4822 		    ((BPF_MODE(insn->code) != BPF_MEM &&
4823 		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
4824 			verbose(env, "BPF_STX uses reserved fields\n");
4825 			return -EINVAL;
4826 		}
4827 
4828 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
4829 			struct bpf_map *map;
4830 			struct fd f;
4831 
4832 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
4833 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
4834 			    insn[1].off != 0) {
4835 				verbose(env, "invalid bpf_ld_imm64 insn\n");
4836 				return -EINVAL;
4837 			}
4838 
4839 			if (insn->src_reg == 0)
4840 				/* valid generic load 64-bit imm */
4841 				goto next_insn;
4842 
4843 			if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
4844 				verbose(env,
4845 					"unrecognized bpf_ld_imm64 insn\n");
4846 				return -EINVAL;
4847 			}
4848 
4849 			f = fdget(insn->imm);
4850 			map = __bpf_map_get(f);
4851 			if (IS_ERR(map)) {
4852 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
4853 					insn->imm);
4854 				return PTR_ERR(map);
4855 			}
4856 
4857 			err = check_map_prog_compatibility(env, map, env->prog);
4858 			if (err) {
4859 				fdput(f);
4860 				return err;
4861 			}
4862 
4863 			/* store map pointer inside BPF_LD_IMM64 instruction */
4864 			insn[0].imm = (u32) (unsigned long) map;
4865 			insn[1].imm = ((u64) (unsigned long) map) >> 32;
4866 
4867 			/* check whether we recorded this map already */
4868 			for (j = 0; j < env->used_map_cnt; j++)
4869 				if (env->used_maps[j] == map) {
4870 					fdput(f);
4871 					goto next_insn;
4872 				}
4873 
4874 			if (env->used_map_cnt >= MAX_USED_MAPS) {
4875 				fdput(f);
4876 				return -E2BIG;
4877 			}
4878 
4879 			/* hold the map. If the program is rejected by verifier,
4880 			 * the map will be released by release_maps() or it
4881 			 * will be used by the valid program until it's unloaded
4882 			 * and all maps are released in free_bpf_prog_info()
4883 			 */
4884 			map = bpf_map_inc(map, false);
4885 			if (IS_ERR(map)) {
4886 				fdput(f);
4887 				return PTR_ERR(map);
4888 			}
4889 			env->used_maps[env->used_map_cnt++] = map;
4890 
4891 			fdput(f);
4892 next_insn:
4893 			insn++;
4894 			i++;
4895 		}
4896 	}
4897 
4898 	/* now all pseudo BPF_LD_IMM64 instructions load valid
4899 	 * 'struct bpf_map *' into a register instead of user map_fd.
4900 	 * These pointers will be used later by verifier to validate map access.
4901 	 */
4902 	return 0;
4903 }
4904 
4905 /* drop refcnt of maps used by the rejected program */
4906 static void release_maps(struct bpf_verifier_env *env)
4907 {
4908 	int i;
4909 
4910 	for (i = 0; i < env->used_map_cnt; i++)
4911 		bpf_map_put(env->used_maps[i]);
4912 }
4913 
4914 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
4915 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
4916 {
4917 	struct bpf_insn *insn = env->prog->insnsi;
4918 	int insn_cnt = env->prog->len;
4919 	int i;
4920 
4921 	for (i = 0; i < insn_cnt; i++, insn++)
4922 		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
4923 			insn->src_reg = 0;
4924 }
4925 
4926 /* single env->prog->insnsi[off] instruction was replaced with the range
4927  * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
4928  * [0, off) and [off, end) to new locations, so the patched range stays zero
4929  */
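/* Worked example (illustrative): with off == 10 and cnt == 3 the program
 * grows by two insns; aux_data[0..9] keep their slots, the old aux_data[10]
 * lands at new index 12, and the freshly patched slots 10 and 11 are only
 * marked as seen.
 */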
4930 static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
4931 				u32 off, u32 cnt)
4932 {
4933 	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
4934 	int i;
4935 
4936 	if (cnt == 1)
4937 		return 0;
4938 	new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
4939 	if (!new_data)
4940 		return -ENOMEM;
4941 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
4942 	memcpy(new_data + off + cnt - 1, old_data + off,
4943 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
4944 	for (i = off; i < off + cnt - 1; i++)
4945 		new_data[i].seen = true;
4946 	env->insn_aux_data = new_data;
4947 	vfree(old_data);
4948 	return 0;
4949 }
4950 
4951 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
4952 {
4953 	int i;
4954 
4955 	if (len == 1)
4956 		return;
4957 	for (i = 0; i < env->subprog_cnt; i++) {
4958 		if (env->subprog_starts[i] < off)
4959 			continue;
4960 		env->subprog_starts[i] += len - 1;
4961 	}
4962 }
4963 
4964 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
4965 					    const struct bpf_insn *patch, u32 len)
4966 {
4967 	struct bpf_prog *new_prog;
4968 
4969 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
4970 	if (!new_prog)
4971 		return NULL;
4972 	if (adjust_insn_aux_data(env, new_prog->len, off, len))
4973 		return NULL;
4974 	adjust_subprog_starts(env, off, len);
4975 	return new_prog;
4976 }
4977 
4978 /* The verifier does more data flow analysis than llvm and will not explore
4979  * branches that are dead at run time. Malicious programs can have dead code
4980  * too. Therefore replace all dead at-run-time code with nops.
4981  */
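/* The nop used below is simply 'r0 = r0', a register move onto itself,
 * which has no visible effect but keeps the insn count and therefore all
 * jump offsets intact.
 */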
4982 static void sanitize_dead_code(struct bpf_verifier_env *env)
4983 {
4984 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
4985 	struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
4986 	struct bpf_insn *insn = env->prog->insnsi;
4987 	const int insn_cnt = env->prog->len;
4988 	int i;
4989 
4990 	for (i = 0; i < insn_cnt; i++) {
4991 		if (aux_data[i].seen)
4992 			continue;
4993 		memcpy(insn + i, &nop, sizeof(nop));
4994 	}
4995 }
4996 
4997 /* convert load instructions that access fields of 'struct __sk_buff'
4998  * into sequence of instructions that access fields of 'struct sk_buff'
4999  */
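/* Illustrative example (the exact rewrite comes from the program type's
 * ops->convert_ctx_access): a socket filter doing
 *
 *	r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * ends up as an equivalent load from the corresponding field of the real
 * in-kernel 'struct sk_buff'.
 */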
5000 static int convert_ctx_accesses(struct bpf_verifier_env *env)
5001 {
5002 	const struct bpf_verifier_ops *ops = env->ops;
5003 	int i, cnt, size, ctx_field_size, delta = 0;
5004 	const int insn_cnt = env->prog->len;
5005 	struct bpf_insn insn_buf[16], *insn;
5006 	struct bpf_prog *new_prog;
5007 	enum bpf_access_type type;
5008 	bool is_narrower_load;
5009 	u32 target_size;
5010 
5011 	if (ops->gen_prologue) {
5012 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
5013 					env->prog);
5014 		if (cnt >= ARRAY_SIZE(insn_buf)) {
5015 			verbose(env, "bpf verifier is misconfigured\n");
5016 			return -EINVAL;
5017 		} else if (cnt) {
5018 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
5019 			if (!new_prog)
5020 				return -ENOMEM;
5021 
5022 			env->prog = new_prog;
5023 			delta += cnt - 1;
5024 		}
5025 	}
5026 
5027 	if (!ops->convert_ctx_access)
5028 		return 0;
5029 
5030 	insn = env->prog->insnsi + delta;
5031 
5032 	for (i = 0; i < insn_cnt; i++, insn++) {
5033 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
5034 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
5035 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
5036 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
5037 			type = BPF_READ;
5038 		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
5039 			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
5040 			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
5041 			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
5042 			type = BPF_WRITE;
5043 		else
5044 			continue;
5045 
5046 		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
5047 			continue;
5048 
5049 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
5050 		size = BPF_LDST_BYTES(insn);
5051 
5052 		/* If the read access is a narrower load of the field,
5053 		 * convert to a 4/8-byte load, to minimize program type specific
5054 		 * convert_ctx_access changes. If conversion is successful,
5055 		 * we will apply the proper mask to the result.
5056 		 */
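		/* Illustrative: a 1-byte read of a 4-byte ctx field is widened
		 * here to a BPF_W load at the field-aligned offset; the BPF_AND
		 * with 0xff appended further below masks the result back to the
		 * requested width.
		 */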
5057 		is_narrower_load = size < ctx_field_size;
5058 		if (is_narrower_load) {
5059 			u32 off = insn->off;
5060 			u8 size_code;
5061 
5062 			if (type == BPF_WRITE) {
5063 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
5064 				return -EINVAL;
5065 			}
5066 
5067 			size_code = BPF_H;
5068 			if (ctx_field_size == 4)
5069 				size_code = BPF_W;
5070 			else if (ctx_field_size == 8)
5071 				size_code = BPF_DW;
5072 
5073 			insn->off = off & ~(ctx_field_size - 1);
5074 			insn->code = BPF_LDX | BPF_MEM | size_code;
5075 		}
5076 
5077 		target_size = 0;
5078 		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
5079 					      &target_size);
5080 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
5081 		    (ctx_field_size && !target_size)) {
5082 			verbose(env, "bpf verifier is misconfigured\n");
5083 			return -EINVAL;
5084 		}
5085 
5086 		if (is_narrower_load && size < target_size) {
5087 			if (ctx_field_size <= 4)
5088 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
5089 								(1 << size * 8) - 1);
5090 			else
5091 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
5092 								(1 << size * 8) - 1);
5093 		}
5094 
5095 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
5096 		if (!new_prog)
5097 			return -ENOMEM;
5098 
5099 		delta += cnt - 1;
5100 
5101 		/* keep walking new program and skip insns we just inserted */
5102 		env->prog = new_prog;
5103 		insn      = new_prog->insnsi + i + delta;
5104 	}
5105 
5106 	return 0;
5107 }
5108 
5109 static int jit_subprogs(struct bpf_verifier_env *env)
5110 {
5111 	struct bpf_prog *prog = env->prog, **func, *tmp;
5112 	int i, j, subprog_start, subprog_end = 0, len, subprog;
5113 	struct bpf_insn *insn;
5114 	void *old_bpf_func;
5115 	int err = -ENOMEM;
5116 
5117 	if (env->subprog_cnt == 0)
5118 		return 0;
5119 
5120 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
5121 		if (insn->code != (BPF_JMP | BPF_CALL) ||
5122 		    insn->src_reg != BPF_PSEUDO_CALL)
5123 			continue;
5124 		subprog = find_subprog(env, i + insn->imm + 1);
5125 		if (subprog < 0) {
5126 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5127 				  i + insn->imm + 1);
5128 			return -EFAULT;
5129 		}
5130 		/* temporarily remember subprog id inside insn instead of
5131 		 * aux_data, since next loop will split up all insns into funcs
5132 		 */
5133 		insn->off = subprog + 1;
5134 		/* remember original imm in case JIT fails and fallback
5135 		/* remember the original imm in case the JIT fails and a
5136 		 * fallback to the interpreter is needed
5137 		env->insn_aux_data[i].call_imm = insn->imm;
5138 		/* point imm to __bpf_call_base+1 from the JIT's point of view */
5139 		insn->imm = 1;
5140 	}
5141 
5142 	func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
5143 	if (!func)
5144 		return -ENOMEM;
5145 
5146 	for (i = 0; i <= env->subprog_cnt; i++) {
5147 		subprog_start = subprog_end;
5148 		if (env->subprog_cnt == i)
5149 			subprog_end = prog->len;
5150 		else
5151 			subprog_end = env->subprog_starts[i];
5152 
5153 		len = subprog_end - subprog_start;
5154 		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
5155 		if (!func[i])
5156 			goto out_free;
5157 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
5158 		       len * sizeof(struct bpf_insn));
5159 		func[i]->type = prog->type;
5160 		func[i]->len = len;
5161 		if (bpf_prog_calc_tag(func[i]))
5162 			goto out_free;
5163 		func[i]->is_func = 1;
5164 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
5165 		 * Long term would need debug info to populate names
5166 		 */
5167 		func[i]->aux->name[0] = 'F';
5168 		func[i]->aux->stack_depth = env->subprog_stack_depth[i];
5169 		func[i]->jit_requested = 1;
5170 		func[i] = bpf_int_jit_compile(func[i]);
5171 		if (!func[i]->jited) {
5172 			err = -ENOTSUPP;
5173 			goto out_free;
5174 		}
5175 		cond_resched();
5176 	}
5177 	/* at this point all bpf functions were successfully JITed;
5178 	 * now populate all bpf_calls with correct addresses and
5179 	 * run the last pass of the JIT
5180 	 */
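	/* Sketch of the resulting encoding: a call to subprog N now carries
	 * imm == (long)func[N]->bpf_func - __bpf_call_base, so the second JIT
	 * pass can emit a direct call to the already-JITed image.
	 */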
5181 	for (i = 0; i <= env->subprog_cnt; i++) {
5182 		insn = func[i]->insnsi;
5183 		for (j = 0; j < func[i]->len; j++, insn++) {
5184 			if (insn->code != (BPF_JMP | BPF_CALL) ||
5185 			    insn->src_reg != BPF_PSEUDO_CALL)
5186 				continue;
5187 			subprog = insn->off;
5188 			insn->off = 0;
5189 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
5190 				func[subprog]->bpf_func -
5191 				__bpf_call_base;
5192 		}
5193 	}
5194 	for (i = 0; i <= env->subprog_cnt; i++) {
5195 		old_bpf_func = func[i]->bpf_func;
5196 		tmp = bpf_int_jit_compile(func[i]);
5197 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
5198 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
5199 			err = -EFAULT;
5200 			goto out_free;
5201 		}
5202 		cond_resched();
5203 	}
5204 
5205 	/* finally lock the prog and JIT images for all functions and
5206 	 * populate kallsyms
5207 	 */
5208 	for (i = 0; i <= env->subprog_cnt; i++) {
5209 		bpf_prog_lock_ro(func[i]);
5210 		bpf_prog_kallsyms_add(func[i]);
5211 	}
5212 
5213 	/* Last step: make the now unused interpreter insns from the main
5214 	 * prog consistent for later dump requests, so they look the
5215 	 * same as if they had only been interpreted.
5216 	 */
5217 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
5218 		unsigned long addr;
5219 
5220 		if (insn->code != (BPF_JMP | BPF_CALL) ||
5221 		    insn->src_reg != BPF_PSEUDO_CALL)
5222 			continue;
5223 		insn->off = env->insn_aux_data[i].call_imm;
5224 		subprog = find_subprog(env, i + insn->off + 1);
5225 		addr  = (unsigned long)func[subprog + 1]->bpf_func;
5226 		addr &= PAGE_MASK;
5227 		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
5228 			    addr - __bpf_call_base;
5229 	}
5230 
5231 	prog->jited = 1;
5232 	prog->bpf_func = func[0]->bpf_func;
5233 	prog->aux->func = func;
5234 	prog->aux->func_cnt = env->subprog_cnt + 1;
5235 	return 0;
5236 out_free:
5237 	for (i = 0; i <= env->subprog_cnt; i++)
5238 		if (func[i])
5239 			bpf_jit_free(func[i]);
5240 	kfree(func);
5241 	/* cleanup main prog to be interpreted */
5242 	prog->jit_requested = 0;
5243 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
5244 		if (insn->code != (BPF_JMP | BPF_CALL) ||
5245 		    insn->src_reg != BPF_PSEUDO_CALL)
5246 			continue;
5247 		insn->off = 0;
5248 		insn->imm = env->insn_aux_data[i].call_imm;
5249 	}
5250 	return err;
5251 }
5252 
5253 static int fixup_call_args(struct bpf_verifier_env *env)
5254 {
5255 	struct bpf_prog *prog = env->prog;
5256 	struct bpf_insn *insn = prog->insnsi;
5257 	int i, depth;
5258 
5259 	if (env->prog->jit_requested)
5260 		if (jit_subprogs(env) == 0)
5261 			return 0;
5262 
5263 	for (i = 0; i < prog->len; i++, insn++) {
5264 		if (insn->code != (BPF_JMP | BPF_CALL) ||
5265 		    insn->src_reg != BPF_PSEUDO_CALL)
5266 			continue;
5267 		depth = get_callee_stack_depth(env, insn, i);
5268 		if (depth < 0)
5269 			return depth;
5270 		bpf_patch_call_args(insn, depth);
5271 	}
5272 	return 0;
5273 }
5274 
5275 /* fixup insn->imm field of bpf_call instructions
5276  * and inline eligible helpers as explicit sequence of BPF instructions
5277  *
5278  * this function is called after eBPF program passed verification
5279  */
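/* Illustrative: on entry to this pass a helper call still carries the
 * helper id in insn->imm (e.g. BPF_FUNC_map_lookup_elem); unless the call
 * gets inlined below, patch_call_imm rewrites imm into
 * fn->func - __bpf_call_base so the interpreter/JIT can reach the real
 * kernel function.
 */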
5280 static int fixup_bpf_calls(struct bpf_verifier_env *env)
5281 {
5282 	struct bpf_prog *prog = env->prog;
5283 	struct bpf_insn *insn = prog->insnsi;
5284 	const struct bpf_func_proto *fn;
5285 	const int insn_cnt = prog->len;
5286 	struct bpf_insn insn_buf[16];
5287 	struct bpf_prog *new_prog;
5288 	struct bpf_map *map_ptr;
5289 	int i, cnt, delta = 0;
5290 
5291 	for (i = 0; i < insn_cnt; i++, insn++) {
5292 		if (insn->code != (BPF_JMP | BPF_CALL))
5293 			continue;
5294 		if (insn->src_reg == BPF_PSEUDO_CALL)
5295 			continue;
5296 
5297 		if (insn->imm == BPF_FUNC_get_route_realm)
5298 			prog->dst_needed = 1;
5299 		if (insn->imm == BPF_FUNC_get_prandom_u32)
5300 			bpf_user_rnd_init_once();
5301 		if (insn->imm == BPF_FUNC_override_return)
5302 			prog->kprobe_override = 1;
5303 		if (insn->imm == BPF_FUNC_tail_call) {
5304 			/* If we tail call into other programs, we
5305 			 * cannot make any assumptions since they can
5306 			 * be replaced dynamically during runtime in
5307 			 * the program array.
5308 			 */
5309 			prog->cb_access = 1;
5310 			env->prog->aux->stack_depth = MAX_BPF_STACK;
5311 
5312 			/* mark bpf_tail_call as different opcode to avoid
5313 			 * conditional branch in the interpreter for every normal
5314 			 * call and to prevent accidental JITing by a JIT compiler
5315 			 * that doesn't support bpf_tail_call yet
5316 			 */
5317 			insn->imm = 0;
5318 			insn->code = BPF_JMP | BPF_TAIL_CALL;
5319 			continue;
5320 		}
5321 
5322 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
5323 		 * handlers are currently limited to 64 bit only.
5324 		 */
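		/* E.g. (assumed array map behaviour here): for a
		 * BPF_MAP_TYPE_ARRAY, map_gen_lookup() can expand the call into
		 * an inline bounds check plus pointer arithmetic on the value
		 * area, removing the helper call entirely.
		 */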
5325 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
5326 		    insn->imm == BPF_FUNC_map_lookup_elem) {
5327 			map_ptr = env->insn_aux_data[i + delta].map_ptr;
5328 			if (map_ptr == BPF_MAP_PTR_POISON ||
5329 			    !map_ptr->ops->map_gen_lookup)
5330 				goto patch_call_imm;
5331 
5332 			cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
5333 			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
5334 				verbose(env, "bpf verifier is misconfigured\n");
5335 				return -EINVAL;
5336 			}
5337 
5338 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
5339 						       cnt);
5340 			if (!new_prog)
5341 				return -ENOMEM;
5342 
5343 			delta += cnt - 1;
5344 
5345 			/* keep walking new program and skip insns we just inserted */
5346 			env->prog = prog = new_prog;
5347 			insn      = new_prog->insnsi + i + delta;
5348 			continue;
5349 		}
5350 
5351 		if (insn->imm == BPF_FUNC_redirect_map) {
5352 			/* Note, we cannot use prog directly as imm as subsequent
5353 			 * rewrites would still change the prog pointer. The only
5354 			 * stable address we can use is aux, which also works with
5355 			 * prog clones during blinding.
5356 			 */
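			/* The patched sequence (illustrative) then reads:
			 *
			 *	r4 = (u64)(unsigned long)prog->aux   (ld_imm64, two insns)
			 *	call bpf_redirect_map
			 */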
5357 			u64 addr = (unsigned long)prog->aux;
5358 			struct bpf_insn r4_ld[] = {
5359 				BPF_LD_IMM64(BPF_REG_4, addr),
5360 				*insn,
5361 			};
5362 			cnt = ARRAY_SIZE(r4_ld);
5363 
5364 			new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
5365 			if (!new_prog)
5366 				return -ENOMEM;
5367 
5368 			delta    += cnt - 1;
5369 			env->prog = prog = new_prog;
5370 			insn      = new_prog->insnsi + i + delta;
5371 		}
5372 patch_call_imm:
5373 		fn = env->ops->get_func_proto(insn->imm);
5374 		/* all functions that have a prototype and that the verifier
5375 		 * allowed programs to call must be real in-kernel functions
5376 		 */
5377 		if (!fn->func) {
5378 			verbose(env,
5379 				"kernel subsystem misconfigured func %s#%d\n",
5380 				func_id_name(insn->imm), insn->imm);
5381 			return -EFAULT;
5382 		}
5383 		insn->imm = fn->func - __bpf_call_base;
5384 	}
5385 
5386 	return 0;
5387 }
5388 
5389 static void free_states(struct bpf_verifier_env *env)
5390 {
5391 	struct bpf_verifier_state_list *sl, *sln;
5392 	int i;
5393 
5394 	if (!env->explored_states)
5395 		return;
5396 
5397 	for (i = 0; i < env->prog->len; i++) {
5398 		sl = env->explored_states[i];
5399 
5400 		if (sl)
5401 			while (sl != STATE_LIST_MARK) {
5402 				sln = sl->next;
5403 				free_verifier_state(&sl->state, false);
5404 				kfree(sl);
5405 				sl = sln;
5406 			}
5407 	}
5408 
5409 	kfree(env->explored_states);
5410 }
5411 
5412 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
5413 {
5414 	struct bpf_verifier_env *env;
5415 	struct bpf_verifer_log *log;
5416 	int ret = -EINVAL;
5417 
5418 	/* no program types registered, hence no program is valid */
5419 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
5420 		return -EINVAL;
5421 
5422 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
5423 	 * allocate/free it every time bpf_check() is called
5424 	 */
5425 	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
5426 	if (!env)
5427 		return -ENOMEM;
5428 	log = &env->log;
5429 
5430 	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
5431 				     (*prog)->len);
5432 	ret = -ENOMEM;
5433 	if (!env->insn_aux_data)
5434 		goto err_free_env;
5435 	env->prog = *prog;
5436 	env->ops = bpf_verifier_ops[env->prog->type];
5437 
5438 	/* grab the mutex to protect a few globals used by the verifier */
5439 	mutex_lock(&bpf_verifier_lock);
5440 
5441 	if (attr->log_level || attr->log_buf || attr->log_size) {
5442 		/* user requested verbose verifier output
5443 		 * and supplied buffer to store the verification trace
5444 		 */
5445 		log->level = attr->log_level;
5446 		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
5447 		log->len_total = attr->log_size;
5448 
5449 		ret = -EINVAL;
5450 		/* log attributes have to be sane */
5451 		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
5452 		    !log->level || !log->ubuf)
5453 			goto err_unlock;
5454 	}
5455 
5456 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
5457 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
5458 		env->strict_alignment = true;
5459 
5460 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
5461 		ret = bpf_prog_offload_verifier_prep(env);
5462 		if (ret)
5463 			goto err_unlock;
5464 	}
5465 
5466 	ret = replace_map_fd_with_map_ptr(env);
5467 	if (ret < 0)
5468 		goto skip_full_check;
5469 
5470 	env->explored_states = kcalloc(env->prog->len,
5471 				       sizeof(struct bpf_verifier_state_list *),
5472 				       GFP_USER);
5473 	ret = -ENOMEM;
5474 	if (!env->explored_states)
5475 		goto skip_full_check;
5476 
5477 	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
5478 
5479 	ret = check_cfg(env);
5480 	if (ret < 0)
5481 		goto skip_full_check;
5482 
5483 	ret = do_check(env);
5484 	if (env->cur_state) {
5485 		free_verifier_state(env->cur_state, true);
5486 		env->cur_state = NULL;
5487 	}
5488 
5489 skip_full_check:
5490 	while (!pop_stack(env, NULL, NULL));
5491 	free_states(env);
5492 
5493 	if (ret == 0)
5494 		sanitize_dead_code(env);
5495 
5496 	if (ret == 0)
5497 		ret = check_max_stack_depth(env);
5498 
5499 	if (ret == 0)
5500 		/* program is valid, convert *(u32*)(ctx + off) accesses */
5501 		ret = convert_ctx_accesses(env);
5502 
5503 	if (ret == 0)
5504 		ret = fixup_bpf_calls(env);
5505 
5506 	if (ret == 0)
5507 		ret = fixup_call_args(env);
5508 
5509 	if (log->level && bpf_verifier_log_full(log))
5510 		ret = -ENOSPC;
5511 	if (log->level && !log->ubuf) {
5512 		ret = -EFAULT;
5513 		goto err_release_maps;
5514 	}
5515 
5516 	if (ret == 0 && env->used_map_cnt) {
5517 		/* if program passed verifier, update used_maps in bpf_prog_info */
5518 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
5519 							  sizeof(env->used_maps[0]),
5520 							  GFP_KERNEL);
5521 
5522 		if (!env->prog->aux->used_maps) {
5523 			ret = -ENOMEM;
5524 			goto err_release_maps;
5525 		}
5526 
5527 		memcpy(env->prog->aux->used_maps, env->used_maps,
5528 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
5529 		env->prog->aux->used_map_cnt = env->used_map_cnt;
5530 
5531 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
5532 		 * bpf_ld_imm64 instructions
5533 		 */
5534 		convert_pseudo_ld_imm64(env);
5535 	}
5536 
5537 err_release_maps:
5538 	if (!env->prog->aux->used_maps)
5539 		/* if we didn't copy map pointers into bpf_prog_info, release
5540 		 * them now. Otherwise free_bpf_prog_info() will release them.
5541 		 */
5542 		release_maps(env);
5543 	*prog = env->prog;
5544 err_unlock:
5545 	mutex_unlock(&bpf_verifier_lock);
5546 	vfree(env->insn_aux_data);
5547 err_free_env:
5548 	kfree(env);
5549 	return ret;
5550 }
5551