xref: /linux/kernel/bpf/syscall.c (revision 68f4e480b089abae26fbab0c38c3df3cbac3d79d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  */
4 #include <crypto/sha2.h>
5 #include <linux/bpf.h>
6 #include <linux/bpf-cgroup.h>
7 #include <linux/bpf_trace.h>
8 #include <linux/bpf_lirc.h>
9 #include <linux/bpf_verifier.h>
10 #include <linux/bsearch.h>
11 #include <linux/btf.h>
12 #include <linux/hex.h>
13 #include <linux/syscalls.h>
14 #include <linux/slab.h>
15 #include <linux/sched/signal.h>
16 #include <linux/vmalloc.h>
17 #include <linux/mmzone.h>
18 #include <linux/anon_inodes.h>
19 #include <linux/fdtable.h>
20 #include <linux/file.h>
21 #include <linux/fs.h>
22 #include <linux/license.h>
23 #include <linux/filter.h>
24 #include <linux/kernel.h>
25 #include <linux/idr.h>
26 #include <linux/cred.h>
27 #include <linux/timekeeping.h>
28 #include <linux/ctype.h>
29 #include <linux/nospec.h>
30 #include <linux/audit.h>
31 #include <uapi/linux/btf.h>
32 #include <linux/pgtable.h>
33 #include <linux/bpf_lsm.h>
34 #include <linux/poll.h>
35 #include <linux/sort.h>
36 #include <linux/bpf-netns.h>
37 #include <linux/rcupdate_trace.h>
38 #include <linux/memcontrol.h>
39 #include <linux/trace_events.h>
40 #include <linux/tracepoint.h>
41 #include <linux/overflow.h>
42 #include <linux/cookie.h>
43 #include <linux/verification.h>
44 #include <linux/btf_ids.h>
45 
46 #include <net/netfilter/nf_bpf_link.h>
47 #include <net/netkit.h>
48 #include <net/tcx.h>
49 
/*
 * Map-type classification helpers used by the syscall paths in this file.
 * bpf_map_value_size() reports a u32-sized value for every map that matches
 * IS_FD_MAP().
 */
#define IS_FD_ARRAY(map) \
	((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
	 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
	 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)

#define IS_FD_PROG_ARRAY(map) \
	((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)

#define IS_FD_HASH(map) \
	((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)

/* Union of the three classes above. */
#define IS_FD_MAP(map) \
	(IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || IS_FD_HASH(map))

/* The BPF_F_RDONLY and BPF_F_WRONLY flag bits. */
#define BPF_OBJ_FLAG_MASK	(BPF_F_RDONLY | BPF_F_WRONLY)
59 
60 DEFINE_PER_CPU(int, bpf_prog_active);
61 DEFINE_COOKIE(bpf_map_cookie);
62 static DEFINE_IDR(prog_idr);
63 static DEFINE_SPINLOCK(prog_idr_lock);
64 static DEFINE_IDR(map_idr);
65 static DEFINE_SPINLOCK(map_idr_lock);
66 static DEFINE_IDR(link_idr);
67 static DEFINE_SPINLOCK(link_idr_lock);
68 
69 int sysctl_unprivileged_bpf_disabled __read_mostly =
70 	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
71 
72 static const struct bpf_map_ops * const bpf_map_types[] = {
73 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
74 #define BPF_MAP_TYPE(_id, _ops) \
75 	[_id] = &_ops,
76 #define BPF_LINK_TYPE(_id, _name)
77 #include <linux/bpf_types.h>
78 #undef BPF_PROG_TYPE
79 #undef BPF_MAP_TYPE
80 #undef BPF_LINK_TYPE
81 };
82 
83 /*
84  * If we're handed a bigger struct than we know of, ensure all the unknown bits
85  * are 0 - i.e. new user-space does not rely on any kernel feature extensions
86  * we don't know about yet.
87  *
88  * There is a ToCToU between this function call and the following
89  * copy_from_user() call. However, this is not a concern since this function is
90  * meant to be a future-proofing of bits.
91  */
92 int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
93 			     size_t expected_size,
94 			     size_t actual_size)
95 {
96 	int res;
97 
98 	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
99 		return -E2BIG;
100 
101 	if (actual_size <= expected_size)
102 		return 0;
103 
104 	if (uaddr.is_kernel)
105 		res = memchr_inv(uaddr.kernel + expected_size, 0,
106 				 actual_size - expected_size) == NULL;
107 	else
108 		res = check_zeroed_user(uaddr.user + expected_size,
109 					actual_size - expected_size);
110 	if (res < 0)
111 		return res;
112 	return res ? 0 : -E2BIG;
113 }
114 
115 const struct bpf_map_ops bpf_map_offload_ops = {
116 	.map_meta_equal = bpf_map_meta_equal,
117 	.map_alloc = bpf_map_offload_map_alloc,
118 	.map_free = bpf_map_offload_map_free,
119 	.map_check_btf = map_check_no_btf,
120 	.map_mem_usage = bpf_map_offload_map_mem_usage,
121 };
122 
123 static void bpf_map_write_active_inc(struct bpf_map *map)
124 {
125 	atomic64_inc(&map->writecnt);
126 }
127 
128 static void bpf_map_write_active_dec(struct bpf_map *map)
129 {
130 	atomic64_dec(&map->writecnt);
131 }
132 
133 bool bpf_map_write_active(const struct bpf_map *map)
134 {
135 	return atomic64_read(&map->writecnt) != 0;
136 }
137 
138 static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
139 {
140 	if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS))
141 		return map->value_size;
142 	else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
143 		 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
144 		 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
145 		 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
146 		return round_up(map->value_size, 8) * num_possible_cpus();
147 	else if (IS_FD_MAP(map))
148 		return sizeof(u32);
149 	else
150 		return  map->value_size;
151 }
152 
153 static void maybe_wait_bpf_programs(struct bpf_map *map)
154 {
155 	/* Wait for any running non-sleepable BPF programs to complete so that
156 	 * userspace, when we return to it, knows that all non-sleepable
157 	 * programs that could be running use the new map value. For sleepable
158 	 * BPF programs, synchronize_rcu_tasks_trace() should be used to wait
159 	 * for the completions of these programs, but considering the waiting
160 	 * time can be very long and userspace may think it will hang forever,
161 	 * so don't handle sleepable BPF programs now.
162 	 */
163 	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
164 	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
165 		synchronize_rcu_expedited();
166 }
167 
/* Unpin the page backing kernel address @kaddr; a NULL @kaddr is ignored. */
static void unpin_uptr_kaddr(void *kaddr)
{
	if (!kaddr)
		return;

	unpin_user_page(virt_to_page(kaddr));
}
173 
174 static void __bpf_obj_unpin_uptrs(struct btf_record *rec, u32 cnt, void *obj)
175 {
176 	const struct btf_field *field;
177 	void **uptr_addr;
178 	int i;
179 
180 	for (i = 0, field = rec->fields; i < cnt; i++, field++) {
181 		if (field->type != BPF_UPTR)
182 			continue;
183 
184 		uptr_addr = obj + field->offset;
185 		unpin_uptr_kaddr(*uptr_addr);
186 	}
187 }
188 
189 static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *obj)
190 {
191 	if (!btf_record_has_field(rec, BPF_UPTR))
192 		return;
193 
194 	__bpf_obj_unpin_uptrs(rec, rec->cnt, obj);
195 }
196 
197 static int bpf_obj_pin_uptrs(struct btf_record *rec, void *obj)
198 {
199 	const struct btf_field *field;
200 	const struct btf_type *t;
201 	unsigned long start, end;
202 	struct page *page;
203 	void **uptr_addr;
204 	int i, err;
205 
206 	if (!btf_record_has_field(rec, BPF_UPTR))
207 		return 0;
208 
209 	for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) {
210 		if (field->type != BPF_UPTR)
211 			continue;
212 
213 		uptr_addr = obj + field->offset;
214 		start = *(unsigned long *)uptr_addr;
215 		if (!start)
216 			continue;
217 
218 		t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
219 		/* t->size was checked for zero before */
220 		if (check_add_overflow(start, t->size - 1, &end)) {
221 			err = -EFAULT;
222 			goto unpin_all;
223 		}
224 
225 		/* The uptr's struct cannot span across two pages */
226 		if ((start & PAGE_MASK) != (end & PAGE_MASK)) {
227 			err = -EOPNOTSUPP;
228 			goto unpin_all;
229 		}
230 
231 		err = pin_user_pages_fast(start, 1, FOLL_LONGTERM | FOLL_WRITE, &page);
232 		if (err != 1)
233 			goto unpin_all;
234 
235 		if (PageHighMem(page)) {
236 			err = -EOPNOTSUPP;
237 			unpin_user_page(page);
238 			goto unpin_all;
239 		}
240 
241 		*uptr_addr = page_address(page) + offset_in_page(start);
242 	}
243 
244 	return 0;
245 
246 unpin_all:
247 	__bpf_obj_unpin_uptrs(rec, i, obj);
248 	return err;
249 }
250 
251 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
252 				void *key, void *value, __u64 flags)
253 {
254 	int err;
255 
256 	/* Need to create a kthread, thus must support schedule */
257 	if (bpf_map_is_offloaded(map)) {
258 		return bpf_map_offload_update_elem(map, key, value, flags);
259 	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
260 		   map->map_type == BPF_MAP_TYPE_ARENA ||
261 		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
262 		return map->ops->map_update_elem(map, key, value, flags);
263 	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
264 		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
265 		return sock_map_update_elem_sys(map, key, value, flags);
266 	} else if (IS_FD_PROG_ARRAY(map)) {
267 		return bpf_fd_array_map_update_elem(map, map_file, key, value,
268 						    flags);
269 	}
270 
271 	bpf_disable_instrumentation();
272 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
273 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
274 		err = bpf_percpu_hash_update(map, key, value, flags);
275 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
276 		err = bpf_percpu_array_update(map, key, value, flags);
277 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
278 		err = bpf_percpu_cgroup_storage_update(map, key, value,
279 						       flags);
280 	} else if (IS_FD_ARRAY(map)) {
281 		err = bpf_fd_array_map_update_elem(map, map_file, key, value,
282 						   flags);
283 	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
284 		err = bpf_fd_htab_map_update_elem(map, map_file, key, value,
285 						  flags);
286 	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
287 		/* rcu_read_lock() is not needed */
288 		err = bpf_fd_reuseport_array_update_elem(map, key, value,
289 							 flags);
290 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
291 		   map->map_type == BPF_MAP_TYPE_STACK ||
292 		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
293 		err = map->ops->map_push_elem(map, value, flags);
294 	} else {
295 		err = bpf_obj_pin_uptrs(map->record, value);
296 		if (!err) {
297 			rcu_read_lock();
298 			err = map->ops->map_update_elem(map, key, value, flags);
299 			rcu_read_unlock();
300 			if (err)
301 				bpf_obj_unpin_uptrs(map->record, value);
302 		}
303 	}
304 	bpf_enable_instrumentation();
305 
306 	return err;
307 }
308 
309 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
310 			      __u64 flags)
311 {
312 	void *ptr;
313 	int err;
314 
315 	if (bpf_map_is_offloaded(map))
316 		return bpf_map_offload_lookup_elem(map, key, value);
317 
318 	bpf_disable_instrumentation();
319 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
320 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
321 		err = bpf_percpu_hash_copy(map, key, value, flags);
322 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
323 		err = bpf_percpu_array_copy(map, key, value, flags);
324 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
325 		err = bpf_percpu_cgroup_storage_copy(map, key, value, flags);
326 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
327 		err = bpf_stackmap_extract(map, key, value, false);
328 	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
329 		err = bpf_fd_array_map_lookup_elem(map, key, value);
330 	} else if (IS_FD_HASH(map)) {
331 		err = bpf_fd_htab_map_lookup_elem(map, key, value);
332 	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
333 		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
334 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
335 		   map->map_type == BPF_MAP_TYPE_STACK ||
336 		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
337 		err = map->ops->map_peek_elem(map, value);
338 	} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
339 		/* struct_ops map requires directly updating "value" */
340 		err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
341 	} else {
342 		rcu_read_lock();
343 		if (map->ops->map_lookup_elem_sys_only)
344 			ptr = map->ops->map_lookup_elem_sys_only(map, key);
345 		else
346 			ptr = map->ops->map_lookup_elem(map, key);
347 		if (IS_ERR(ptr)) {
348 			err = PTR_ERR(ptr);
349 		} else if (!ptr) {
350 			err = -ENOENT;
351 		} else {
352 			err = 0;
353 			if (flags & BPF_F_LOCK)
354 				/* lock 'ptr' and copy everything but lock */
355 				copy_map_value_locked(map, value, ptr, true);
356 			else
357 				copy_map_value(map, value, ptr);
358 			/* mask lock and timer, since value wasn't zero inited */
359 			check_and_init_map_value(map, value);
360 		}
361 		rcu_read_unlock();
362 	}
363 
364 	bpf_enable_instrumentation();
365 
366 	return err;
367 }
368 
369 /* Please, do not use this function outside from the map creation path
370  * (e.g. in map update path) without taking care of setting the active
371  * memory cgroup (see at bpf_map_kmalloc_node() for example).
372  */
373 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
374 {
375 	/* We really just want to fail instead of triggering OOM killer
376 	 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
377 	 * which is used for lower order allocation requests.
378 	 *
379 	 * It has been observed that higher order allocation requests done by
380 	 * vmalloc with __GFP_NORETRY being set might fail due to not trying
381 	 * to reclaim memory from the page cache, thus we set
382 	 * __GFP_RETRY_MAYFAIL to avoid such situations.
383 	 */
384 
385 	gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO);
386 	unsigned int flags = 0;
387 	unsigned long align = 1;
388 	void *area;
389 
390 	if (size >= SIZE_MAX)
391 		return NULL;
392 
393 	/* kmalloc()'ed memory can't be mmap()'ed */
394 	if (mmapable) {
395 		BUG_ON(!PAGE_ALIGNED(size));
396 		align = SHMLBA;
397 		flags = VM_USERMAP;
398 	} else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
399 		area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
400 				    numa_node);
401 		if (area != NULL)
402 			return area;
403 	}
404 
405 	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
406 			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
407 			flags, numa_node, __builtin_return_address(0));
408 }
409 
410 void *bpf_map_area_alloc(u64 size, int numa_node)
411 {
412 	return __bpf_map_area_alloc(size, numa_node, false);
413 }
414 
415 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
416 {
417 	return __bpf_map_area_alloc(size, numa_node, true);
418 }
419 
/*
 * Release a map area. kvfree() is used because __bpf_map_area_alloc() may
 * return either kmalloc'ed or vmalloc'ed memory.
 */
void bpf_map_area_free(void *area)
{
	kvfree(area);
}
424 
425 static u32 bpf_map_flags_retain_permanent(u32 flags)
426 {
427 	/* Some map creation flags are not tied to the map object but
428 	 * rather to the map fd instead, so they have no meaning upon
429 	 * map object inspection since multiple file descriptors with
430 	 * different (access) properties can exist here. Thus, given
431 	 * this has zero meaning for the map itself, lets clear these
432 	 * from here.
433 	 */
434 	return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
435 }
436 
437 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
438 {
439 	map->map_type = attr->map_type;
440 	map->key_size = attr->key_size;
441 	map->value_size = attr->value_size;
442 	map->max_entries = attr->max_entries;
443 	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
444 	map->numa_node = bpf_map_attr_numa_node(attr);
445 	map->map_extra = attr->map_extra;
446 }
447 
448 static int bpf_map_alloc_id(struct bpf_map *map)
449 {
450 	int id;
451 
452 	idr_preload(GFP_KERNEL);
453 	spin_lock_bh(&map_idr_lock);
454 	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
455 	if (id > 0)
456 		map->id = id;
457 	spin_unlock_bh(&map_idr_lock);
458 	idr_preload_end();
459 
460 	if (WARN_ON_ONCE(!id))
461 		return -ENOSPC;
462 
463 	return id > 0 ? 0 : id;
464 }
465 
466 void bpf_map_free_id(struct bpf_map *map)
467 {
468 	unsigned long flags;
469 
470 	/* Offloaded maps are removed from the IDR store when their device
471 	 * disappears - even if someone holds an fd to them they are unusable,
472 	 * the memory is gone, all ops will fail; they are simply waiting for
473 	 * refcnt to drop to be freed.
474 	 */
475 	if (!map->id)
476 		return;
477 
478 	spin_lock_irqsave(&map_idr_lock, flags);
479 
480 	idr_remove(&map_idr, map->id);
481 	map->id = 0;
482 
483 	spin_unlock_irqrestore(&map_idr_lock, flags);
484 }
485 
486 #ifdef CONFIG_MEMCG
487 static void bpf_map_save_memcg(struct bpf_map *map)
488 {
489 	/* Currently if a map is created by a process belonging to the root
490 	 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
491 	 * So we have to check map->objcg for being NULL each time it's
492 	 * being used.
493 	 */
494 	if (memcg_bpf_enabled())
495 		map->objcg = get_obj_cgroup_from_current();
496 }
497 
498 static void bpf_map_release_memcg(struct bpf_map *map)
499 {
500 	if (map->objcg)
501 		obj_cgroup_put(map->objcg);
502 }
503 
504 static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
505 {
506 	if (map->objcg)
507 		return get_mem_cgroup_from_objcg(map->objcg);
508 
509 	return root_mem_cgroup;
510 }
511 
/*
 * Make @map's memory cgroup the active one.
 *
 * @old_memcg: receives the previously active memcg
 * @new_memcg: receives the map's memcg
 *
 * Both values are meant to be passed to bpf_map_memcg_exit() afterwards.
 */
void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
			 struct mem_cgroup **new_memcg)
{
	struct mem_cgroup *memcg = bpf_map_get_memcg(map);

	*new_memcg = memcg;
	*old_memcg = set_active_memcg(memcg);
}
518 
/*
 * Undo bpf_map_memcg_enter(): make @old_memcg active again and put the
 * reference on @new_memcg.
 */
void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
			struct mem_cgroup *new_memcg)
{
	/* Restore first, then release the map's memcg. */
	set_active_memcg(old_memcg);
	mem_cgroup_put(new_memcg);
}
525 
526 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
527 			   int node)
528 {
529 	struct mem_cgroup *memcg, *old_memcg;
530 	void *ptr;
531 
532 	bpf_map_memcg_enter(map, &old_memcg, &memcg);
533 	ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
534 	bpf_map_memcg_exit(old_memcg, memcg);
535 
536 	return ptr;
537 }
538 
539 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
540 			     int node)
541 {
542 	struct mem_cgroup *memcg, *old_memcg;
543 	void *ptr;
544 
545 	bpf_map_memcg_enter(map, &old_memcg, &memcg);
546 	ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
547 	bpf_map_memcg_exit(old_memcg, memcg);
548 
549 	return ptr;
550 }
551 
552 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
553 {
554 	struct mem_cgroup *memcg, *old_memcg;
555 	void *ptr;
556 
557 	bpf_map_memcg_enter(map, &old_memcg, &memcg);
558 	ptr = kzalloc(size, flags | __GFP_ACCOUNT);
559 	bpf_map_memcg_exit(old_memcg, memcg);
560 
561 	return ptr;
562 }
563 
564 void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
565 		       gfp_t flags)
566 {
567 	struct mem_cgroup *memcg, *old_memcg;
568 	void *ptr;
569 
570 	bpf_map_memcg_enter(map, &old_memcg, &memcg);
571 	ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
572 	bpf_map_memcg_exit(old_memcg, memcg);
573 
574 	return ptr;
575 }
576 
577 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
578 				    size_t align, gfp_t flags)
579 {
580 	struct mem_cgroup *memcg, *old_memcg;
581 	void __percpu *ptr;
582 
583 	bpf_map_memcg_enter(map, &old_memcg, &memcg);
584 	ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
585 	bpf_map_memcg_exit(old_memcg, memcg);
586 
587 	return ptr;
588 }
589 
590 #else
/* !CONFIG_MEMCG: there is no memory cgroup to record. */
static void bpf_map_save_memcg(struct bpf_map *map)
{
}
594 
/* !CONFIG_MEMCG: there is no memory cgroup reference to drop. */
static void bpf_map_release_memcg(struct bpf_map *map)
{
}
598 #endif
599 
600 static bool can_alloc_pages(void)
601 {
602 	return preempt_count() == 0 && !irqs_disabled() &&
603 		!IS_ENABLED(CONFIG_PREEMPT_RT);
604 }
605 
606 static struct page *__bpf_alloc_page(int nid)
607 {
608 	if (!can_alloc_pages())
609 		return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0);
610 
611 	return alloc_pages_node(nid,
612 				GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT
613 				| __GFP_NOWARN,
614 				0);
615 }
616 
617 int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
618 			unsigned long nr_pages, struct page **pages)
619 {
620 	unsigned long i, j;
621 	struct page *pg;
622 	int ret = 0;
623 
624 	for (i = 0; i < nr_pages; i++) {
625 		pg = __bpf_alloc_page(nid);
626 
627 		if (pg) {
628 			pages[i] = pg;
629 			continue;
630 		}
631 		for (j = 0; j < i; j++)
632 			free_pages_nolock(pages[j], 0);
633 		ret = -ENOMEM;
634 		break;
635 	}
636 
637 	return ret;
638 }
639 
640 
641 static int btf_field_cmp(const void *a, const void *b)
642 {
643 	const struct btf_field *f1 = a, *f2 = b;
644 
645 	if (f1->offset < f2->offset)
646 		return -1;
647 	else if (f1->offset > f2->offset)
648 		return 1;
649 	return 0;
650 }
651 
652 struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset,
653 				  u32 field_mask)
654 {
655 	struct btf_field *field;
656 
657 	if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask))
658 		return NULL;
659 	field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp);
660 	if (!field || !(field->type & field_mask))
661 		return NULL;
662 	return field;
663 }
664 
665 void btf_record_free(struct btf_record *rec)
666 {
667 	int i;
668 
669 	if (IS_ERR_OR_NULL(rec))
670 		return;
671 	for (i = 0; i < rec->cnt; i++) {
672 		switch (rec->fields[i].type) {
673 		case BPF_KPTR_UNREF:
674 		case BPF_KPTR_REF:
675 		case BPF_KPTR_PERCPU:
676 		case BPF_UPTR:
677 			if (rec->fields[i].kptr.module)
678 				module_put(rec->fields[i].kptr.module);
679 			if (btf_is_kernel(rec->fields[i].kptr.btf))
680 				btf_put(rec->fields[i].kptr.btf);
681 			break;
682 		case BPF_LIST_HEAD:
683 		case BPF_LIST_NODE:
684 		case BPF_RB_ROOT:
685 		case BPF_RB_NODE:
686 		case BPF_SPIN_LOCK:
687 		case BPF_RES_SPIN_LOCK:
688 		case BPF_TIMER:
689 		case BPF_REFCOUNT:
690 		case BPF_WORKQUEUE:
691 		case BPF_TASK_WORK:
692 			/* Nothing to release */
693 			break;
694 		default:
695 			WARN_ON_ONCE(1);
696 			continue;
697 		}
698 	}
699 	kfree(rec);
700 }
701 
702 void bpf_map_free_record(struct bpf_map *map)
703 {
704 	btf_record_free(map->record);
705 	map->record = NULL;
706 }
707 
708 struct btf_record *btf_record_dup(const struct btf_record *rec)
709 {
710 	const struct btf_field *fields;
711 	struct btf_record *new_rec;
712 	int ret, size, i;
713 
714 	if (IS_ERR_OR_NULL(rec))
715 		return NULL;
716 	size = struct_size(rec, fields, rec->cnt);
717 	new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN);
718 	if (!new_rec)
719 		return ERR_PTR(-ENOMEM);
720 	/* Do a deep copy of the btf_record */
721 	fields = rec->fields;
722 	new_rec->cnt = 0;
723 	for (i = 0; i < rec->cnt; i++) {
724 		switch (fields[i].type) {
725 		case BPF_KPTR_UNREF:
726 		case BPF_KPTR_REF:
727 		case BPF_KPTR_PERCPU:
728 		case BPF_UPTR:
729 			if (btf_is_kernel(fields[i].kptr.btf))
730 				btf_get(fields[i].kptr.btf);
731 			if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
732 				ret = -ENXIO;
733 				goto free;
734 			}
735 			break;
736 		case BPF_LIST_HEAD:
737 		case BPF_LIST_NODE:
738 		case BPF_RB_ROOT:
739 		case BPF_RB_NODE:
740 		case BPF_SPIN_LOCK:
741 		case BPF_RES_SPIN_LOCK:
742 		case BPF_TIMER:
743 		case BPF_REFCOUNT:
744 		case BPF_WORKQUEUE:
745 		case BPF_TASK_WORK:
746 			/* Nothing to acquire */
747 			break;
748 		default:
749 			ret = -EFAULT;
750 			WARN_ON_ONCE(1);
751 			goto free;
752 		}
753 		new_rec->cnt++;
754 	}
755 	return new_rec;
756 free:
757 	btf_record_free(new_rec);
758 	return ERR_PTR(ret);
759 }
760 
761 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b)
762 {
763 	bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b);
764 	int size;
765 
766 	if (!a_has_fields && !b_has_fields)
767 		return true;
768 	if (a_has_fields != b_has_fields)
769 		return false;
770 	if (rec_a->cnt != rec_b->cnt)
771 		return false;
772 	size = struct_size(rec_a, fields, rec_a->cnt);
773 	/* btf_parse_fields uses kzalloc to allocate a btf_record, so unused
774 	 * members are zeroed out. So memcmp is safe to do without worrying
775 	 * about padding/unused fields.
776 	 *
777 	 * While spin_lock, timer, and kptr have no relation to map BTF,
778 	 * list_head metadata is specific to map BTF, the btf and value_rec
779 	 * members in particular. btf is the map BTF, while value_rec points to
780 	 * btf_record in that map BTF.
781 	 *
782 	 * So while by default, we don't rely on the map BTF (which the records
783 	 * were parsed from) matching for both records, which is not backwards
784 	 * compatible, in case list_head is part of it, we implicitly rely on
785 	 * that by way of depending on memcmp succeeding for it.
786 	 */
787 	return !memcmp(rec_a, rec_b, size);
788 }
789 
790 void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
791 {
792 	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER)))
793 		return;
794 	bpf_timer_cancel_and_free(obj + rec->timer_off);
795 }
796 
797 void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
798 {
799 	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE)))
800 		return;
801 	bpf_wq_cancel_and_free(obj + rec->wq_off);
802 }
803 
804 void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
805 {
806 	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
807 		return;
808 	bpf_task_work_cancel_and_free(obj + rec->task_work_off);
809 }
810 
811 void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
812 {
813 	const struct btf_field *fields;
814 	int i;
815 
816 	if (IS_ERR_OR_NULL(rec))
817 		return;
818 	fields = rec->fields;
819 	for (i = 0; i < rec->cnt; i++) {
820 		struct btf_struct_meta *pointee_struct_meta;
821 		const struct btf_field *field = &fields[i];
822 		void *field_ptr = obj + field->offset;
823 		void *xchgd_field;
824 
825 		switch (fields[i].type) {
826 		case BPF_SPIN_LOCK:
827 		case BPF_RES_SPIN_LOCK:
828 			break;
829 		case BPF_TIMER:
830 			bpf_timer_cancel_and_free(field_ptr);
831 			break;
832 		case BPF_WORKQUEUE:
833 			bpf_wq_cancel_and_free(field_ptr);
834 			break;
835 		case BPF_TASK_WORK:
836 			bpf_task_work_cancel_and_free(field_ptr);
837 			break;
838 		case BPF_KPTR_UNREF:
839 			WRITE_ONCE(*(u64 *)field_ptr, 0);
840 			break;
841 		case BPF_KPTR_REF:
842 		case BPF_KPTR_PERCPU:
843 			xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
844 			if (!xchgd_field)
845 				break;
846 
847 			if (!btf_is_kernel(field->kptr.btf)) {
848 				pointee_struct_meta = btf_find_struct_meta(field->kptr.btf,
849 									   field->kptr.btf_id);
850 				__bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ?
851 								 pointee_struct_meta->record : NULL,
852 								 fields[i].type == BPF_KPTR_PERCPU);
853 			} else {
854 				field->kptr.dtor(xchgd_field);
855 			}
856 			break;
857 		case BPF_UPTR:
858 			/* The caller ensured that no one is using the uptr */
859 			unpin_uptr_kaddr(*(void **)field_ptr);
860 			break;
861 		case BPF_LIST_HEAD:
862 			if (WARN_ON_ONCE(rec->spin_lock_off < 0))
863 				continue;
864 			bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
865 			break;
866 		case BPF_RB_ROOT:
867 			if (WARN_ON_ONCE(rec->spin_lock_off < 0))
868 				continue;
869 			bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
870 			break;
871 		case BPF_LIST_NODE:
872 		case BPF_RB_NODE:
873 		case BPF_REFCOUNT:
874 			break;
875 		default:
876 			WARN_ON_ONCE(1);
877 			continue;
878 		}
879 	}
880 }
881 
882 static void bpf_map_free(struct bpf_map *map)
883 {
884 	struct btf_record *rec = map->record;
885 	struct btf *btf = map->btf;
886 
887 	/* implementation dependent freeing. Disabling migration to simplify
888 	 * the free of values or special fields allocated from bpf memory
889 	 * allocator.
890 	 */
891 	kfree(map->excl_prog_sha);
892 	migrate_disable();
893 	map->ops->map_free(map);
894 	migrate_enable();
895 
896 	/* Delay freeing of btf_record for maps, as map_free
897 	 * callback usually needs access to them. It is better to do it here
898 	 * than require each callback to do the free itself manually.
899 	 *
900 	 * Note that the btf_record stashed in map->inner_map_meta->record was
901 	 * already freed using the map_free callback for map in map case which
902 	 * eventually calls bpf_map_free_meta, since inner_map_meta is only a
903 	 * template bpf_map struct used during verification.
904 	 */
905 	btf_record_free(rec);
906 	/* Delay freeing of btf for maps, as map_free callback may need
907 	 * struct_meta info which will be freed with btf_put().
908 	 */
909 	btf_put(btf);
910 }
911 
912 /* called from workqueue */
913 static void bpf_map_free_deferred(struct work_struct *work)
914 {
915 	struct bpf_map *map = container_of(work, struct bpf_map, work);
916 
917 	security_bpf_map_free(map);
918 	bpf_map_release_memcg(map);
919 	bpf_map_owner_free(map);
920 	bpf_map_free(map);
921 }
922 
923 static void bpf_map_put_uref(struct bpf_map *map)
924 {
925 	if (atomic64_dec_and_test(&map->usercnt)) {
926 		if (map->ops->map_release_uref)
927 			map->ops->map_release_uref(map);
928 	}
929 }
930 
931 static void bpf_map_free_in_work(struct bpf_map *map)
932 {
933 	INIT_WORK(&map->work, bpf_map_free_deferred);
934 	/* Avoid spawning kworkers, since they all might contend
935 	 * for the same mutex like slab_mutex.
936 	 */
937 	queue_work(system_dfl_wq, &map->work);
938 }
939 
940 static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
941 {
942 	bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
943 }
944 
945 /* decrement map refcnt and schedule it for freeing via workqueue
946  * (underlying map implementation ops->map_free() might sleep)
947  */
948 void bpf_map_put(struct bpf_map *map)
949 {
950 	if (atomic64_dec_and_test(&map->refcnt)) {
951 		/* bpf_map_free_id() must be called first */
952 		bpf_map_free_id(map);
953 
954 		WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
955 		/* RCU tasks trace grace period implies RCU grace period. */
956 		if (READ_ONCE(map->free_after_mult_rcu_gp))
957 			call_rcu_tasks_trace(&map->rcu, bpf_map_free_rcu_gp);
958 		else if (READ_ONCE(map->free_after_rcu_gp))
959 			call_rcu(&map->rcu, bpf_map_free_rcu_gp);
960 		else
961 			bpf_map_free_in_work(map);
962 	}
963 }
964 EXPORT_SYMBOL_GPL(bpf_map_put);
965 
/* Drop one user reference and then one regular reference on @map. */
void bpf_map_put_with_uref(struct bpf_map *map)
{
	/* The user reference goes first, the regular reference second. */
	bpf_map_put_uref(map);
	bpf_map_put(map);
}
971 
972 static int bpf_map_release(struct inode *inode, struct file *filp)
973 {
974 	struct bpf_map *map = filp->private_data;
975 
976 	if (map->ops->map_release)
977 		map->ops->map_release(map, filp);
978 
979 	bpf_map_put_with_uref(map);
980 	return 0;
981 }
982 
983 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
984 {
985 	fmode_t mode = fd_file(f)->f_mode;
986 
987 	/* Our file permissions may have been overridden by global
988 	 * map permissions facing syscall side.
989 	 */
990 	if (READ_ONCE(map->frozen))
991 		mode &= ~FMODE_CAN_WRITE;
992 	return mode;
993 }
994 
995 #ifdef CONFIG_PROC_FS
996 /* Show the memory usage of a bpf map */
997 static u64 bpf_map_memory_usage(const struct bpf_map *map)
998 {
999 	return map->ops->map_mem_usage(map);
1000 }
1001 
1002 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
1003 {
1004 	struct bpf_map *map = filp->private_data;
1005 	u32 type = 0, jited = 0;
1006 
1007 	spin_lock(&map->owner_lock);
1008 	if (map->owner) {
1009 		type  = map->owner->type;
1010 		jited = map->owner->jited;
1011 	}
1012 	spin_unlock(&map->owner_lock);
1013 
1014 	seq_printf(m,
1015 		   "map_type:\t%u\n"
1016 		   "key_size:\t%u\n"
1017 		   "value_size:\t%u\n"
1018 		   "max_entries:\t%u\n"
1019 		   "map_flags:\t%#x\n"
1020 		   "map_extra:\t%#llx\n"
1021 		   "memlock:\t%llu\n"
1022 		   "map_id:\t%u\n"
1023 		   "frozen:\t%u\n",
1024 		   map->map_type,
1025 		   map->key_size,
1026 		   map->value_size,
1027 		   map->max_entries,
1028 		   map->map_flags,
1029 		   (unsigned long long)map->map_extra,
1030 		   bpf_map_memory_usage(map),
1031 		   map->id,
1032 		   READ_ONCE(map->frozen));
1033 	if (type) {
1034 		seq_printf(m, "owner_prog_type:\t%u\n", type);
1035 		seq_printf(m, "owner_jited:\t%u\n", jited);
1036 	}
1037 }
1038 #endif
1039 
/* Placeholder ->read() handler for map files: never transfers any data and
 * always fails with -EINVAL.
 */
static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}
1048 
/* Placeholder ->write() handler for map files: never consumes any data and
 * always fails with -EINVAL.
 */
static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}
1057 
1058 /* called for any extra memory-mapped regions (except initial) */
1059 static void bpf_map_mmap_open(struct vm_area_struct *vma)
1060 {
1061 	struct bpf_map *map = vma->vm_file->private_data;
1062 
1063 	if (vma->vm_flags & VM_MAYWRITE)
1064 		bpf_map_write_active_inc(map);
1065 }
1066 
1067 /* called for all unmapped memory region (including initial) */
1068 static void bpf_map_mmap_close(struct vm_area_struct *vma)
1069 {
1070 	struct bpf_map *map = vma->vm_file->private_data;
1071 
1072 	if (vma->vm_flags & VM_MAYWRITE)
1073 		bpf_map_write_active_dec(map);
1074 }
1075 
/* Default VMA callbacks installed by bpf_map_mmap(); they keep the map's
 * write-active count in step with the writable mappings that exist.
 */
static const struct vm_operations_struct bpf_map_default_vmops = {
	.open		= bpf_map_mmap_open,
	.close		= bpf_map_mmap_close,
};
1080 
/* ->mmap() handler of bpf_map_fops.
 *
 * Returns -ENOTSUPP when the map type has no map_mmap callback or the map
 * has a btf record, -EINVAL for non-shared mappings, -EPERM for a writable
 * mapping of a frozen map, -EACCES for a writable mapping of a
 * BPF_F_RDONLY_PROG map, otherwise the result of the map type's map_mmap
 * callback.
 */
static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct bpf_map *map = filp->private_data;
	int err = 0;

	if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
		return -ENOTSUPP;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	/* frozen is checked and the write-active count is raised while
	 * holding freeze_mutex
	 */
	mutex_lock(&map->freeze_mutex);

	if (vma->vm_flags & VM_WRITE) {
		if (map->frozen) {
			err = -EPERM;
			goto out;
		}
		/* map is meant to be read-only, so do not allow mapping as
		 * writable, because it's possible to leak a writable page
		 * reference and allows user-space to still modify it after
		 * freezing, while verifier will assume contents do not change
		 */
		if (map->map_flags & BPF_F_RDONLY_PROG) {
			err = -EACCES;
			goto out;
		}
		bpf_map_write_active_inc(map);
	}
out:
	mutex_unlock(&map->freeze_mutex);
	if (err)
		return err;

	/* set default open/close callbacks */
	vma->vm_ops = &bpf_map_default_vmops;
	vma->vm_private_data = map;
	vm_flags_clear(vma, VM_MAYEXEC);
	/* If mapping is read-only, then disallow potentially re-mapping with
	 * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing
	 * means that as far as BPF map's memory-mapped VMAs are concerned,
	 * VM_WRITE and VM_MAYWRITE are equivalent, if one of them is set,
	 * both should be set, so we can forget about VM_MAYWRITE and always
	 * check just VM_WRITE
	 */
	if (!(vma->vm_flags & VM_WRITE))
		vm_flags_clear(vma, VM_MAYWRITE);

	err = map->ops->map_mmap(map, vma);
	if (err) {
		/* undo the write-active increment taken above */
		if (vma->vm_flags & VM_WRITE)
			bpf_map_write_active_dec(map);
	}

	return err;
}
1137 
1138 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
1139 {
1140 	struct bpf_map *map = filp->private_data;
1141 
1142 	if (map->ops->map_poll)
1143 		return map->ops->map_poll(map, filp, pts);
1144 
1145 	return EPOLLERR;
1146 }
1147 
/* ->get_unmapped_area() handler of bpf_map_fops.
 *
 * Uses the map type's own map_get_unmapped_area callback when it has one.
 * Otherwise falls back to mm_get_unmapped_area() on CONFIG_MMU builds, and
 * simply returns @addr on builds without CONFIG_MMU.
 */
static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr,
					   unsigned long len, unsigned long pgoff,
					   unsigned long flags)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_get_unmapped_area)
		return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags);
#ifdef CONFIG_MMU
	return mm_get_unmapped_area(filp, addr, len, pgoff, flags);
#else
	return addr;
#endif
}
1162 
/* File operations backing every map fd; bpf_map_new_fd() passes this table
 * to anon_inode_getfd(). The read/write handlers are dummies that always
 * fail, and fdinfo output is only wired up with CONFIG_PROC_FS.
 */
const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
	.mmap		= bpf_map_mmap,
	.poll		= bpf_map_poll,
	.get_unmapped_area = bpf_get_unmapped_area,
};
1174 
1175 int bpf_map_new_fd(struct bpf_map *map, int flags)
1176 {
1177 	int ret;
1178 
1179 	ret = security_bpf_map(map, OPEN_FMODE(flags));
1180 	if (ret < 0)
1181 		return ret;
1182 
1183 	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
1184 				flags | O_CLOEXEC);
1185 }
1186 
1187 int bpf_get_file_flag(int flags)
1188 {
1189 	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
1190 		return -EINVAL;
1191 	if (flags & BPF_F_RDONLY)
1192 		return O_RDONLY;
1193 	if (flags & BPF_F_WRONLY)
1194 		return O_WRONLY;
1195 	return O_RDWR;
1196 }
1197 
/* Helper macro to check that the unused fields of 'union bpf_attr' are zero.
 *
 * Evaluates to true when any byte of *attr located after the command's last
 * used field (CMD##_LAST_FIELD) is non-zero. Expects a variable named 'attr'
 * pointing to a union bpf_attr in the scope of the caller.
 */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
1205 
/* Validate and copy an object name.
 *
 * Both @dst and @src must provide at least @size bytes. @dst is zeroed
 * first, then characters are copied for as long as they are alphanumeric,
 * '_' or '.'.
 *
 * Returns the length of the name (excluding the terminating NUL) on success,
 * or -EINVAL if an invalid character is met or no NUL terminator is found
 * within @size bytes.
 */
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
	unsigned int len;

	memset(dst, 0, size);

	for (len = 0; len < size; len++) {
		char c = src[len];

		/* terminator found: the name is complete */
		if (c == '\0')
			return len;

		/* only isalnum(), '_' and '.' chars are accepted */
		if (c != '_' && c != '.' && !isalnum(c))
			return -EINVAL;

		dst[len] = c;
	}

	/* No '\0' found in "size" number of bytes */
	return -EINVAL;
}
1229 EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);
1230 
/* map_check_btf implementation that unconditionally rejects BTF key/value
 * type information with -ENOTSUPP; none of the arguments are inspected.
 */
int map_check_no_btf(struct bpf_map *map,
		     const struct btf *btf,
		     const struct btf_type *key_type,
		     const struct btf_type *value_type)
{
	return -ENOTSUPP;
}
1238 
1239 static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
1240 			 const struct btf *btf, u32 btf_key_id, u32 btf_value_id)
1241 {
1242 	const struct btf_type *key_type, *value_type;
1243 	u32 key_size, value_size;
1244 	int ret = 0;
1245 
1246 	/* Some maps allow key to be unspecified. */
1247 	if (btf_key_id) {
1248 		key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
1249 		if (!key_type || key_size != map->key_size)
1250 			return -EINVAL;
1251 	} else {
1252 		key_type = btf_type_by_id(btf, 0);
1253 		if (!map->ops->map_check_btf)
1254 			return -EINVAL;
1255 	}
1256 
1257 	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
1258 	if (!value_type || value_size != map->value_size)
1259 		return -EINVAL;
1260 
1261 	map->record = btf_parse_fields(btf, value_type,
1262 				       BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
1263 				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
1264 				       BPF_TASK_WORK,
1265 				       map->value_size);
1266 	if (!IS_ERR_OR_NULL(map->record)) {
1267 		int i;
1268 
1269 		if (!bpf_token_capable(token, CAP_BPF)) {
1270 			ret = -EPERM;
1271 			goto free_map_tab;
1272 		}
1273 		if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
1274 			ret = -EACCES;
1275 			goto free_map_tab;
1276 		}
1277 		for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) {
1278 			switch (map->record->field_mask & (1 << i)) {
1279 			case 0:
1280 				continue;
1281 			case BPF_SPIN_LOCK:
1282 			case BPF_RES_SPIN_LOCK:
1283 				if (map->map_type != BPF_MAP_TYPE_HASH &&
1284 				    map->map_type != BPF_MAP_TYPE_RHASH &&
1285 				    map->map_type != BPF_MAP_TYPE_ARRAY &&
1286 				    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
1287 				    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
1288 				    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
1289 				    map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
1290 				    map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
1291 					ret = -EOPNOTSUPP;
1292 					goto free_map_tab;
1293 				}
1294 				break;
1295 			case BPF_TIMER:
1296 			case BPF_WORKQUEUE:
1297 			case BPF_TASK_WORK:
1298 				if (map->map_type != BPF_MAP_TYPE_HASH &&
1299 				    map->map_type != BPF_MAP_TYPE_RHASH &&
1300 				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
1301 				    map->map_type != BPF_MAP_TYPE_ARRAY) {
1302 					ret = -EOPNOTSUPP;
1303 					goto free_map_tab;
1304 				}
1305 				break;
1306 			case BPF_KPTR_UNREF:
1307 			case BPF_KPTR_REF:
1308 			case BPF_KPTR_PERCPU:
1309 			case BPF_REFCOUNT:
1310 				if (map->map_type != BPF_MAP_TYPE_HASH &&
1311 				    map->map_type != BPF_MAP_TYPE_RHASH &&
1312 				    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
1313 				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
1314 				    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH &&
1315 				    map->map_type != BPF_MAP_TYPE_ARRAY &&
1316 				    map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
1317 				    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
1318 				    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
1319 				    map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
1320 				    map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
1321 					ret = -EOPNOTSUPP;
1322 					goto free_map_tab;
1323 				}
1324 				break;
1325 			case BPF_UPTR:
1326 				if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) {
1327 					ret = -EOPNOTSUPP;
1328 					goto free_map_tab;
1329 				}
1330 				break;
1331 			case BPF_LIST_HEAD:
1332 			case BPF_RB_ROOT:
1333 				if (map->map_type != BPF_MAP_TYPE_HASH &&
1334 				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
1335 				    map->map_type != BPF_MAP_TYPE_ARRAY) {
1336 					ret = -EOPNOTSUPP;
1337 					goto free_map_tab;
1338 				}
1339 				break;
1340 			default:
1341 				/* Fail if map_type checks are missing for a field type */
1342 				ret = -EOPNOTSUPP;
1343 				goto free_map_tab;
1344 			}
1345 		}
1346 	}
1347 
1348 	ret = btf_check_and_fixup_fields(btf, map->record);
1349 	if (ret < 0)
1350 		goto free_map_tab;
1351 
1352 	if (map->ops->map_check_btf) {
1353 		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
1354 		if (ret < 0)
1355 			goto free_map_tab;
1356 	}
1357 
1358 	return ret;
1359 free_map_tab:
1360 	bpf_map_free_record(map);
1361 	return ret;
1362 }
1363 
#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
/* called via syscall
 *
 * First half of BPF_MAP_CREATE: validate @attr, resolve the optional BPF
 * token, check permissions for the requested map type, allocate the map and
 * initialise its common fields (name, cookie, refcounts, BTF info, exclusive
 * program hash). Problems are reported to @log where a message exists.
 *
 * Note that BPF_F_TOKEN_FD is cleared from attr->map_flags as a side effect.
 *
 * On success returns 0 and hands the new map and the token (possibly NULL)
 * to the caller through @mapp and @tokenp. On failure returns a negative
 * errno; a map allocated so far is freed, the token reference is dropped and
 * neither output pointer is written.
 */
static int map_create_alloc(union bpf_attr *attr, bpfptr_t uattr, struct bpf_verifier_log *log,
			    struct bpf_map **mapp, struct bpf_token **tokenp)
{
	const struct bpf_map_ops *ops;
	struct bpf_token *token = NULL;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 map_type = attr->map_type;
	struct bpf_map *map;
	bool token_flag;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err) {
		bpf_log(log, "Invalid attr.\n");
		return -EINVAL;
	}

	/* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it
	 * to avoid per-map type checks tripping on unknown flag
	 */
	token_flag = attr->map_flags & BPF_F_TOKEN_FD;
	attr->map_flags &= ~BPF_F_TOKEN_FD;

	if (attr->btf_vmlinux_value_type_id) {
		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
			bpf_log(log, "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n");
			return -EINVAL;
		}
		if (attr->btf_key_type_id || attr->btf_value_type_id) {
			bpf_log(log, "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n");
			return -EINVAL;
		}
	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
		bpf_log(log, "Invalid btf_value_type_id.\n");
		return -EINVAL;
	}

	/* map_extra may only be non-zero for the map types listed here */
	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
	    attr->map_type != BPF_MAP_TYPE_ARENA &&
	    attr->map_type != BPF_MAP_TYPE_RHASH &&
	    attr->map_extra != 0) {
		bpf_log(log, "Invalid map_extra.\n");
		return -EINVAL;
	}

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node))) {
		bpf_log(log, "Invalid numa_node.\n");
		return -EINVAL;
	}

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map_type = attr->map_type;
	if (map_type >= ARRAY_SIZE(bpf_map_types)) {
		bpf_log(log, "Invalid map_type.\n");
		return -EINVAL;
	}
	/* clamp the index under speculation before using it for the lookup */
	map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
	ops = bpf_map_types[map_type];
	if (!ops)
		return -EINVAL;

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return err;
	}
	/* maps bound to a device (map_ifindex set) use the offload ops */
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	if (!ops->map_mem_usage)
		return -EINVAL;

	if (token_flag) {
		token = bpf_token_get_from_fd(attr->map_token_fd);
		if (IS_ERR(token)) {
			bpf_log(log, "Invalid map_token_fd.\n");
			return PTR_ERR(token);
		}

		/* if current token doesn't grant map creation permissions,
		 * then we can't use this token, so ignore it and rely on
		 * system-wide capabilities checks
		 */
		if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) ||
		    !bpf_token_allow_map_type(token, attr->map_type)) {
			bpf_token_put(token);
			token = NULL;
		}
	}

	/* every "goto put_token" up to the map allocation reports -EPERM */
	err = -EPERM;

	/* Intent here is for unprivileged_bpf_disabled to block BPF map
	 * creation for unprivileged users; other actions depend
	 * on fd availability and access to bpffs, so are dependent on
	 * object creation success. Even with unprivileged BPF disabled,
	 * capability checks are still carried out.
	 */
	if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF))
		goto put_token;

	/* check privileged map type permissions */
	switch (map_type) {
	case BPF_MAP_TYPE_ARRAY:
	case BPF_MAP_TYPE_PERCPU_ARRAY:
	case BPF_MAP_TYPE_PROG_ARRAY:
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
	case BPF_MAP_TYPE_CGROUP_ARRAY:
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_HASH:
	case BPF_MAP_TYPE_RHASH:
	case BPF_MAP_TYPE_PERCPU_HASH:
	case BPF_MAP_TYPE_HASH_OF_MAPS:
	case BPF_MAP_TYPE_RINGBUF:
	case BPF_MAP_TYPE_USER_RINGBUF:
	case BPF_MAP_TYPE_CGROUP_STORAGE:
	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
		/* unprivileged */
		break;
	case BPF_MAP_TYPE_SK_STORAGE:
	case BPF_MAP_TYPE_INODE_STORAGE:
	case BPF_MAP_TYPE_TASK_STORAGE:
	case BPF_MAP_TYPE_CGRP_STORAGE:
	case BPF_MAP_TYPE_BLOOM_FILTER:
	case BPF_MAP_TYPE_LPM_TRIE:
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
	case BPF_MAP_TYPE_STACK_TRACE:
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
	case BPF_MAP_TYPE_LRU_HASH:
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
	case BPF_MAP_TYPE_STRUCT_OPS:
	case BPF_MAP_TYPE_CPUMAP:
	case BPF_MAP_TYPE_ARENA:
	case BPF_MAP_TYPE_INSN_ARRAY:
		if (!bpf_token_capable(token, CAP_BPF))
			goto put_token;
		break;
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
	case BPF_MAP_TYPE_XSKMAP:
		if (!bpf_token_capable(token, CAP_NET_ADMIN))
			goto put_token;
		break;
	default:
		WARN(1, "unsupported map type %d", map_type);
		goto put_token;
	}

	map = ops->map_alloc(attr);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto put_token;
	}
	map->ops = ops;
	map->map_type = map_type;

	err = bpf_obj_name_cpy(map->name, attr->map_name,
			       sizeof(attr->map_name));
	if (err < 0) {
		bpf_log(log, "Invalid map_name.\n");
		goto free_map;
	}

	preempt_disable();
	map->cookie = gen_cookie_next(&bpf_map_cookie);
	preempt_enable();

	/* the creator starts out holding one reference and one user reference */
	atomic64_set(&map->refcnt, 1);
	atomic64_set(&map->usercnt, 1);
	mutex_init(&map->freeze_mutex);
	spin_lock_init(&map->owner_lock);

	if (attr->btf_key_type_id || attr->btf_value_type_id ||
	    /* Even the map's value is a kernel's struct,
	     * the bpf_prog.o must have BTF to begin with
	     * to figure out the corresponding kernel's
	     * counter part.  Thus, attr->btf_fd has
	     * to be valid also.
	     */
	    attr->btf_vmlinux_value_type_id) {
		struct btf *btf;

		btf = btf_get_by_fd(attr->btf_fd);
		if (IS_ERR(btf)) {
			bpf_log(log, "Invalid btf_fd.\n");
			err = PTR_ERR(btf);
			goto free_map;
		}
		/* kernel BTF objects are rejected here */
		if (btf_is_kernel(btf)) {
			btf_put(btf);
			err = -EACCES;
			goto free_map;
		}
		map->btf = btf;

		if (attr->btf_value_type_id) {
			err = map_check_btf(map, token, btf, attr->btf_key_type_id,
					    attr->btf_value_type_id);
			if (err)
				goto free_map;
		}

		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
		map->btf_vmlinux_value_type_id =
			attr->btf_vmlinux_value_type_id;
	}

	if (attr->excl_prog_hash) {
		bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel);

		/* the hash must be exactly one SHA-256 digest */
		if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) {
			bpf_log(log, "Invalid excl_prog_hash_size.\n");
			err = -EINVAL;
			goto free_map;
		}

		map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
		if (!map->excl_prog_sha) {
			err = -ENOMEM;
			goto free_map;
		}

		if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) {
			err = -EFAULT;
			goto free_map;
		}

		/* See libbpf: emit_signature_match() */
		BUILD_BUG_ON(offsetof(struct bpf_map, excl) != SHA256_DIGEST_SIZE);
		BUILD_BUG_ON(!__same_type(map->excl, u32));
		BUILD_BUG_ON(offsetof(struct bpf_map, sha)  != 0);
		BUILD_BUG_ON(!__same_type(map->sha, u8[SHA256_DIGEST_SIZE]));
		map->excl = 1;
	} else if (attr->excl_prog_hash_size) {
		/* a size without a hash pointer is inconsistent */
		bpf_log(log, "Invalid excl_prog_hash_size.\n");
		err = -EINVAL;
		goto free_map;
	}

	*mapp = map;
	*tokenp = token;
	return 0;

free_map:
	bpf_map_free(map);
put_token:
	bpf_token_put(token);
	return err;
}
1620 
/* BPF_MAP_CREATE entry point.
 *
 * Sets up the verifier log described by the common attributes, lets
 * map_create_alloc() validate the request and allocate the map, then runs
 * the LSM hook, assigns a map id and installs a new fd for the map.
 *
 * Returns the new map fd on success or a negative errno.
 */
static int map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_common_attr *attr_common,
		      bpfptr_t uattr_common, u32 size_common)
{
	struct bpf_token *token = NULL;
	struct bpf_verifier_log *log;
	struct bpf_log_attr attr_log;
	struct bpf_map *map = NULL;
	int err, ret;
	int f_flags;

	log = bpf_log_attr_create_vlog(&attr_log, attr_common, uattr_common, size_common);
	if (IS_ERR(log))
		return PTR_ERR(log);

	/* on failure map and token stay NULL: map_create_alloc() has already
	 * released whatever it acquired
	 */
	err = map_create_alloc(attr, uattr, log, &map, &token);

	/* preserve original error even if log finalization is successful;
	 * a finalization failure, however, replaces err
	 */
	ret = bpf_log_attr_finalize(&attr_log, log);
	if (ret)
		err = ret;

	kfree(log);

	if (err)
		goto free_map;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0) {
		err = f_flags;
		goto free_map;
	}

	err = security_bpf_map_create(map, attr, token, uattr.is_kernel);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map_sec;

	bpf_map_save_memcg(map);
	bpf_token_put(token);

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put_with_uref() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put_with_uref(map);
		return err;
	}

	return err;

free_map_sec:
	security_bpf_map_free(map);
free_map:
	/* map is still NULL when map_create_alloc() itself failed */
	if (map)
		bpf_map_free(map);
	bpf_token_put(token);
	return err;
}
1686 
/* Take an additional reference on @map by incrementing map->refcnt. */
void bpf_map_inc(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
}
1691 EXPORT_SYMBOL_GPL(bpf_map_inc);
1692 
1693 void bpf_map_inc_with_uref(struct bpf_map *map)
1694 {
1695 	atomic64_inc(&map->refcnt);
1696 	atomic64_inc(&map->usercnt);
1697 }
1698 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
1699 
1700 struct bpf_map *bpf_map_get(u32 ufd)
1701 {
1702 	CLASS(fd, f)(ufd);
1703 	struct bpf_map *map = __bpf_map_get(f);
1704 
1705 	if (!IS_ERR(map))
1706 		bpf_map_inc(map);
1707 
1708 	return map;
1709 }
1710 EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL");
1711 
1712 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
1713 {
1714 	CLASS(fd, f)(ufd);
1715 	struct bpf_map *map = __bpf_map_get(f);
1716 
1717 	if (!IS_ERR(map))
1718 		bpf_map_inc_with_uref(map);
1719 
1720 	return map;
1721 }
1722 
1723 /* map_idr_lock should have been held or the map should have been
1724  * protected by rcu read lock.
1725  */
1726 struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
1727 {
1728 	int refold;
1729 
1730 	refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
1731 	if (!refold)
1732 		return ERR_PTR(-ENOENT);
1733 	if (uref)
1734 		atomic64_inc(&map->usercnt);
1735 
1736 	return map;
1737 }
1738 
/* Take a reference (no user reference) on @map unless its refcnt is already
 * zero. The caller is expected to hold the RCU read lock, which is asserted
 * through lockdep.
 *
 * Returns @map, or ERR_PTR(-ENOENT) if the refcnt was zero.
 */
struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
	lockdep_assert(rcu_read_lock_held());
	return __bpf_map_inc_not_zero(map, false);
}
1744 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
1745 
/* Weak default that always fails with -ENOTSUPP; being __weak, it is used
 * only when no other definition of bpf_stackmap_extract() is linked in.
 */
int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
				bool delete)
{
	return -ENOTSUPP;
}
1751 
1752 static void *__bpf_copy_key(void __user *ukey, u64 key_size)
1753 {
1754 	if (key_size)
1755 		return vmemdup_user(ukey, key_size);
1756 
1757 	if (ukey)
1758 		return ERR_PTR(-EINVAL);
1759 
1760 	return NULL;
1761 }
1762 
1763 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
1764 {
1765 	if (key_size)
1766 		return kvmemdup_bpfptr(ukey, key_size);
1767 
1768 	if (!bpfptr_is_null(ukey))
1769 		return ERR_PTR(-EINVAL);
1770 
1771 	return NULL;
1772 }
1773 
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags

/* BPF_MAP_LOOKUP_ELEM: look up the element for the key at attr->key in the
 * map behind attr->map_fd and copy its value to the user buffer at
 * attr->value.
 *
 * Returns 0 on success or a negative errno: -EINVAL for non-zero unused
 * attr fields, -EPERM without read permission on the map, -ENOMEM/-EFAULT
 * for allocation and user-copy failures, or whatever the flag check, key
 * copy or value lookup reports.
 */
static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
		return -EPERM;

	err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK | BPF_F_CPU);
	if (err)
		return err;

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key))
		return PTR_ERR(key);

	value_size = bpf_map_value_size(map, attr->flags);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	/* For bloom filters the user-supplied value is the lookup input: it
	 * is copied in first and nothing is copied back to userspace.
	 */
	if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		if (copy_from_user(value, uvalue, value_size))
			err = -EFAULT;
		else
			err = bpf_map_copy_value(map, key, value, attr->flags);
		goto free_value;
	}

	err = bpf_map_copy_value(map, key, value, attr->flags);
	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
	return err;
}
1835 
1836 
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

/* BPF_MAP_UPDATE_ELEM: create or update the element for the key at
 * attr->key with the value at attr->value in the map behind attr->map_fd.
 * Key and value may live in kernel memory when @uattr is a kernel pointer.
 *
 * Returns 0 on success or a negative errno: -EINVAL for non-zero unused
 * attr fields, -EPERM without write permission on the map, or whatever the
 * flag check, key/value copy or the update itself reports.
 */
static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
	bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	/* The write-active count is raised before the permission check
	 * (which takes map->frozen into account) and dropped again on every
	 * exit path below.
	 */
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	err = bpf_map_check_op_flags(map, attr->flags, ~0);
	if (err)
		goto err_put;

	key = ___bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map, attr->flags);
	value = kvmemdup_bpfptr(uvalue, value_size);
	if (IS_ERR(value)) {
		err = PTR_ERR(value);
		goto free_key;
	}

	err = bpf_map_update_value(map, fd_file(f), key, value, attr->flags);
	if (!err)
		maybe_wait_bpf_programs(map);

	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	return err;
}
1889 
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

/* BPF_MAP_DELETE_ELEM: delete the element for the key at attr->key from the
 * map behind attr->map_fd. The key may live in kernel memory when @uattr is
 * a kernel pointer.
 *
 * Returns 0 on success or a negative errno: -EINVAL for non-zero unused
 * attr fields, -EPERM without write permission on the map, or whatever the
 * key copy or the map's delete operation reports.
 */
static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
	struct bpf_map *map;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	/* raised before the permission check, dropped on every exit path */
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = ___bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (bpf_map_is_offloaded(map)) {
		err = bpf_map_offload_delete_elem(map, key);
		goto out;
	} else if (IS_FD_PROG_ARRAY(map) ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		/* These maps require sleepable context */
		err = map->ops->map_delete_elem(map, key);
		goto out;
	}

	/* all other map types: delete under rcu_read_lock() with BPF
	 * instrumentation disabled
	 */
	bpf_disable_instrumentation();
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	bpf_enable_instrumentation();
	if (!err)
		maybe_wait_bpf_programs(map);
out:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	return err;
}
1941 
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

/* BPF_MAP_GET_NEXT_KEY: ask the map behind attr->map_fd for the key that
 * follows the one at attr->key and copy it to the user buffer at
 * attr->next_key.
 *
 * Returns 0 on success or a negative errno: -EINVAL for non-zero unused
 * attr fields, -EPERM without read permission on the map, -ENOMEM/-EFAULT
 * for allocation and user-copy failures, or whatever the key copy or the
 * map's get_next_key operation reports.
 */
static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	struct bpf_map *map;
	void *key, *next_key;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
		return -EPERM;

	/* A NULL user key is handed to the map as key == NULL (presumably
	 * "start from the first key"; the meaning is up to the map type).
	 */
	if (ukey) {
		key = __bpf_copy_key(ukey, map->key_size);
		if (IS_ERR(key))
			return PTR_ERR(key);
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kvmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	if (bpf_map_is_offloaded(map)) {
		err = bpf_map_offload_get_next_key(map, key, next_key);
		goto out;
	}

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
out:
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kvfree(next_key);
free_key:
	kvfree(key);
	return err;
}
2000 
2001 int generic_map_delete_batch(struct bpf_map *map,
2002 			     const union bpf_attr *attr,
2003 			     union bpf_attr __user *uattr)
2004 {
2005 	void __user *keys = u64_to_user_ptr(attr->batch.keys);
2006 	u32 cp, max_count;
2007 	int err = 0;
2008 	void *key;
2009 
2010 	if (attr->batch.elem_flags & ~BPF_F_LOCK)
2011 		return -EINVAL;
2012 
2013 	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
2014 	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
2015 		return -EINVAL;
2016 	}
2017 
2018 	max_count = attr->batch.count;
2019 	if (!max_count)
2020 		return 0;
2021 
2022 	if (put_user(0, &uattr->batch.count))
2023 		return -EFAULT;
2024 
2025 	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
2026 	if (!key)
2027 		return -ENOMEM;
2028 
2029 	for (cp = 0; cp < max_count; cp++) {
2030 		err = -EFAULT;
2031 		if (copy_from_user(key, keys + cp * map->key_size,
2032 				   map->key_size))
2033 			break;
2034 
2035 		if (bpf_map_is_offloaded(map)) {
2036 			err = bpf_map_offload_delete_elem(map, key);
2037 			break;
2038 		}
2039 
2040 		bpf_disable_instrumentation();
2041 		rcu_read_lock();
2042 		err = map->ops->map_delete_elem(map, key);
2043 		rcu_read_unlock();
2044 		bpf_enable_instrumentation();
2045 		if (err)
2046 			break;
2047 		cond_resched();
2048 	}
2049 	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
2050 		err = -EFAULT;
2051 
2052 	kvfree(key);
2053 
2054 	return err;
2055 }
2056 
2057 int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
2058 			     const union bpf_attr *attr,
2059 			     union bpf_attr __user *uattr)
2060 {
2061 	void __user *values = u64_to_user_ptr(attr->batch.values);
2062 	void __user *keys = u64_to_user_ptr(attr->batch.keys);
2063 	u32 value_size, cp, max_count;
2064 	void *key, *value;
2065 	int err = 0;
2066 
2067 	err = bpf_map_check_op_flags(map, attr->batch.elem_flags,
2068 				     BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS);
2069 	if (err)
2070 		return err;
2071 
2072 	value_size = bpf_map_value_size(map, attr->batch.elem_flags);
2073 
2074 	max_count = attr->batch.count;
2075 	if (!max_count)
2076 		return 0;
2077 
2078 	if (put_user(0, &uattr->batch.count))
2079 		return -EFAULT;
2080 
2081 	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
2082 	if (!key)
2083 		return -ENOMEM;
2084 
2085 	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
2086 	if (!value) {
2087 		kvfree(key);
2088 		return -ENOMEM;
2089 	}
2090 
2091 	for (cp = 0; cp < max_count; cp++) {
2092 		err = -EFAULT;
2093 		if (copy_from_user(key, keys + cp * map->key_size,
2094 		    map->key_size) ||
2095 		    copy_from_user(value, values + cp * value_size, value_size))
2096 			break;
2097 
2098 		err = bpf_map_update_value(map, map_file, key, value,
2099 					   attr->batch.elem_flags);
2100 
2101 		if (err)
2102 			break;
2103 		cond_resched();
2104 	}
2105 
2106 	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
2107 		err = -EFAULT;
2108 
2109 	kvfree(value);
2110 	kvfree(key);
2111 
2112 	return err;
2113 }
2114 
2115 int generic_map_lookup_batch(struct bpf_map *map,
2116 				    const union bpf_attr *attr,
2117 				    union bpf_attr __user *uattr)
2118 {
2119 	void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
2120 	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
2121 	void __user *values = u64_to_user_ptr(attr->batch.values);
2122 	void __user *keys = u64_to_user_ptr(attr->batch.keys);
2123 	void *buf, *buf_prevkey, *prev_key, *key, *value;
2124 	u32 value_size, cp, max_count;
2125 	int err;
2126 
2127 	err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK | BPF_F_CPU);
2128 	if (err)
2129 		return err;
2130 
2131 	value_size = bpf_map_value_size(map, attr->batch.elem_flags);
2132 
2133 	max_count = attr->batch.count;
2134 	if (!max_count)
2135 		return 0;
2136 
2137 	if (put_user(0, &uattr->batch.count))
2138 		return -EFAULT;
2139 
2140 	buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
2141 	if (!buf_prevkey)
2142 		return -ENOMEM;
2143 
2144 	buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
2145 	if (!buf) {
2146 		kvfree(buf_prevkey);
2147 		return -ENOMEM;
2148 	}
2149 
2150 	err = -EFAULT;
2151 	prev_key = NULL;
2152 	if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
2153 		goto free_buf;
2154 	key = buf;
2155 	value = key + map->key_size;
2156 	if (ubatch)
2157 		prev_key = buf_prevkey;
2158 
2159 	for (cp = 0; cp < max_count;) {
2160 		rcu_read_lock();
2161 		err = map->ops->map_get_next_key(map, prev_key, key);
2162 		rcu_read_unlock();
2163 		if (err)
2164 			break;
2165 		err = bpf_map_copy_value(map, key, value,
2166 					 attr->batch.elem_flags);
2167 
2168 		if (err == -ENOENT)
2169 			goto next_key;
2170 
2171 		if (err)
2172 			goto free_buf;
2173 
2174 		if (copy_to_user(keys + cp * map->key_size, key,
2175 				 map->key_size)) {
2176 			err = -EFAULT;
2177 			goto free_buf;
2178 		}
2179 		if (copy_to_user(values + cp * value_size, value, value_size)) {
2180 			err = -EFAULT;
2181 			goto free_buf;
2182 		}
2183 
2184 		cp++;
2185 next_key:
2186 		if (!prev_key)
2187 			prev_key = buf_prevkey;
2188 
2189 		swap(prev_key, key);
2190 		cond_resched();
2191 	}
2192 
2193 	if (err == -EFAULT)
2194 		goto free_buf;
2195 
2196 	if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
2197 		    (cp && copy_to_user(uobatch, prev_key, map->key_size))))
2198 		err = -EFAULT;
2199 
2200 free_buf:
2201 	kvfree(buf_prevkey);
2202 	kvfree(buf);
2203 	return err;
2204 }
2205 
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags

/*
 * Handler for BPF_MAP_LOOKUP_AND_DELETE_ELEM.
 *
 * Reads the key from attr->key, asks the map to look up and remove the
 * matching element (or pop one, for queue/stack maps) and copies the value to
 * attr->value.
 *
 * Returns 0 on success. Errors produced here: -EINVAL for bad attributes or
 * unsupported flag combinations, -EPERM when the fd lacks read or write
 * permission, -ENOMEM, -EFAULT, and -ENOTSUPP for map types not handled
 * below (or offloaded maps). Errors from __bpf_map_get(), __bpf_copy_key()
 * and the map operation itself are passed through.
 */
static int map_lookup_and_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
		return -EINVAL;

	/* BPF_F_LOCK is the only flag this command accepts. */
	if (attr->flags & ~BPF_F_LOCK)
		return -EINVAL;

	CLASS(fd, f)(attr->map_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	/* Paired with bpf_map_write_active_dec() at err_put. */
	bpf_map_write_active_inc(map);
	/* The operation both reads and modifies the map. */
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	/* Queue and stack maps accept no flags at all. */
	if (attr->flags &&
	    (map->map_type == BPF_MAP_TYPE_QUEUE ||
	     map->map_type == BPF_MAP_TYPE_STACK)) {
		err = -EINVAL;
		goto err_put;
	}

	/* BPF_F_LOCK only makes sense when the value has a spin lock field. */
	if ((attr->flags & BPF_F_LOCK) &&
	    !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map, 0);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	/* Stays -ENOTSUPP when no branch below performs the operation. */
	err = -ENOTSUPP;
	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
	    map->map_type == BPF_MAP_TYPE_STACK) {
		err = map->ops->map_pop_elem(map, value);
	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
		   map->map_type == BPF_MAP_TYPE_RHASH ||
		   map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		if (!bpf_map_is_offloaded(map)) {
			bpf_disable_instrumentation();
			rcu_read_lock();
			err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
			rcu_read_unlock();
			bpf_enable_instrumentation();
		}
	}

	if (err)
		goto free_value;

	if (copy_to_user(uvalue, value, value_size) != 0) {
		err = -EFAULT;
		goto free_value;
	}

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	return err;
}
2297 
2298 #define BPF_MAP_FREEZE_LAST_FIELD map_fd
2299 
2300 static int map_freeze(const union bpf_attr *attr)
2301 {
2302 	int err = 0;
2303 	struct bpf_map *map;
2304 
2305 	if (CHECK_ATTR(BPF_MAP_FREEZE))
2306 		return -EINVAL;
2307 
2308 	CLASS(fd, f)(attr->map_fd);
2309 	map = __bpf_map_get(f);
2310 	if (IS_ERR(map))
2311 		return PTR_ERR(map);
2312 
2313 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record))
2314 		return -ENOTSUPP;
2315 
2316 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE))
2317 		return -EPERM;
2318 
2319 	mutex_lock(&map->freeze_mutex);
2320 	if (bpf_map_write_active(map)) {
2321 		err = -EBUSY;
2322 		goto err_put;
2323 	}
2324 	if (READ_ONCE(map->frozen)) {
2325 		err = -EBUSY;
2326 		goto err_put;
2327 	}
2328 
2329 	WRITE_ONCE(map->frozen, true);
2330 err_put:
2331 	mutex_unlock(&map->freeze_mutex);
2332 	return err;
2333 }
2334 
/*
 * Table of program operations indexed by enum bpf_prog_type.
 *
 * The entries are generated from <linux/bpf_types.h>: BPF_PROG_TYPE() expands
 * to a designated initializer pointing at the "<name>_prog_ops" object, while
 * BPF_MAP_TYPE() and BPF_LINK_TYPE() expand to nothing here. Indices that get
 * no initializer stay NULL, which find_prog_type() treats as an invalid type.
 */
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
2345 
2346 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
2347 {
2348 	const struct bpf_prog_ops *ops;
2349 
2350 	if (type >= ARRAY_SIZE(bpf_prog_types))
2351 		return -EINVAL;
2352 	type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
2353 	ops = bpf_prog_types[type];
2354 	if (!ops)
2355 		return -EINVAL;
2356 
2357 	if (!bpf_prog_is_offloaded(prog->aux))
2358 		prog->aux->ops = ops;
2359 	else
2360 		prog->aux->ops = &bpf_offload_prog_ops;
2361 	prog->type = type;
2362 	return 0;
2363 }
2364 
/* Operations reported by bpf_audit_prog(); BPF_AUDIT_MAX is the count. */
enum bpf_audit {
	BPF_AUDIT_LOAD,
	BPF_AUDIT_UNLOAD,
	BPF_AUDIT_MAX,
};

/* Text emitted in the "op=" field of the audit record, indexed by enum bpf_audit. */
static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
	[BPF_AUDIT_LOAD]   = "LOAD",
	[BPF_AUDIT_UNLOAD] = "UNLOAD",
};
2375 
2376 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
2377 {
2378 	struct audit_context *ctx = NULL;
2379 	struct audit_buffer *ab;
2380 
2381 	if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
2382 		return;
2383 	if (audit_enabled == AUDIT_OFF)
2384 		return;
2385 	if (!in_hardirq() && !irqs_disabled())
2386 		ctx = audit_context();
2387 	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
2388 	if (unlikely(!ab))
2389 		return;
2390 	audit_log_format(ab, "prog-id=%u op=%s",
2391 			 prog->aux->id, bpf_audit_str[op]);
2392 	audit_log_end(ab);
2393 }
2394 
2395 static int bpf_prog_alloc_id(struct bpf_prog *prog)
2396 {
2397 	int id;
2398 
2399 	idr_preload(GFP_KERNEL);
2400 	spin_lock_bh(&prog_idr_lock);
2401 	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
2402 	if (id > 0)
2403 		prog->aux->id = id;
2404 	spin_unlock_bh(&prog_idr_lock);
2405 	idr_preload_end();
2406 
2407 	/* id is in [1, INT_MAX) */
2408 	if (WARN_ON_ONCE(!id))
2409 		return -ENOSPC;
2410 
2411 	return id > 0 ? 0 : id;
2412 }
2413 
2414 void bpf_prog_free_id(struct bpf_prog *prog)
2415 {
2416 	unsigned long flags;
2417 
2418 	/* cBPF to eBPF migrations are currently not in the idr store.
2419 	 * Offloaded programs are removed from the store when their device
2420 	 * disappears - even if someone grabs an fd to them they are unusable,
2421 	 * simply waiting for refcnt to drop to be freed.
2422 	 */
2423 	if (!prog->aux->id)
2424 		return;
2425 
2426 	spin_lock_irqsave(&prog_idr_lock, flags);
2427 	idr_remove(&prog_idr, prog->aux->id);
2428 	prog->aux->id = 0;
2429 	spin_unlock_irqrestore(&prog_idr_lock, flags);
2430 }
2431 
2432 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
2433 {
2434 	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
2435 
2436 	kvfree(aux->func_info);
2437 	kfree(aux->func_info_aux);
2438 	free_uid(aux->user);
2439 	security_bpf_prog_free(aux->prog);
2440 	bpf_prog_free(aux->prog);
2441 }
2442 
2443 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
2444 {
2445 	bpf_prog_kallsyms_del_all(prog);
2446 	btf_put(prog->aux->btf);
2447 	module_put(prog->aux->mod);
2448 	kvfree(prog->aux->jited_linfo);
2449 	kvfree(prog->aux->linfo);
2450 	kfree(prog->aux->kfunc_tab);
2451 	kfree(prog->aux->ctx_arg_info);
2452 	if (prog->aux->attach_btf)
2453 		btf_put(prog->aux->attach_btf);
2454 
2455 	if (deferred) {
2456 		if (prog->sleepable)
2457 			call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
2458 		else
2459 			call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
2460 	} else {
2461 		__bpf_prog_put_rcu(&prog->aux->rcu);
2462 	}
2463 }
2464 
2465 static void bpf_prog_put_deferred(struct work_struct *work)
2466 {
2467 	struct bpf_prog_aux *aux;
2468 	struct bpf_prog *prog;
2469 
2470 	aux = container_of(work, struct bpf_prog_aux, work);
2471 	prog = aux->prog;
2472 	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
2473 	bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
2474 	bpf_prog_free_id(prog);
2475 	__bpf_prog_put_noref(prog, true);
2476 }
2477 
2478 static void __bpf_prog_put(struct bpf_prog *prog)
2479 {
2480 	struct bpf_prog_aux *aux = prog->aux;
2481 
2482 	if (atomic64_dec_and_test(&aux->refcnt)) {
2483 		if (in_hardirq() || irqs_disabled()) {
2484 			INIT_WORK(&aux->work, bpf_prog_put_deferred);
2485 			schedule_work(&aux->work);
2486 		} else {
2487 			bpf_prog_put_deferred(&aux->work);
2488 		}
2489 	}
2490 }
2491 
/*
 * Exported entry point for dropping a program reference; forwards to
 * __bpf_prog_put().
 */
void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);
2497 
2498 static int bpf_prog_release(struct inode *inode, struct file *filp)
2499 {
2500 	struct bpf_prog *prog = filp->private_data;
2501 
2502 	bpf_prog_put(prog);
2503 	return 0;
2504 }
2505 
/* Program statistics summed over all possible CPUs by bpf_prog_get_stats(). */
struct bpf_prog_kstats {
	u64 nsecs;	/* sum of the per-CPU nsecs counters */
	u64 cnt;	/* sum of the per-CPU cnt counters */
	u64 misses;	/* sum of the per-CPU misses counters */
};
2511 
2512 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog)
2513 {
2514 	struct bpf_prog_stats *stats;
2515 	unsigned int flags;
2516 
2517 	if (unlikely(!prog->stats))
2518 		return;
2519 
2520 	stats = this_cpu_ptr(prog->stats);
2521 	flags = u64_stats_update_begin_irqsave(&stats->syncp);
2522 	u64_stats_inc(&stats->misses);
2523 	u64_stats_update_end_irqrestore(&stats->syncp, flags);
2524 }
2525 
2526 static void bpf_prog_get_stats(const struct bpf_prog *prog,
2527 			       struct bpf_prog_kstats *stats)
2528 {
2529 	u64 nsecs = 0, cnt = 0, misses = 0;
2530 	int cpu;
2531 
2532 	for_each_possible_cpu(cpu) {
2533 		const struct bpf_prog_stats *st;
2534 		unsigned int start;
2535 		u64 tnsecs, tcnt, tmisses;
2536 
2537 		st = per_cpu_ptr(prog->stats, cpu);
2538 		do {
2539 			start = u64_stats_fetch_begin(&st->syncp);
2540 			tnsecs = u64_stats_read(&st->nsecs);
2541 			tcnt = u64_stats_read(&st->cnt);
2542 			tmisses = u64_stats_read(&st->misses);
2543 		} while (u64_stats_fetch_retry(&st->syncp, start));
2544 		nsecs += tnsecs;
2545 		cnt += tcnt;
2546 		misses += tmisses;
2547 	}
2548 	stats->nsecs = nsecs;
2549 	stats->cnt = cnt;
2550 	stats->misses = misses;
2551 }
2552 
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo() of bpf_prog_fops: print the program's type, JIT state, tag
 * (as hex), memlock size, ID, aggregated run statistics and verified
 * instruction count into @m, one "name:\tvalue" pair per line.
 */
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	/* Two hex digits per tag byte plus the terminating NUL. */
	char tag_hex[sizeof(prog->tag) * 2 + 1] = { };
	struct bpf_prog_kstats totals;
	unsigned long long memlock_bytes;

	bpf_prog_get_stats(prog, &totals);
	bin2hex(tag_hex, prog->tag, sizeof(prog->tag));
	memlock_bytes = prog->pages * 1ULL << PAGE_SHIFT;

	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n"
		   "prog_id:\t%u\n"
		   "run_time_ns:\t%llu\n"
		   "run_cnt:\t%llu\n"
		   "recursion_misses:\t%llu\n"
		   "verified_insns:\t%u\n",
		   prog->type,
		   prog->jited,
		   tag_hex,
		   memlock_bytes,
		   prog->aux->id,
		   totals.nsecs,
		   totals.cnt,
		   totals.misses,
		   prog->aux->verified_insns);
}
#endif
2583 
/*
 * File operations of the anonymous inode that backs a BPF program fd (see
 * bpf_prog_new_fd()). __bpf_prog_get() compares a file's f_op against this
 * object to recognise program fds.
 */
const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};
2592 
2593 int bpf_prog_new_fd(struct bpf_prog *prog)
2594 {
2595 	int ret;
2596 
2597 	ret = security_bpf_prog(prog);
2598 	if (ret < 0)
2599 		return ret;
2600 
2601 	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
2602 				O_RDWR | O_CLOEXEC);
2603 }
2604 
/* Add @i references to @prog. */
void bpf_prog_add(struct bpf_prog *prog, int i)
{
	atomic64_add(i, &prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_add);
2610 
2611 void bpf_prog_sub(struct bpf_prog *prog, int i)
2612 {
2613 	/* Only to be used for undoing previous bpf_prog_add() in some
2614 	 * error path. We still know that another entity in our call
2615 	 * path holds a reference to the program, thus atomic_sub() can
2616 	 * be safely used in such cases!
2617 	 */
2618 	WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
2619 }
2620 EXPORT_SYMBOL_GPL(bpf_prog_sub);
2621 
/* Take one additional reference on @prog. */
void bpf_prog_inc(struct bpf_prog *prog)
{
	atomic64_inc(&prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
2627 
2628 /* prog_idr_lock should have been held */
2629 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
2630 {
2631 	int refold;
2632 
2633 	refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
2634 
2635 	if (!refold)
2636 		return ERR_PTR(-ENOENT);
2637 
2638 	return prog;
2639 }
2640 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
2641 
2642 bool bpf_prog_get_ok(struct bpf_prog *prog,
2643 			    enum bpf_prog_type *attach_type, bool attach_drv)
2644 {
2645 	/* not an attachment, just a refcount inc, always allow */
2646 	if (!attach_type)
2647 		return true;
2648 
2649 	if (prog->type != *attach_type)
2650 		return false;
2651 	if (bpf_prog_is_offloaded(prog->aux) && !attach_drv)
2652 		return false;
2653 
2654 	return true;
2655 }
2656 
2657 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
2658 				       bool attach_drv)
2659 {
2660 	CLASS(fd, f)(ufd);
2661 	struct bpf_prog *prog;
2662 
2663 	if (fd_empty(f))
2664 		return ERR_PTR(-EBADF);
2665 	if (fd_file(f)->f_op != &bpf_prog_fops)
2666 		return ERR_PTR(-EINVAL);
2667 
2668 	prog = fd_file(f)->private_data;
2669 	if (!bpf_prog_get_ok(prog, attach_type, attach_drv))
2670 		return ERR_PTR(-EINVAL);
2671 
2672 	bpf_prog_inc(prog);
2673 	return prog;
2674 }
2675 
/*
 * Get a reference to the program behind @ufd without any type check.
 * Returns the program or an ERR_PTR() from __bpf_prog_get().
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL, false);
}
2680 
/*
 * Get a reference to the program behind @ufd, requiring it to be of @type.
 * @attach_drv is passed on to bpf_prog_get_ok(), where it decides whether an
 * offloaded program is acceptable.
 * Returns the program or an ERR_PTR() from __bpf_prog_get().
 */
struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				       bool attach_drv)
{
	return __bpf_prog_get(ufd, &type, attach_drv);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
2687 
2688 /* Initially all BPF programs could be loaded w/o specifying
2689  * expected_attach_type. Later for some of them specifying expected_attach_type
2690  * at load time became required so that program could be validated properly.
2691  * Programs of types that are allowed to be loaded both w/ and w/o (for
2692  * backward compatibility) expected_attach_type, should have the default attach
2693  * type assigned to expected_attach_type for the latter case, so that it can be
2694  * validated later at attach time.
2695  *
2696  * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
2697  * prog type requires it but has some attach types that have to be backward
2698  * compatible.
2699  */
2700 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
2701 {
2702 	switch (attr->prog_type) {
2703 	case BPF_PROG_TYPE_CGROUP_SOCK:
2704 		/* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
2705 		 * exist so checking for non-zero is the way to go here.
2706 		 */
2707 		if (!attr->expected_attach_type)
2708 			attr->expected_attach_type =
2709 				BPF_CGROUP_INET_SOCK_CREATE;
2710 		break;
2711 	case BPF_PROG_TYPE_SK_REUSEPORT:
2712 		if (!attr->expected_attach_type)
2713 			attr->expected_attach_type =
2714 				BPF_SK_REUSEPORT_SELECT;
2715 		break;
2716 	}
2717 }
2718 
/*
 * Validate the attach-related load parameters against the program type.
 *
 * @prog_type: type of the program being loaded
 * @expected_attach_type: attach type requested at load time
 * @attach_btf: BTF object the program attaches through, or NULL
 * @btf_id: BTF type ID of the attach target, or 0
 * @dst_prog: program being attached to, or NULL
 * @multi_func: true when the load was classified as a tracing "multi" attach
 *		by the caller
 *
 * Returns 0 when the combination is acceptable, -EINVAL otherwise.
 */
static int
bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
			   enum bpf_attach_type expected_attach_type,
			   struct btf *attach_btf, u32 btf_id,
			   struct bpf_prog *dst_prog,
			   bool multi_func)
{
	/*
	 * A BTF ID needs something to resolve against (a BTF object or a
	 * destination program) and is only accepted for the types below.
	 */
	if (btf_id) {
		if (btf_id > BTF_MAX_TYPE)
			return -EINVAL;

		if (!attach_btf && !dst_prog)
			return -EINVAL;

		switch (prog_type) {
		case BPF_PROG_TYPE_TRACING:
		case BPF_PROG_TYPE_LSM:
		case BPF_PROG_TYPE_STRUCT_OPS:
		case BPF_PROG_TYPE_EXT:
			break;
		default:
			return -EINVAL;
		}
	}

	/*
	 * Multi attach: tracing programs only, with a BTF object but without
	 * a specific BTF ID. No further checks apply.
	 */
	if (multi_func) {
		if (prog_type != BPF_PROG_TYPE_TRACING)
			return -EINVAL;
		if (!attach_btf || btf_id)
			return -EINVAL;
		return 0;
	}

	/* A BTF object requires a BTF ID and excludes a destination program. */
	if (attach_btf && (!btf_id || dst_prog))
		return -EINVAL;

	/* Only tracing and extension programs may name a destination program. */
	if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING &&
	    prog_type != BPF_PROG_TYPE_EXT)
		return -EINVAL;

	/* Per-type list of acceptable expected_attach_type values. */
	switch (prog_type) {
	case BPF_PROG_TYPE_CGROUP_SOCK:
		switch (expected_attach_type) {
		case BPF_CGROUP_INET_SOCK_CREATE:
		case BPF_CGROUP_INET_SOCK_RELEASE:
		case BPF_CGROUP_INET4_POST_BIND:
		case BPF_CGROUP_INET6_POST_BIND:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
		switch (expected_attach_type) {
		case BPF_CGROUP_INET4_BIND:
		case BPF_CGROUP_INET6_BIND:
		case BPF_CGROUP_INET4_CONNECT:
		case BPF_CGROUP_INET6_CONNECT:
		case BPF_CGROUP_UNIX_CONNECT:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_UNIX_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
		case BPF_CGROUP_UNIX_GETSOCKNAME:
		case BPF_CGROUP_UDP4_SENDMSG:
		case BPF_CGROUP_UDP6_SENDMSG:
		case BPF_CGROUP_UNIX_SENDMSG:
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
		case BPF_CGROUP_UNIX_RECVMSG:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_CGROUP_SKB:
		switch (expected_attach_type) {
		case BPF_CGROUP_INET_INGRESS:
		case BPF_CGROUP_INET_EGRESS:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		switch (expected_attach_type) {
		case BPF_CGROUP_SETSOCKOPT:
		case BPF_CGROUP_GETSOCKOPT:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_SK_LOOKUP:
		if (expected_attach_type == BPF_SK_LOOKUP)
			return 0;
		return -EINVAL;
	case BPF_PROG_TYPE_SK_REUSEPORT:
		switch (expected_attach_type) {
		case BPF_SK_REUSEPORT_SELECT:
		case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_NETFILTER:
		if (expected_attach_type == BPF_NETFILTER)
			return 0;
		return -EINVAL;
	case BPF_PROG_TYPE_SYSCALL:
	case BPF_PROG_TYPE_EXT:
		/* These two types must not specify an attach type at all. */
		if (expected_attach_type)
			return -EINVAL;
		fallthrough;
	default:
		/* Every other program type accepts any expected_attach_type here. */
		return 0;
	}
}
2834 
/*
 * Return true for program types that bpf_prog_load() only accepts when
 * CAP_NET_ADMIN is granted (checked there through bpf_token_capable()).
 */
static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
{
	switch (prog_type) {
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_EXT: /* extends any prog */
	case BPF_PROG_TYPE_NETFILTER:
		return true;
	/* The two cases below are listed only to document why they are excluded. */
	case BPF_PROG_TYPE_CGROUP_SKB:
		/* always unpriv */
	case BPF_PROG_TYPE_SK_REUSEPORT:
		/* equivalent to SOCKET_FILTER. need CAP_BPF only */
	default:
		return false;
	}
}
2865 
/*
 * Return true for program types that bpf_prog_load() only accepts when
 * CAP_PERFMON is granted (checked there through bpf_token_capable()).
 */
static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
{
	switch (prog_type) {
	case BPF_PROG_TYPE_KPROBE:
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_LSM:
	case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
	case BPF_PROG_TYPE_EXT: /* extends any prog */
		return true;
	default:
		return false;
	}
}
2883 
2884 static enum bpf_sig_keyring bpf_classify_keyring(s32 keyring_id)
2885 {
2886 	switch (keyring_id) {
2887 	case 0:
2888 		return BPF_SIG_KEYRING_BUILTIN;
2889 	case (s32)(unsigned long)VERIFY_USE_SECONDARY_KEYRING:
2890 		return BPF_SIG_KEYRING_SECONDARY;
2891 	case (s32)(unsigned long)VERIFY_USE_PLATFORM_KEYRING:
2892 		return BPF_SIG_KEYRING_PLATFORM;
2893 	default:
2894 		return BPF_SIG_KEYRING_USER;
2895 	}
2896 }
2897 
2898 static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr,
2899 				     bool is_kernel, s32 *keyring_serial)
2900 {
2901 	bpfptr_t usig = make_bpfptr(attr->signature, is_kernel);
2902 	struct bpf_dynptr_kern sig_ptr, insns_ptr;
2903 	struct bpf_key *key = NULL;
2904 	void *sig;
2905 	int err = 0;
2906 
2907 	/*
2908 	 * Don't attempt to use kmalloc_large or vmalloc for signatures.
2909 	 * Practical signature for BPF program should be below this limit.
2910 	 */
2911 	if (attr->signature_size > KMALLOC_MAX_CACHE_SIZE)
2912 		return -EINVAL;
2913 
2914 	if (system_keyring_id_check(attr->keyring_id) == 0)
2915 		key = bpf_lookup_system_key(attr->keyring_id);
2916 	else
2917 		key = bpf_lookup_user_key(attr->keyring_id, 0);
2918 
2919 	if (!key)
2920 		return -EINVAL;
2921 
2922 	sig = kvmemdup_bpfptr(usig, attr->signature_size);
2923 	if (IS_ERR(sig)) {
2924 		bpf_key_put(key);
2925 		return PTR_ERR(sig);
2926 	}
2927 
2928 	bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0,
2929 			attr->signature_size);
2930 	bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0,
2931 			prog->len * sizeof(struct bpf_insn));
2932 
2933 	err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr,
2934 					 (struct bpf_dynptr *)&sig_ptr, key);
2935 	if (!err)
2936 		*keyring_serial = bpf_key_serial(key);
2937 	bpf_key_put(key);
2938 	kvfree(sig);
2939 	return err;
2940 }
2941 
2942 static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog)
2943 {
2944 	int err;
2945 	int i;
2946 
2947 	for (i = 0; i < prog->aux->used_map_cnt; i++) {
2948 		if (prog->aux->used_maps[i]->map_type != BPF_MAP_TYPE_INSN_ARRAY)
2949 			continue;
2950 
2951 		err = bpf_insn_array_ready(prog->aux->used_maps[i]);
2952 		if (err)
2953 			return err;
2954 	}
2955 
2956 	return 0;
2957 }
2958 
/*
 * Placeholder function that only returns 0. Its BTF ID is recorded in
 * bpf_multi_func_btf_id, which bpf_prog_load() stores as attach_btf_id for
 * programs loaded with a tracing "multi" expected_attach_type.
 */
extern int bpf_multi_func(void);
int __init __used bpf_multi_func(void) { return 0; }

BTF_ID_LIST_GLOBAL_SINGLE(bpf_multi_func_btf_id, func, bpf_multi_func)
2963 
2964 /* last field in 'union bpf_attr' used by this command */
2965 #define BPF_PROG_LOAD_LAST_FIELD keyring_id
2966 
2967 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log)
2968 {
2969 	enum bpf_prog_type type = attr->prog_type;
2970 	struct bpf_prog *prog, *dst_prog = NULL;
2971 	struct btf *attach_btf = NULL;
2972 	struct bpf_token *token = NULL;
2973 	bool bpf_cap;
2974 	int err;
2975 	char license[128];
2976 	bool multi_func;
2977 
2978 	if (CHECK_ATTR(BPF_PROG_LOAD))
2979 		return -EINVAL;
2980 
2981 	if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
2982 				 BPF_F_ANY_ALIGNMENT |
2983 				 BPF_F_TEST_STATE_FREQ |
2984 				 BPF_F_SLEEPABLE |
2985 				 BPF_F_TEST_RND_HI32 |
2986 				 BPF_F_XDP_HAS_FRAGS |
2987 				 BPF_F_XDP_DEV_BOUND_ONLY |
2988 				 BPF_F_TEST_REG_INVARIANTS |
2989 				 BPF_F_TOKEN_FD))
2990 		return -EINVAL;
2991 
2992 	bpf_prog_load_fixup_attach_type(attr);
2993 
2994 	if (attr->prog_flags & BPF_F_TOKEN_FD) {
2995 		token = bpf_token_get_from_fd(attr->prog_token_fd);
2996 		if (IS_ERR(token))
2997 			return PTR_ERR(token);
2998 		/* if current token doesn't grant prog loading permissions,
2999 		 * then we can't use this token, so ignore it and rely on
3000 		 * system-wide capabilities checks
3001 		 */
3002 		if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) ||
3003 		    !bpf_token_allow_prog_type(token, attr->prog_type,
3004 					       attr->expected_attach_type)) {
3005 			bpf_token_put(token);
3006 			token = NULL;
3007 		}
3008 	}
3009 
3010 	bpf_cap = bpf_token_capable(token, CAP_BPF);
3011 	err = -EPERM;
3012 
3013 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
3014 	    (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
3015 	    !bpf_cap)
3016 		goto put_token;
3017 
3018 	/* Intent here is for unprivileged_bpf_disabled to block BPF program
3019 	 * creation for unprivileged users; other actions depend
3020 	 * on fd availability and access to bpffs, so are dependent on
3021 	 * object creation success. Even with unprivileged BPF disabled,
3022 	 * capability checks are still carried out for these
3023 	 * and other operations.
3024 	 */
3025 	if (sysctl_unprivileged_bpf_disabled && !bpf_cap)
3026 		goto put_token;
3027 
3028 	if (attr->insn_cnt == 0 ||
3029 	    attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) {
3030 		err = -E2BIG;
3031 		goto put_token;
3032 	}
3033 	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
3034 	    type != BPF_PROG_TYPE_CGROUP_SKB &&
3035 	    !bpf_cap)
3036 		goto put_token;
3037 
3038 	if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN))
3039 		goto put_token;
3040 	if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON))
3041 		goto put_token;
3042 
3043 	multi_func = is_tracing_multi(attr->expected_attach_type);
3044 
3045 	/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
3046 	 * or btf, we need to check which one it is
3047 	 */
3048 	if (attr->attach_prog_fd) {
3049 		dst_prog = bpf_prog_get(attr->attach_prog_fd);
3050 		if (IS_ERR(dst_prog)) {
3051 			dst_prog = NULL;
3052 			attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
3053 			if (IS_ERR(attach_btf)) {
3054 				err = -EINVAL;
3055 				goto put_token;
3056 			}
3057 			if (!btf_is_kernel(attach_btf)) {
3058 				/* attaching through specifying bpf_prog's BTF
3059 				 * objects directly might be supported eventually
3060 				 */
3061 				btf_put(attach_btf);
3062 				err = -ENOTSUPP;
3063 				goto put_token;
3064 			}
3065 		}
3066 	} else if (attr->attach_btf_id || multi_func) {
3067 		/* fall back to vmlinux BTF, if BTF type ID is specified */
3068 		attach_btf = bpf_get_btf_vmlinux();
3069 		if (IS_ERR(attach_btf)) {
3070 			err = PTR_ERR(attach_btf);
3071 			goto put_token;
3072 		}
3073 		if (!attach_btf) {
3074 			err = -EINVAL;
3075 			goto put_token;
3076 		}
3077 		btf_get(attach_btf);
3078 	}
3079 
3080 	if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
3081 				       attach_btf, attr->attach_btf_id,
3082 				       dst_prog, multi_func)) {
3083 		if (dst_prog)
3084 			bpf_prog_put(dst_prog);
3085 		if (attach_btf)
3086 			btf_put(attach_btf);
3087 		err = -EINVAL;
3088 		goto put_token;
3089 	}
3090 
3091 	/* plain bpf_prog allocation */
3092 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
3093 	if (!prog) {
3094 		if (dst_prog)
3095 			bpf_prog_put(dst_prog);
3096 		if (attach_btf)
3097 			btf_put(attach_btf);
3098 		err = -EINVAL;
3099 		goto put_token;
3100 	}
3101 
3102 	prog->expected_attach_type = attr->expected_attach_type;
3103 	prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE);
3104 	prog->aux->attach_btf = attach_btf;
3105 	prog->aux->attach_btf_id = multi_func ? bpf_multi_func_btf_id[0] : attr->attach_btf_id;
3106 	prog->aux->dst_prog = dst_prog;
3107 	prog->aux->dev_bound = !!attr->prog_ifindex;
3108 	prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
3109 
3110 	/* move token into prog->aux, reuse taken refcnt */
3111 	prog->aux->token = token;
3112 	token = NULL;
3113 
3114 	prog->aux->user = get_current_user();
3115 	prog->len = attr->insn_cnt;
3116 
3117 	err = -EFAULT;
3118 	if (copy_from_bpfptr(prog->insns,
3119 			     make_bpfptr(attr->insns, uattr.is_kernel),
3120 			     bpf_prog_insn_size(prog)) != 0)
3121 		goto free_prog;
3122 	/* copy eBPF program license from user space */
3123 	if (strncpy_from_bpfptr(license,
3124 				make_bpfptr(attr->license, uattr.is_kernel),
3125 				sizeof(license) - 1) < 0)
3126 		goto free_prog;
3127 	license[sizeof(license) - 1] = 0;
3128 
3129 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
3130 	prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;
3131 	if (attr->signature) {
3132 		err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel,
3133 						&prog->aux->sig.keyring_serial);
3134 		if (err)
3135 			goto free_prog;
3136 		prog->aux->sig.keyring_type = bpf_classify_keyring(attr->keyring_id);
3137 		prog->aux->sig.verdict = BPF_SIG_VERIFIED;
3138 	} else {
3139 		prog->aux->sig.keyring_type = BPF_SIG_KEYRING_NONE;
3140 		prog->aux->sig.verdict = BPF_SIG_UNSIGNED;
3141 	}
3142 	prog->orig_prog = NULL;
3143 	prog->jited = 0;
3144 
3145 	atomic64_set(&prog->aux->refcnt, 1);
3146 
3147 	if (bpf_prog_is_dev_bound(prog->aux)) {
3148 		err = bpf_prog_dev_bound_init(prog, attr);
3149 		if (err)
3150 			goto free_prog;
3151 	}
3152 
3153 	if (type == BPF_PROG_TYPE_EXT && dst_prog &&
3154 	    bpf_prog_is_dev_bound(dst_prog->aux)) {
3155 		err = bpf_prog_dev_bound_inherit(prog, dst_prog);
3156 		if (err)
3157 			goto free_prog;
3158 	}
3159 
3160 	/*
3161 	 * Bookkeeping for managing the program attachment chain.
3162 	 *
3163 	 * It might be tempting to set attach_tracing_prog flag at the attachment
3164 	 * time, but this will not prevent from loading bunch of tracing prog
3165 	 * first, then attach them one to another.
3166 	 *
3167 	 * The flag attach_tracing_prog is set for the whole program lifecycle, and
3168 	 * doesn't have to be cleared in bpf_tracing_link_release, since tracing
3169 	 * programs cannot change attachment target.
3170 	 */
3171 	if (type == BPF_PROG_TYPE_TRACING && dst_prog &&
3172 	    dst_prog->type == BPF_PROG_TYPE_TRACING) {
3173 		prog->aux->attach_tracing_prog = true;
3174 	}
3175 
3176 	/* find program type: socket_filter vs tracing_filter */
3177 	err = find_prog_type(type, prog);
3178 	if (err < 0)
3179 		goto free_prog;
3180 
3181 	prog->aux->load_time = ktime_get_boottime_ns();
3182 	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
3183 			       sizeof(attr->prog_name));
3184 	if (err < 0)
3185 		goto free_prog;
3186 
3187 	err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel);
3188 	if (err)
3189 		goto free_prog;
3190 
3191 	/* run eBPF verifier */
3192 	err = bpf_check(&prog, attr, uattr, attr_log);
3193 	if (err < 0)
3194 		goto free_used_maps;
3195 
3196 	err = bpf_prog_mark_insn_arrays_ready(prog);
3197 	if (err < 0)
3198 		goto free_used_maps;
3199 
3200 	err = bpf_prog_alloc_id(prog);
3201 	if (err)
3202 		goto free_used_maps;
3203 
3204 	/* Upon success of bpf_prog_alloc_id(), the BPF prog is
3205 	 * effectively publicly exposed. However, retrieving via
3206 	 * bpf_prog_get_fd_by_id() will take another reference,
3207 	 * therefore it cannot be gone underneath us.
3208 	 *
3209 	 * Only for the time /after/ successful bpf_prog_new_fd()
3210 	 * and before returning to userspace, we might just hold
3211 	 * one reference and any parallel close on that fd could
3212 	 * rip everything out. Hence, below notifications must
3213 	 * happen before bpf_prog_new_fd().
3214 	 *
3215 	 * Also, any failure handling from this point onwards must
3216 	 * be using bpf_prog_put() given the program is exposed.
3217 	 */
3218 	bpf_prog_kallsyms_add(prog);
3219 	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
3220 	bpf_audit_prog(prog, BPF_AUDIT_LOAD);
3221 
3222 	err = bpf_prog_new_fd(prog);
3223 	if (err < 0)
3224 		bpf_prog_put(prog);
3225 	return err;
3226 
3227 free_used_maps:
3228 	/* In case we have subprogs, we need to wait for a grace
3229 	 * period before we can tear down JIT memory since symbols
3230 	 * are already exposed under kallsyms.
3231 	 */
3232 	__bpf_prog_put_noref(prog, prog->aux->real_func_cnt);
3233 	return err;
3234 
3235 free_prog:
3236 	free_uid(prog->aux->user);
3237 	if (prog->aux->attach_btf)
3238 		btf_put(prog->aux->attach_btf);
3239 	bpf_prog_free(prog);
3240 put_token:
3241 	bpf_token_put(token);
3242 	return err;
3243 }
3244 
3245 #define BPF_OBJ_LAST_FIELD path_fd
3246 
3247 static int bpf_obj_pin(const union bpf_attr *attr)
3248 {
3249 	int path_fd;
3250 
3251 	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD)
3252 		return -EINVAL;
3253 
3254 	/* path_fd has to be accompanied by BPF_F_PATH_FD flag */
3255 	if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd)
3256 		return -EINVAL;
3257 
3258 	path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD;
3259 	return bpf_obj_pin_user(attr->bpf_fd, path_fd,
3260 				u64_to_user_ptr(attr->pathname));
3261 }
3262 
3263 static int bpf_obj_get(const union bpf_attr *attr)
3264 {
3265 	int path_fd;
3266 
3267 	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
3268 	    attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD))
3269 		return -EINVAL;
3270 
3271 	/* path_fd has to be accompanied by BPF_F_PATH_FD flag */
3272 	if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd)
3273 		return -EINVAL;
3274 
3275 	path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD;
3276 	return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname),
3277 				attr->file_flags);
3278 }
3279 
3280 /* bpf_link_init_sleepable() allows to specify whether BPF link itself has
3281  * "sleepable" semantics, which normally would mean that BPF link's attach
3282  * hook can dereference link or link's underlying program for some time after
3283  * detachment due to RCU Tasks Trace-based lifetime protection scheme.
3284  * BPF program itself can be non-sleepable, yet, because it's transitively
3285  * reachable through BPF link, its freeing has to be delayed until after RCU
3286  * Tasks Trace GP.
3287  */
3288 void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
3289 			     const struct bpf_link_ops *ops, struct bpf_prog *prog,
3290 			     enum bpf_attach_type attach_type, bool sleepable)
3291 {
3292 	WARN_ON(ops->dealloc && ops->dealloc_deferred);
3293 	atomic64_set(&link->refcnt, 1);
3294 	link->type = type;
3295 	link->sleepable = sleepable;
3296 	link->id = 0;
3297 	link->ops = ops;
3298 	link->prog = prog;
3299 	link->attach_type = attach_type;
3300 }
3301 
3302 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
3303 		   const struct bpf_link_ops *ops, struct bpf_prog *prog,
3304 		   enum bpf_attach_type attach_type)
3305 {
3306 	bpf_link_init_sleepable(link, type, ops, prog, attach_type, false);
3307 }
3308 
3309 void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
3310 			 const struct bpf_link_ops *ops, struct bpf_prog *prog,
3311 			 enum bpf_attach_type attach_type, u64 cookie)
3312 {
3313 	bpf_link_init(&link->link, type, ops, prog, attach_type);
3314 	link->node.link = &link->link;
3315 	link->node.cookie = cookie;
3316 }
3317 
3318 static void bpf_link_free_id(int id)
3319 {
3320 	if (!id)
3321 		return;
3322 
3323 	spin_lock_bh(&link_idr_lock);
3324 	idr_remove(&link_idr, id);
3325 	spin_unlock_bh(&link_idr_lock);
3326 }
3327 
3328 /* Clean up bpf_link and corresponding anon_inode file and FD. After
3329  * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
3330  * anon_inode's release() call. This helper marks bpf_link as
3331  * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
3332  * is not decremented, it's the responsibility of a calling code that failed
3333  * to complete bpf_link initialization.
3334  * This helper eventually calls link's dealloc callback, but does not call
3335  * link's release callback.
3336  */
3337 void bpf_link_cleanup(struct bpf_link_primer *primer)
3338 {
3339 	primer->link->prog = NULL;
3340 	bpf_link_free_id(primer->id);
3341 	fput(primer->file);
3342 	put_unused_fd(primer->fd);
3343 }
3344 
3345 void bpf_link_inc(struct bpf_link *link)
3346 {
3347 	atomic64_inc(&link->refcnt);
3348 }
3349 
3350 static void bpf_link_dealloc(struct bpf_link *link)
3351 {
3352 	/* now that we know that bpf_link itself can't be reached, put underlying BPF program */
3353 	if (link->prog)
3354 		bpf_prog_put(link->prog);
3355 
3356 	/* free bpf_link and its containing memory */
3357 	if (link->ops->dealloc_deferred)
3358 		link->ops->dealloc_deferred(link);
3359 	else
3360 		link->ops->dealloc(link);
3361 }
3362 
3363 static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
3364 {
3365 	struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
3366 
3367 	bpf_link_dealloc(link);
3368 }
3369 
3370 static bool bpf_link_is_tracepoint(struct bpf_link *link)
3371 {
3372 	/*
3373 	 * Only these combinations support a tracepoint bpf_link.
3374 	 * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use
3375 	 * bpf_raw_tp_link_lops and thus dealloc_deferred(), see
3376 	 * bpf_raw_tp_link_attach().
3377 	 */
3378 	return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT ||
3379 	       (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
3380 }
3381 
3382 /* bpf_link_free is guaranteed to be called from process context */
3383 static void bpf_link_free(struct bpf_link *link)
3384 {
3385 	const struct bpf_link_ops *ops = link->ops;
3386 
3387 	bpf_link_free_id(link->id);
3388 	/* detach BPF program, clean up used resources */
3389 	if (link->prog)
3390 		ops->release(link);
3391 	if (ops->dealloc_deferred) {
3392 		/*
3393 		 * Schedule BPF link deallocation, which will only then
3394 		 * trigger putting BPF program refcount.
3395 		 * If underlying BPF program is sleepable or BPF link's target
3396 		 * attach hookpoint is sleepable or otherwise requires RCU GPs
3397 		 * to ensure link and its underlying BPF program is not
3398 		 * reachable anymore, we need to first wait for RCU tasks
3399 		 * trace sync, and then go through "classic" RCU grace period.
3400 		 *
3401 		 * For tracepoint BPF links, we need to go through SRCU grace
3402 		 * period wait instead when non-faultable tracepoint is used. We
3403 		 * don't need to chain SRCU grace period waits, however, for the
3404 		 * faultable case, since it exclusively uses RCU Tasks Trace.
3405 		 */
3406 		if (link->sleepable || (link->prog && link->prog->sleepable))
3407 			/* RCU Tasks Trace grace period implies RCU grace period. */
3408 			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
3409 		/* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
3410 		else if (bpf_link_is_tracepoint(link))
3411 			call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
3412 		else
3413 			call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
3414 	} else if (ops->dealloc) {
3415 		bpf_link_dealloc(link);
3416 	}
3417 }
3418 
3419 static void bpf_link_put_deferred(struct work_struct *work)
3420 {
3421 	struct bpf_link *link = container_of(work, struct bpf_link, work);
3422 
3423 	bpf_link_free(link);
3424 }
3425 
3426 /* bpf_link_put might be called from atomic context. It needs to be called
3427  * from sleepable context in order to acquire sleeping locks during the process.
3428  */
3429 void bpf_link_put(struct bpf_link *link)
3430 {
3431 	if (!atomic64_dec_and_test(&link->refcnt))
3432 		return;
3433 
3434 	INIT_WORK(&link->work, bpf_link_put_deferred);
3435 	schedule_work(&link->work);
3436 }
3437 EXPORT_SYMBOL(bpf_link_put);
3438 
3439 static void bpf_link_put_direct(struct bpf_link *link)
3440 {
3441 	if (!atomic64_dec_and_test(&link->refcnt))
3442 		return;
3443 	bpf_link_free(link);
3444 }
3445 
3446 static int bpf_link_release(struct inode *inode, struct file *filp)
3447 {
3448 	struct bpf_link *link = filp->private_data;
3449 
3450 	bpf_link_put_direct(link);
3451 	return 0;
3452 }
3453 
3454 #ifdef CONFIG_PROC_FS
3455 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
3456 #define BPF_MAP_TYPE(_id, _ops)
3457 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
3458 static const char *bpf_link_type_strs[] = {
3459 	[BPF_LINK_TYPE_UNSPEC] = "<invalid>",
3460 #include <linux/bpf_types.h>
3461 };
3462 #undef BPF_PROG_TYPE
3463 #undef BPF_MAP_TYPE
3464 #undef BPF_LINK_TYPE
3465 
3466 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
3467 {
3468 	const struct bpf_link *link = filp->private_data;
3469 	const struct bpf_prog *prog = link->prog;
3470 	enum bpf_link_type type = link->type;
3471 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
3472 
3473 	if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) {
3474 		if (link->type == BPF_LINK_TYPE_KPROBE_MULTI)
3475 			seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_KPROBE_MULTI_RETURN ?
3476 				   "kretprobe_multi" : "kprobe_multi");
3477 		else if (link->type == BPF_LINK_TYPE_UPROBE_MULTI)
3478 			seq_printf(m, "link_type:\t%s\n", link->flags == BPF_F_UPROBE_MULTI_RETURN ?
3479 				   "uretprobe_multi" : "uprobe_multi");
3480 		else
3481 			seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]);
3482 	} else {
3483 		WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type);
3484 		seq_printf(m, "link_type:\t<%u>\n", type);
3485 	}
3486 	seq_printf(m, "link_id:\t%u\n", link->id);
3487 
3488 	if (prog) {
3489 		bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
3490 		seq_printf(m,
3491 			   "prog_tag:\t%s\n"
3492 			   "prog_id:\t%u\n",
3493 			   prog_tag,
3494 			   prog->aux->id);
3495 	}
3496 	if (link->ops->show_fdinfo)
3497 		link->ops->show_fdinfo(link, m);
3498 }
3499 #endif
3500 
3501 static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts)
3502 {
3503 	struct bpf_link *link = file->private_data;
3504 
3505 	return link->ops->poll(file, pts);
3506 }
3507 
3508 static const struct file_operations bpf_link_fops = {
3509 #ifdef CONFIG_PROC_FS
3510 	.show_fdinfo	= bpf_link_show_fdinfo,
3511 #endif
3512 	.release	= bpf_link_release,
3513 	.read		= bpf_dummy_read,
3514 	.write		= bpf_dummy_write,
3515 };
3516 
3517 static const struct file_operations bpf_link_fops_poll = {
3518 #ifdef CONFIG_PROC_FS
3519 	.show_fdinfo	= bpf_link_show_fdinfo,
3520 #endif
3521 	.release	= bpf_link_release,
3522 	.read		= bpf_dummy_read,
3523 	.write		= bpf_dummy_write,
3524 	.poll		= bpf_link_poll,
3525 };
3526 
3527 static int bpf_link_alloc_id(struct bpf_link *link)
3528 {
3529 	int id;
3530 
3531 	idr_preload(GFP_KERNEL);
3532 	spin_lock_bh(&link_idr_lock);
3533 	id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
3534 	spin_unlock_bh(&link_idr_lock);
3535 	idr_preload_end();
3536 
3537 	return id;
3538 }
3539 
3540 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
3541  * reserving unused FD and allocating ID from link_idr. This is to be paired
3542  * with bpf_link_settle() to install FD and ID and expose bpf_link to
3543  * user-space, if bpf_link is successfully attached. If not, bpf_link and
3544  * pre-allocated resources are to be freed with bpf_cleanup() call. All the
3545  * transient state is passed around in struct bpf_link_primer.
3546  * This is preferred way to create and initialize bpf_link, especially when
3547  * there are complicated and expensive operations in between creating bpf_link
3548  * itself and attaching it to BPF hook. By using bpf_link_prime() and
3549  * bpf_link_settle() kernel code using bpf_link doesn't have to perform
3550  * expensive (and potentially failing) roll back operations in a rare case
3551  * that file, FD, or ID can't be allocated.
3552  */
3553 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
3554 {
3555 	struct file *file;
3556 	int fd, id;
3557 
3558 	fd = get_unused_fd_flags(O_CLOEXEC);
3559 	if (fd < 0)
3560 		return fd;
3561 
3562 
3563 	id = bpf_link_alloc_id(link);
3564 	if (id < 0) {
3565 		put_unused_fd(fd);
3566 		return id;
3567 	}
3568 
3569 	file = anon_inode_getfile("bpf_link",
3570 				  link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops,
3571 				  link, O_CLOEXEC);
3572 	if (IS_ERR(file)) {
3573 		bpf_link_free_id(id);
3574 		put_unused_fd(fd);
3575 		return PTR_ERR(file);
3576 	}
3577 
3578 	primer->link = link;
3579 	primer->file = file;
3580 	primer->fd = fd;
3581 	primer->id = id;
3582 	return 0;
3583 }
3584 
3585 int bpf_link_settle(struct bpf_link_primer *primer)
3586 {
3587 	/* make bpf_link fetchable by ID */
3588 	spin_lock_bh(&link_idr_lock);
3589 	primer->link->id = primer->id;
3590 	spin_unlock_bh(&link_idr_lock);
3591 	/* make bpf_link fetchable by FD */
3592 	fd_install(primer->fd, primer->file);
3593 	/* pass through installed FD */
3594 	return primer->fd;
3595 }
3596 
3597 int bpf_link_new_fd(struct bpf_link *link)
3598 {
3599 	return anon_inode_getfd("bpf-link",
3600 				link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops,
3601 				link, O_CLOEXEC);
3602 }
3603 
3604 struct bpf_link *bpf_link_get_from_fd(u32 ufd)
3605 {
3606 	CLASS(fd, f)(ufd);
3607 	struct bpf_link *link;
3608 
3609 	if (fd_empty(f))
3610 		return ERR_PTR(-EBADF);
3611 	if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll)
3612 		return ERR_PTR(-EINVAL);
3613 
3614 	link = fd_file(f)->private_data;
3615 	bpf_link_inc(link);
3616 	return link;
3617 }
3618 EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL");
3619 
3620 static void bpf_tracing_link_release(struct bpf_link *link)
3621 {
3622 	struct bpf_tracing_link *tr_link =
3623 		container_of(link, struct bpf_tracing_link, link.link);
3624 
3625 	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link.node,
3626 						tr_link->trampoline,
3627 						tr_link->tgt_prog));
3628 
3629 	bpf_trampoline_put(tr_link->trampoline);
3630 
3631 	/* tgt_prog is NULL if target is a kernel function */
3632 	if (tr_link->tgt_prog)
3633 		bpf_prog_put(tr_link->tgt_prog);
3634 }
3635 
3636 static void bpf_tracing_link_dealloc(struct bpf_link *link)
3637 {
3638 	struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
3639 
3640 	kfree(tr_link);
3641 }
3642 
3643 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
3644 					 struct seq_file *seq)
3645 {
3646 	struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
3647 
3648 	u32 target_btf_id, target_obj_id;
3649 
3650 	bpf_trampoline_unpack_key(tr_link->trampoline->key,
3651 				  &target_obj_id, &target_btf_id);
3652 	seq_printf(seq,
3653 		   "attach_type:\t%d\n"
3654 		   "target_obj_id:\t%u\n"
3655 		   "target_btf_id:\t%u\n"
3656 		   "cookie:\t%llu\n",
3657 		   link->attach_type,
3658 		   target_obj_id,
3659 		   target_btf_id,
3660 		   tr_link->link.node.cookie);
3661 }
3662 
3663 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
3664 					   struct bpf_link_info *info)
3665 {
3666 	struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
3667 
3668 	info->tracing.attach_type = link->attach_type;
3669 	info->tracing.cookie = tr_link->link.node.cookie;
3670 	bpf_trampoline_unpack_key(tr_link->trampoline->key,
3671 				  &info->tracing.target_obj_id,
3672 				  &info->tracing.target_btf_id);
3673 
3674 	return 0;
3675 }
3676 
3677 static const struct bpf_link_ops bpf_tracing_link_lops = {
3678 	.release = bpf_tracing_link_release,
3679 	.dealloc = bpf_tracing_link_dealloc,
3680 	.show_fdinfo = bpf_tracing_link_show_fdinfo,
3681 	.fill_link_info = bpf_tracing_link_fill_link_info,
3682 };
3683 
/* Attach a TRACING, EXT or LSM program to a BPF trampoline and expose the
 * attachment as a BPF_LINK_TYPE_TRACING link.
 *
 * @prog:        program to attach; its type and expected_attach_type are
 *               validated first
 * @tgt_prog_fd: FD of a target BPF program, or 0; a non-zero value is only
 *               accepted for BPF_PROG_TYPE_EXT
 * @btf_id:      BTF ID of the attach target; has to be non-zero exactly when
 *               @tgt_prog_fd is non-zero
 * @bpf_cookie:  cookie handed to bpf_tramp_link_init() and, for
 *               BPF_TRACE_FSESSION programs, also stored in link->fexit
 * @attach_type: attach type recorded in the link
 *
 * Returns the FD installed by bpf_link_settle() on success, or a negative
 * error code. This function itself never puts @prog; on failure it undoes
 * only what it acquired (link memory, a newly obtained trampoline, the
 * reference taken through @tgt_prog_fd).
 */
static int bpf_tracing_prog_attach(struct bpf_prog *prog,
				   int tgt_prog_fd,
				   u32 btf_id,
				   u64 bpf_cookie,
				   enum bpf_attach_type attach_type)
{
	struct bpf_link_primer link_primer;
	struct bpf_prog *tgt_prog = NULL;
	struct bpf_trampoline *tr = NULL;
	struct bpf_tracing_link *link;
	u64 key = 0;
	int err;

	/* each supported program type only allows specific attach types */
	switch (prog->type) {
	case BPF_PROG_TYPE_TRACING:
		if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
		    prog->expected_attach_type != BPF_TRACE_FEXIT &&
		    prog->expected_attach_type != BPF_TRACE_FSESSION &&
		    prog->expected_attach_type != BPF_MODIFY_RETURN) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	case BPF_PROG_TYPE_EXT:
		if (prog->expected_attach_type != 0) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	case BPF_PROG_TYPE_LSM:
		if (prog->expected_attach_type != BPF_LSM_MAC) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	default:
		err = -EINVAL;
		goto out_put_prog;
	}

	/* tgt_prog_fd and btf_id have to be given together or not at all */
	if (!!tgt_prog_fd != !!btf_id) {
		err = -EINVAL;
		goto out_put_prog;
	}

	if (tgt_prog_fd) {
		/*
		 * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this
		 * part would be changed to implement the same for
		 * BPF_PROG_TYPE_TRACING, do not forget to update the way how
		 * attach_tracing_prog flag is set.
		 */
		if (prog->type != BPF_PROG_TYPE_EXT) {
			err = -EINVAL;
			goto out_put_prog;
		}

		tgt_prog = bpf_prog_get(tgt_prog_fd);
		if (IS_ERR(tgt_prog)) {
			err = PTR_ERR(tgt_prog);
			tgt_prog = NULL;
			goto out_put_prog;
		}

		key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
	}

	link = kzalloc_obj(*link, GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_prog;
	}
	bpf_tramp_link_init(&link->link, BPF_LINK_TYPE_TRACING,
			    &bpf_tracing_link_lops, prog, attach_type, bpf_cookie);

	/* fsession programs additionally get their fexit node set up */
	if (prog->expected_attach_type == BPF_TRACE_FSESSION) {
		link->fexit.link = &link->link.link;
		link->fexit.cookie = bpf_cookie;
	}

	mutex_lock(&prog->aux->dst_mutex);

	/* There are a few possible cases here:
	 *
	 * - if prog->aux->dst_trampoline is set, the program was just loaded
	 *   and not yet attached to anything, so we can use the values stored
	 *   in prog->aux
	 *
	 * - if prog->aux->dst_trampoline is NULL, the program has already been
	 *   attached to a target and its initial target was cleared (below)
	 *
	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
	 *   target_btf_id using the link_create API.
	 *
	 * - if tgt_prog == NULL when this function was called using the old
	 *   raw_tracepoint_open API, and we need a target from prog->aux
	 *
	 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program
	 *   was detached and is going for re-attachment.
	 *
	 * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf
	 *   are NULL, then program was already attached and user did not provide
	 *   tgt_prog_fd so we have no way to find out or create trampoline
	 */
	if (!prog->aux->dst_trampoline && !tgt_prog) {
		/*
		 * Allow re-attach for TRACING and LSM programs. If it's
		 * currently linked, bpf_trampoline_link_prog will fail.
		 * EXT programs need to specify tgt_prog_fd, so they
		 * re-attach in separate code path.
		 */
		if (prog->type != BPF_PROG_TYPE_TRACING &&
		    prog->type != BPF_PROG_TYPE_LSM) {
			err = -EINVAL;
			goto out_unlock;
		}
		/* We can allow re-attach only if we have valid attach_btf. */
		if (!prog->aux->attach_btf) {
			err = -EINVAL;
			goto out_unlock;
		}
		btf_id = prog->aux->attach_btf_id;
		key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
	}

	if (!prog->aux->dst_trampoline ||
	    (key && key != prog->aux->dst_trampoline->key)) {
		/* If there is no saved target, or the specified target is
		 * different from the destination specified at load time, we
		 * need a new trampoline and a check for compatibility
		 */
		struct bpf_attach_target_info tgt_info = {};

		err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
					      &tgt_info);
		if (err)
			goto out_unlock;

		/* swap the module reference held in prog->aux for the new one */
		if (tgt_info.tgt_mod) {
			module_put(prog->aux->mod);
			prog->aux->mod = tgt_info.tgt_mod;
		}

		tr = bpf_trampoline_get(key, &tgt_info);
		if (!tr) {
			err = -ENOMEM;
			goto out_unlock;
		}
	} else {
		/* The caller didn't specify a target, or the target was the
		 * same as the destination supplied during program load. This
		 * means we can reuse the trampoline and reference from program
		 * load time, and there is no need to allocate a new one. This
		 * can only happen once for any program, as the saved values in
		 * prog->aux are cleared below.
		 */
		tr = prog->aux->dst_trampoline;
		tgt_prog = prog->aux->dst_prog;
	}
	/*
	 * It is to prevent modifying struct pt_regs via kprobe_write_ctx=true
	 * freplace prog. Without this check, kprobe_write_ctx=true freplace
	 * prog is allowed to attach to kprobe_write_ctx=false kprobe prog, and
	 * then modify the registers of the kprobe prog's target kernel
	 * function.
	 *
	 * This also blocks the combination of uprobe+freplace, because it is
	 * unable to recognize the use of the tgt_prog as an uprobe or a kprobe
	 * by tgt_prog itself. At attach time, uprobe/kprobe is recognized by
	 * the target perf event flags in __perf_event_set_bpf_prog().
	 */
	if (prog->type == BPF_PROG_TYPE_EXT &&
	    prog->aux->kprobe_write_ctx != tgt_prog->aux->kprobe_write_ctx) {
		err = -EINVAL;
		goto out_unlock;
	}

	err = bpf_link_prime(&link->link.link, &link_primer);
	if (err)
		goto out_unlock;

	err = bpf_trampoline_link_prog(&link->link.node, tr, tgt_prog);
	if (err) {
		/* bpf_link_cleanup() drops the primed file, through which the
		 * link's dealloc callback eventually runs; clear the local
		 * pointer so that kfree() below becomes a no-op.
		 */
		bpf_link_cleanup(&link_primer);
		link = NULL;
		goto out_unlock;
	}

	link->tgt_prog = tgt_prog;
	link->trampoline = tr;

	/* Always clear the trampoline and target prog from prog->aux to make
	 * sure the original attach destination is not kept alive after a
	 * program is (re-)attached to another target.
	 */
	if (prog->aux->dst_prog &&
	    (tgt_prog_fd || tr != prog->aux->dst_trampoline))
		/* got extra prog ref from syscall, or attaching to different prog */
		bpf_prog_put(prog->aux->dst_prog);
	if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
		/* we allocated a new trampoline, so free the old one */
		bpf_trampoline_put(prog->aux->dst_trampoline);

	prog->aux->dst_prog = NULL;
	prog->aux->dst_trampoline = NULL;
	mutex_unlock(&prog->aux->dst_mutex);

	return bpf_link_settle(&link_primer);
out_unlock:
	/* put a trampoline obtained above, but never the one saved in prog->aux */
	if (tr && tr != prog->aux->dst_trampoline)
		bpf_trampoline_put(tr);
	mutex_unlock(&prog->aux->dst_mutex);
	kfree(link);
out_put_prog:
	if (tgt_prog_fd && tgt_prog)
		bpf_prog_put(tgt_prog);
	return err;
}
3902 
3903 static void bpf_raw_tp_link_release(struct bpf_link *link)
3904 {
3905 	struct bpf_raw_tp_link *raw_tp =
3906 		container_of(link, struct bpf_raw_tp_link, link);
3907 
3908 	bpf_probe_unregister(raw_tp->btp, raw_tp);
3909 	bpf_put_raw_tracepoint(raw_tp->btp);
3910 }
3911 
3912 static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
3913 {
3914 	struct bpf_raw_tp_link *raw_tp =
3915 		container_of(link, struct bpf_raw_tp_link, link);
3916 
3917 	kfree(raw_tp);
3918 }
3919 
3920 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
3921 					struct seq_file *seq)
3922 {
3923 	struct bpf_raw_tp_link *raw_tp_link =
3924 		container_of(link, struct bpf_raw_tp_link, link);
3925 
3926 	seq_printf(seq,
3927 		   "tp_name:\t%s\n"
3928 		   "cookie:\t%llu\n",
3929 		   raw_tp_link->btp->tp->name,
3930 		   raw_tp_link->cookie);
3931 }
3932 
3933 static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen,
3934 			    u32 len)
3935 {
3936 	if (ulen >= len + 1) {
3937 		if (copy_to_user(ubuf, buf, len + 1))
3938 			return -EFAULT;
3939 	} else {
3940 		char zero = '\0';
3941 
3942 		if (copy_to_user(ubuf, buf, ulen - 1))
3943 			return -EFAULT;
3944 		if (put_user(zero, ubuf + ulen - 1))
3945 			return -EFAULT;
3946 		return -ENOSPC;
3947 	}
3948 
3949 	return 0;
3950 }
3951 
3952 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
3953 					  struct bpf_link_info *info)
3954 {
3955 	struct bpf_raw_tp_link *raw_tp_link =
3956 		container_of(link, struct bpf_raw_tp_link, link);
3957 	char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
3958 	const char *tp_name = raw_tp_link->btp->tp->name;
3959 	u32 ulen = info->raw_tracepoint.tp_name_len;
3960 	size_t tp_len = strlen(tp_name);
3961 
3962 	if (!ulen ^ !ubuf)
3963 		return -EINVAL;
3964 
3965 	info->raw_tracepoint.tp_name_len = tp_len + 1;
3966 	info->raw_tracepoint.cookie = raw_tp_link->cookie;
3967 
3968 	if (!ubuf)
3969 		return 0;
3970 
3971 	return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len);
3972 }
3973 
3974 static const struct bpf_link_ops bpf_raw_tp_link_lops = {
3975 	.release = bpf_raw_tp_link_release,
3976 	.dealloc_deferred = bpf_raw_tp_link_dealloc,
3977 	.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
3978 	.fill_link_info = bpf_raw_tp_link_fill_link_info,
3979 };
3980 
3981 #ifdef CONFIG_PERF_EVENTS
/* BPF link backed by a perf event file. */
struct bpf_perf_link {
	/* generic link; container_of() on it yields the bpf_perf_link */
	struct bpf_link link;
	/* file whose private_data is the perf_event; fput() on link release */
	struct file *perf_file;
};
3986 
3987 static void bpf_perf_link_release(struct bpf_link *link)
3988 {
3989 	struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
3990 	struct perf_event *event = perf_link->perf_file->private_data;
3991 
3992 	perf_event_free_bpf_prog(event);
3993 	fput(perf_link->perf_file);
3994 }
3995 
3996 static void bpf_perf_link_dealloc(struct bpf_link *link)
3997 {
3998 	struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
3999 
4000 	kfree(perf_link);
4001 }
4002 
4003 static int bpf_perf_link_fill_common(const struct perf_event *event,
4004 				     char __user *uname, u32 *ulenp,
4005 				     u64 *probe_offset, u64 *probe_addr,
4006 				     u32 *fd_type, unsigned long *missed)
4007 {
4008 	const char *buf;
4009 	u32 prog_id, ulen;
4010 	size_t len;
4011 	int err;
4012 
4013 	ulen = *ulenp;
4014 	if (!ulen ^ !uname)
4015 		return -EINVAL;
4016 
4017 	err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
4018 				      probe_offset, probe_addr, missed);
4019 	if (err)
4020 		return err;
4021 
4022 	if (buf) {
4023 		len = strlen(buf);
4024 		*ulenp = len + 1;
4025 	} else {
4026 		*ulenp = 1;
4027 	}
4028 	if (!uname)
4029 		return 0;
4030 
4031 	if (buf) {
4032 		err = bpf_copy_to_user(uname, buf, ulen, len);
4033 		if (err)
4034 			return err;
4035 	} else {
4036 		char zero = '\0';
4037 
4038 		if (put_user(zero, uname))
4039 			return -EFAULT;
4040 	}
4041 	return 0;
4042 }
4043 
4044 #ifdef CONFIG_KPROBE_EVENTS
4045 static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
4046 				     struct bpf_link_info *info)
4047 {
4048 	unsigned long missed;
4049 	char __user *uname;
4050 	u64 addr, offset;
4051 	u32 ulen, type;
4052 	int err;
4053 
4054 	uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
4055 	ulen = info->perf_event.kprobe.name_len;
4056 	err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
4057 					&type, &missed);
4058 	if (err)
4059 		return err;
4060 	if (type == BPF_FD_TYPE_KRETPROBE)
4061 		info->perf_event.type = BPF_PERF_EVENT_KRETPROBE;
4062 	else
4063 		info->perf_event.type = BPF_PERF_EVENT_KPROBE;
4064 	info->perf_event.kprobe.name_len = ulen;
4065 	info->perf_event.kprobe.offset = offset;
4066 	info->perf_event.kprobe.missed = missed;
4067 	if (!kallsyms_show_value(current_cred()))
4068 		addr = 0;
4069 	info->perf_event.kprobe.addr = addr;
4070 	info->perf_event.kprobe.cookie = event->bpf_cookie;
4071 	return 0;
4072 }
4073 
4074 static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event,
4075 					struct seq_file *seq)
4076 {
4077 	const char *name;
4078 	int err;
4079 	u32 prog_id, type;
4080 	u64 offset, addr;
4081 	unsigned long missed;
4082 
4083 	err = bpf_get_perf_event_info(event, &prog_id, &type, &name,
4084 				      &offset, &addr, &missed);
4085 	if (err)
4086 		return;
4087 
4088 	seq_printf(seq,
4089 		   "name:\t%s\n"
4090 		   "offset:\t%#llx\n"
4091 		   "missed:\t%lu\n"
4092 		   "addr:\t%#llx\n"
4093 		   "event_type:\t%s\n"
4094 		   "cookie:\t%llu\n",
4095 		   name, offset, missed, addr,
4096 		   type == BPF_FD_TYPE_KRETPROBE ?  "kretprobe" : "kprobe",
4097 		   event->bpf_cookie);
4098 }
4099 #endif
4100 
4101 #ifdef CONFIG_UPROBE_EVENTS
4102 static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
4103 				     struct bpf_link_info *info)
4104 {
4105 	u64 ref_ctr_offset, offset;
4106 	char __user *uname;
4107 	u32 ulen, type;
4108 	int err;
4109 
4110 	uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
4111 	ulen = info->perf_event.uprobe.name_len;
4112 	err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset,
4113 					&type, NULL);
4114 	if (err)
4115 		return err;
4116 
4117 	if (type == BPF_FD_TYPE_URETPROBE)
4118 		info->perf_event.type = BPF_PERF_EVENT_URETPROBE;
4119 	else
4120 		info->perf_event.type = BPF_PERF_EVENT_UPROBE;
4121 	info->perf_event.uprobe.name_len = ulen;
4122 	info->perf_event.uprobe.offset = offset;
4123 	info->perf_event.uprobe.cookie = event->bpf_cookie;
4124 	info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset;
4125 	return 0;
4126 }
4127 
4128 static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event,
4129 					struct seq_file *seq)
4130 {
4131 	const char *name;
4132 	int err;
4133 	u32 prog_id, type;
4134 	u64 offset, ref_ctr_offset;
4135 	unsigned long missed;
4136 
4137 	err = bpf_get_perf_event_info(event, &prog_id, &type, &name,
4138 				      &offset, &ref_ctr_offset, &missed);
4139 	if (err)
4140 		return;
4141 
4142 	seq_printf(seq,
4143 		   "name:\t%s\n"
4144 		   "offset:\t%#llx\n"
4145 		   "ref_ctr_offset:\t%#llx\n"
4146 		   "event_type:\t%s\n"
4147 		   "cookie:\t%llu\n",
4148 		   name, offset, ref_ctr_offset,
4149 		   type == BPF_FD_TYPE_URETPROBE ?  "uretprobe" : "uprobe",
4150 		   event->bpf_cookie);
4151 }
4152 #endif
4153 
4154 static int bpf_perf_link_fill_probe(const struct perf_event *event,
4155 				    struct bpf_link_info *info)
4156 {
4157 #ifdef CONFIG_KPROBE_EVENTS
4158 	if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE)
4159 		return bpf_perf_link_fill_kprobe(event, info);
4160 #endif
4161 #ifdef CONFIG_UPROBE_EVENTS
4162 	if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE)
4163 		return bpf_perf_link_fill_uprobe(event, info);
4164 #endif
4165 	return -EOPNOTSUPP;
4166 }
4167 
4168 static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
4169 					 struct bpf_link_info *info)
4170 {
4171 	char __user *uname;
4172 	u32 ulen;
4173 	int err;
4174 
4175 	uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
4176 	ulen = info->perf_event.tracepoint.name_len;
4177 	err = bpf_perf_link_fill_common(event, uname, &ulen, NULL, NULL, NULL, NULL);
4178 	if (err)
4179 		return err;
4180 
4181 	info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
4182 	info->perf_event.tracepoint.name_len = ulen;
4183 	info->perf_event.tracepoint.cookie = event->bpf_cookie;
4184 	return 0;
4185 }
4186 
4187 static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
4188 					 struct bpf_link_info *info)
4189 {
4190 	info->perf_event.event.type = event->attr.type;
4191 	info->perf_event.event.config = event->attr.config;
4192 	info->perf_event.event.cookie = event->bpf_cookie;
4193 	info->perf_event.type = BPF_PERF_EVENT_EVENT;
4194 	return 0;
4195 }
4196 
4197 static int bpf_perf_link_fill_link_info(const struct bpf_link *link,
4198 					struct bpf_link_info *info)
4199 {
4200 	struct bpf_perf_link *perf_link;
4201 	const struct perf_event *event;
4202 
4203 	perf_link = container_of(link, struct bpf_perf_link, link);
4204 	event = perf_get_event(perf_link->perf_file);
4205 	if (IS_ERR(event))
4206 		return PTR_ERR(event);
4207 
4208 	switch (event->prog->type) {
4209 	case BPF_PROG_TYPE_PERF_EVENT:
4210 		return bpf_perf_link_fill_perf_event(event, info);
4211 	case BPF_PROG_TYPE_TRACEPOINT:
4212 		return bpf_perf_link_fill_tracepoint(event, info);
4213 	case BPF_PROG_TYPE_KPROBE:
4214 		return bpf_perf_link_fill_probe(event, info);
4215 	default:
4216 		return -EOPNOTSUPP;
4217 	}
4218 }
4219 
4220 static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event,
4221 					    struct seq_file *seq)
4222 {
4223 	seq_printf(seq,
4224 		   "type:\t%u\n"
4225 		   "config:\t%llu\n"
4226 		   "event_type:\t%s\n"
4227 		   "cookie:\t%llu\n",
4228 		   event->attr.type, event->attr.config,
4229 		   "event", event->bpf_cookie);
4230 }
4231 
4232 static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event,
4233 					    struct seq_file *seq)
4234 {
4235 	int err;
4236 	const char *name;
4237 	u32 prog_id;
4238 
4239 	err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL,
4240 				      NULL, NULL);
4241 	if (err)
4242 		return;
4243 
4244 	seq_printf(seq,
4245 		   "tp_name:\t%s\n"
4246 		   "event_type:\t%s\n"
4247 		   "cookie:\t%llu\n",
4248 		   name, "tracepoint", event->bpf_cookie);
4249 }
4250 
/*
 * Emit fdinfo for a BPF_PROG_TYPE_KPROBE program attached through a perf
 * event, picking the kprobe or uprobe printer from the trace event flags.
 * Prints nothing when neither flag matches a compiled-in printer.
 */
static void bpf_probe_link_show_fdinfo(const struct perf_event *event,
				       struct seq_file *seq)
{
#ifdef CONFIG_KPROBE_EVENTS
	/* The kprobe flag is tested first and wins if it is set. */
	if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE) {
		bpf_perf_link_fdinfo_kprobe(event, seq);
		return;
	}
#endif

#ifdef CONFIG_UPROBE_EVENTS
	if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE)
		bpf_perf_link_fdinfo_uprobe(event, seq);
#endif
}
4264 
4265 static void bpf_perf_link_show_fdinfo(const struct bpf_link *link,
4266 				      struct seq_file *seq)
4267 {
4268 	struct bpf_perf_link *perf_link;
4269 	const struct perf_event *event;
4270 
4271 	perf_link = container_of(link, struct bpf_perf_link, link);
4272 	event = perf_get_event(perf_link->perf_file);
4273 	if (IS_ERR(event))
4274 		return;
4275 
4276 	switch (event->prog->type) {
4277 	case BPF_PROG_TYPE_PERF_EVENT:
4278 		return bpf_perf_event_link_show_fdinfo(event, seq);
4279 	case BPF_PROG_TYPE_TRACEPOINT:
4280 		return bpf_tracepoint_link_show_fdinfo(event, seq);
4281 	case BPF_PROG_TYPE_KPROBE:
4282 		return bpf_probe_link_show_fdinfo(event, seq);
4283 	default:
4284 		return;
4285 	}
4286 }
4287 
4288 static const struct bpf_link_ops bpf_perf_link_lops = {
4289 	.release = bpf_perf_link_release,
4290 	.dealloc = bpf_perf_link_dealloc,
4291 	.fill_link_info = bpf_perf_link_fill_link_info,
4292 	.show_fdinfo = bpf_perf_link_show_fdinfo,
4293 };
4294 
4295 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
4296 {
4297 	struct bpf_link_primer link_primer;
4298 	struct bpf_perf_link *link;
4299 	struct perf_event *event;
4300 	struct file *perf_file;
4301 	int err;
4302 
4303 	if (attr->link_create.flags)
4304 		return -EINVAL;
4305 
4306 	perf_file = perf_event_get(attr->link_create.target_fd);
4307 	if (IS_ERR(perf_file))
4308 		return PTR_ERR(perf_file);
4309 
4310 	link = kzalloc_obj(*link, GFP_USER);
4311 	if (!link) {
4312 		err = -ENOMEM;
4313 		goto out_put_file;
4314 	}
4315 	bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog,
4316 		      attr->link_create.attach_type);
4317 	link->perf_file = perf_file;
4318 
4319 	err = bpf_link_prime(&link->link, &link_primer);
4320 	if (err) {
4321 		kfree(link);
4322 		goto out_put_file;
4323 	}
4324 
4325 	event = perf_file->private_data;
4326 	err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
4327 	if (err) {
4328 		bpf_link_cleanup(&link_primer);
4329 		goto out_put_file;
4330 	}
4331 	/* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
4332 	bpf_prog_inc(prog);
4333 
4334 	return bpf_link_settle(&link_primer);
4335 
4336 out_put_file:
4337 	fput(perf_file);
4338 	return err;
4339 }
4340 #else
4341 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
4342 {
4343 	return -EOPNOTSUPP;
4344 }
4345 #endif /* CONFIG_PERF_EVENTS */
4346 
4347 static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
4348 				  const char __user *user_tp_name, u64 cookie,
4349 				  enum bpf_attach_type attach_type)
4350 {
4351 	struct bpf_link_primer link_primer;
4352 	struct bpf_raw_tp_link *link;
4353 	struct bpf_raw_event_map *btp;
4354 	const char *tp_name;
4355 	char buf[128];
4356 	int err;
4357 
4358 	switch (prog->type) {
4359 	case BPF_PROG_TYPE_TRACING:
4360 	case BPF_PROG_TYPE_EXT:
4361 	case BPF_PROG_TYPE_LSM:
4362 		if (user_tp_name)
4363 			/* The attach point for this category of programs
4364 			 * should be specified via btf_id during program load.
4365 			 */
4366 			return -EINVAL;
4367 		if (prog->type == BPF_PROG_TYPE_TRACING &&
4368 		    prog->expected_attach_type == BPF_TRACE_RAW_TP) {
4369 			tp_name = prog->aux->attach_func_name;
4370 			break;
4371 		}
4372 		return bpf_tracing_prog_attach(prog, 0, 0, 0, attach_type);
4373 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
4374 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
4375 		if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0)
4376 			return -EFAULT;
4377 		buf[sizeof(buf) - 1] = 0;
4378 		tp_name = buf;
4379 		break;
4380 	default:
4381 		return -EINVAL;
4382 	}
4383 
4384 	btp = bpf_get_raw_tracepoint(tp_name);
4385 	if (!btp)
4386 		return -ENOENT;
4387 
4388 	if (prog->sleepable && !tracepoint_is_faultable(btp->tp)) {
4389 		bpf_put_raw_tracepoint(btp);
4390 		return -EINVAL;
4391 	}
4392 
4393 	link = kzalloc_obj(*link, GFP_USER);
4394 	if (!link) {
4395 		err = -ENOMEM;
4396 		goto out_put_btp;
4397 	}
4398 	bpf_link_init_sleepable(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
4399 				&bpf_raw_tp_link_lops, prog, attach_type,
4400 				tracepoint_is_faultable(btp->tp));
4401 	link->btp = btp;
4402 	link->cookie = cookie;
4403 
4404 	err = bpf_link_prime(&link->link, &link_primer);
4405 	if (err) {
4406 		kfree(link);
4407 		goto out_put_btp;
4408 	}
4409 
4410 	err = bpf_probe_register(link->btp, link);
4411 	if (err) {
4412 		bpf_link_cleanup(&link_primer);
4413 		goto out_put_btp;
4414 	}
4415 
4416 	return bpf_link_settle(&link_primer);
4417 
4418 out_put_btp:
4419 	bpf_put_raw_tracepoint(btp);
4420 	return err;
4421 }
4422 
4423 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.cookie
4424 
4425 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
4426 {
4427 	struct bpf_prog *prog;
4428 	void __user *tp_name;
4429 	__u64 cookie;
4430 	int fd;
4431 
4432 	if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
4433 		return -EINVAL;
4434 
4435 	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
4436 	if (IS_ERR(prog))
4437 		return PTR_ERR(prog);
4438 
4439 	tp_name = u64_to_user_ptr(attr->raw_tracepoint.name);
4440 	cookie = attr->raw_tracepoint.cookie;
4441 	fd = bpf_raw_tp_link_attach(prog, tp_name, cookie, prog->expected_attach_type);
4442 	if (fd < 0)
4443 		bpf_prog_put(prog);
4444 	return fd;
4445 }
4446 
4447 static enum bpf_prog_type
4448 attach_type_to_prog_type(enum bpf_attach_type attach_type)
4449 {
4450 	switch (attach_type) {
4451 	case BPF_CGROUP_INET_INGRESS:
4452 	case BPF_CGROUP_INET_EGRESS:
4453 		return BPF_PROG_TYPE_CGROUP_SKB;
4454 	case BPF_CGROUP_INET_SOCK_CREATE:
4455 	case BPF_CGROUP_INET_SOCK_RELEASE:
4456 	case BPF_CGROUP_INET4_POST_BIND:
4457 	case BPF_CGROUP_INET6_POST_BIND:
4458 		return BPF_PROG_TYPE_CGROUP_SOCK;
4459 	case BPF_CGROUP_INET4_BIND:
4460 	case BPF_CGROUP_INET6_BIND:
4461 	case BPF_CGROUP_INET4_CONNECT:
4462 	case BPF_CGROUP_INET6_CONNECT:
4463 	case BPF_CGROUP_UNIX_CONNECT:
4464 	case BPF_CGROUP_INET4_GETPEERNAME:
4465 	case BPF_CGROUP_INET6_GETPEERNAME:
4466 	case BPF_CGROUP_UNIX_GETPEERNAME:
4467 	case BPF_CGROUP_INET4_GETSOCKNAME:
4468 	case BPF_CGROUP_INET6_GETSOCKNAME:
4469 	case BPF_CGROUP_UNIX_GETSOCKNAME:
4470 	case BPF_CGROUP_UDP4_SENDMSG:
4471 	case BPF_CGROUP_UDP6_SENDMSG:
4472 	case BPF_CGROUP_UNIX_SENDMSG:
4473 	case BPF_CGROUP_UDP4_RECVMSG:
4474 	case BPF_CGROUP_UDP6_RECVMSG:
4475 	case BPF_CGROUP_UNIX_RECVMSG:
4476 		return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
4477 	case BPF_CGROUP_SOCK_OPS:
4478 		return BPF_PROG_TYPE_SOCK_OPS;
4479 	case BPF_CGROUP_DEVICE:
4480 		return BPF_PROG_TYPE_CGROUP_DEVICE;
4481 	case BPF_SK_MSG_VERDICT:
4482 		return BPF_PROG_TYPE_SK_MSG;
4483 	case BPF_SK_SKB_STREAM_PARSER:
4484 	case BPF_SK_SKB_STREAM_VERDICT:
4485 	case BPF_SK_SKB_VERDICT:
4486 		return BPF_PROG_TYPE_SK_SKB;
4487 	case BPF_LIRC_MODE2:
4488 		return BPF_PROG_TYPE_LIRC_MODE2;
4489 	case BPF_FLOW_DISSECTOR:
4490 		return BPF_PROG_TYPE_FLOW_DISSECTOR;
4491 	case BPF_CGROUP_SYSCTL:
4492 		return BPF_PROG_TYPE_CGROUP_SYSCTL;
4493 	case BPF_CGROUP_GETSOCKOPT:
4494 	case BPF_CGROUP_SETSOCKOPT:
4495 		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
4496 	case BPF_TRACE_ITER:
4497 	case BPF_TRACE_RAW_TP:
4498 	case BPF_TRACE_FENTRY:
4499 	case BPF_TRACE_FEXIT:
4500 	case BPF_TRACE_FSESSION:
4501 	case BPF_TRACE_FSESSION_MULTI:
4502 	case BPF_TRACE_FENTRY_MULTI:
4503 	case BPF_TRACE_FEXIT_MULTI:
4504 	case BPF_MODIFY_RETURN:
4505 		return BPF_PROG_TYPE_TRACING;
4506 	case BPF_LSM_MAC:
4507 		return BPF_PROG_TYPE_LSM;
4508 	case BPF_SK_LOOKUP:
4509 		return BPF_PROG_TYPE_SK_LOOKUP;
4510 	case BPF_XDP:
4511 		return BPF_PROG_TYPE_XDP;
4512 	case BPF_LSM_CGROUP:
4513 		return BPF_PROG_TYPE_LSM;
4514 	case BPF_TCX_INGRESS:
4515 	case BPF_TCX_EGRESS:
4516 	case BPF_NETKIT_PRIMARY:
4517 	case BPF_NETKIT_PEER:
4518 		return BPF_PROG_TYPE_SCHED_CLS;
4519 	default:
4520 		return BPF_PROG_TYPE_UNSPEC;
4521 	}
4522 }
4523 
4524 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
4525 					     enum bpf_attach_type attach_type)
4526 {
4527 	enum bpf_prog_type ptype;
4528 
4529 	switch (prog->type) {
4530 	case BPF_PROG_TYPE_CGROUP_SOCK:
4531 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4532 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4533 	case BPF_PROG_TYPE_SK_LOOKUP:
4534 		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
4535 	case BPF_PROG_TYPE_CGROUP_SKB:
4536 		if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN))
4537 			/* cg-skb progs can be loaded by unpriv user.
4538 			 * check permissions at attach time.
4539 			 */
4540 			return -EPERM;
4541 
4542 		ptype = attach_type_to_prog_type(attach_type);
4543 		if (prog->type != ptype)
4544 			return -EINVAL;
4545 
4546 		return prog->enforce_expected_attach_type &&
4547 			prog->expected_attach_type != attach_type ?
4548 			-EINVAL : 0;
4549 	case BPF_PROG_TYPE_EXT:
4550 		return 0;
4551 	case BPF_PROG_TYPE_NETFILTER:
4552 		if (attach_type != BPF_NETFILTER)
4553 			return -EINVAL;
4554 		return 0;
4555 	case BPF_PROG_TYPE_PERF_EVENT:
4556 	case BPF_PROG_TYPE_TRACEPOINT:
4557 		if (attach_type != BPF_PERF_EVENT)
4558 			return -EINVAL;
4559 		return 0;
4560 	case BPF_PROG_TYPE_KPROBE:
4561 		if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI &&
4562 		    attach_type != BPF_TRACE_KPROBE_MULTI)
4563 			return -EINVAL;
4564 		if (prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION &&
4565 		    attach_type != BPF_TRACE_KPROBE_SESSION)
4566 			return -EINVAL;
4567 		if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI &&
4568 		    attach_type != BPF_TRACE_UPROBE_MULTI)
4569 			return -EINVAL;
4570 		if (prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION &&
4571 		    attach_type != BPF_TRACE_UPROBE_SESSION)
4572 			return -EINVAL;
4573 		if (attach_type != BPF_PERF_EVENT &&
4574 		    attach_type != BPF_TRACE_KPROBE_MULTI &&
4575 		    attach_type != BPF_TRACE_KPROBE_SESSION &&
4576 		    attach_type != BPF_TRACE_UPROBE_MULTI &&
4577 		    attach_type != BPF_TRACE_UPROBE_SESSION)
4578 			return -EINVAL;
4579 		return 0;
4580 	case BPF_PROG_TYPE_SCHED_CLS:
4581 		if (attach_type != BPF_TCX_INGRESS &&
4582 		    attach_type != BPF_TCX_EGRESS &&
4583 		    attach_type != BPF_NETKIT_PRIMARY &&
4584 		    attach_type != BPF_NETKIT_PEER)
4585 			return -EINVAL;
4586 		return 0;
4587 	default:
4588 		ptype = attach_type_to_prog_type(attach_type);
4589 		if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type)
4590 			return -EINVAL;
4591 		return 0;
4592 	}
4593 }
4594 
4595 static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype,
4596 				bool check_atype)
4597 {
4598 	switch (ptype) {
4599 	case BPF_PROG_TYPE_CGROUP_DEVICE:
4600 	case BPF_PROG_TYPE_CGROUP_SKB:
4601 	case BPF_PROG_TYPE_CGROUP_SOCK:
4602 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4603 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4604 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
4605 	case BPF_PROG_TYPE_SOCK_OPS:
4606 		return true;
4607 	case BPF_PROG_TYPE_LSM:
4608 		return check_atype ? atype == BPF_LSM_CGROUP : true;
4609 	default:
4610 		return false;
4611 	}
4612 }
4613 
4614 #define BPF_PROG_ATTACH_LAST_FIELD expected_revision
4615 
4616 #define BPF_F_ATTACH_MASK_BASE	\
4617 	(BPF_F_ALLOW_OVERRIDE |	\
4618 	 BPF_F_ALLOW_MULTI |	\
4619 	 BPF_F_REPLACE |	\
4620 	 BPF_F_PREORDER)
4621 
4622 #define BPF_F_ATTACH_MASK_MPROG	\
4623 	(BPF_F_REPLACE |	\
4624 	 BPF_F_BEFORE |		\
4625 	 BPF_F_AFTER |		\
4626 	 BPF_F_ID |		\
4627 	 BPF_F_LINK)
4628 
4629 static int bpf_prog_attach(const union bpf_attr *attr)
4630 {
4631 	enum bpf_prog_type ptype;
4632 	struct bpf_prog *prog;
4633 	int ret;
4634 
4635 	if (CHECK_ATTR(BPF_PROG_ATTACH))
4636 		return -EINVAL;
4637 
4638 	ptype = attach_type_to_prog_type(attr->attach_type);
4639 	if (ptype == BPF_PROG_TYPE_UNSPEC)
4640 		return -EINVAL;
4641 	if (bpf_mprog_supported(ptype)) {
4642 		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
4643 			return -EINVAL;
4644 	} else if (is_cgroup_prog_type(ptype, 0, false)) {
4645 		if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG))
4646 			return -EINVAL;
4647 	} else {
4648 		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE)
4649 			return -EINVAL;
4650 		if (attr->relative_fd ||
4651 		    attr->expected_revision)
4652 			return -EINVAL;
4653 	}
4654 
4655 	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
4656 	if (IS_ERR(prog))
4657 		return PTR_ERR(prog);
4658 
4659 	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
4660 		bpf_prog_put(prog);
4661 		return -EINVAL;
4662 	}
4663 
4664 	if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) {
4665 		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
4666 		goto out;
4667 	}
4668 
4669 	switch (ptype) {
4670 	case BPF_PROG_TYPE_SK_SKB:
4671 	case BPF_PROG_TYPE_SK_MSG:
4672 		ret = sock_map_get_from_fd(attr, prog);
4673 		break;
4674 	case BPF_PROG_TYPE_LIRC_MODE2:
4675 		ret = lirc_prog_attach(attr, prog);
4676 		break;
4677 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
4678 		ret = netns_bpf_prog_attach(attr, prog);
4679 		break;
4680 	case BPF_PROG_TYPE_SCHED_CLS:
4681 		if (attr->attach_type == BPF_TCX_INGRESS ||
4682 		    attr->attach_type == BPF_TCX_EGRESS)
4683 			ret = tcx_prog_attach(attr, prog);
4684 		else
4685 			ret = netkit_prog_attach(attr, prog);
4686 		break;
4687 	default:
4688 		ret = -EINVAL;
4689 	}
4690 out:
4691 	if (ret)
4692 		bpf_prog_put(prog);
4693 	return ret;
4694 }
4695 
4696 #define BPF_PROG_DETACH_LAST_FIELD expected_revision
4697 
4698 static int bpf_prog_detach(const union bpf_attr *attr)
4699 {
4700 	struct bpf_prog *prog = NULL;
4701 	enum bpf_prog_type ptype;
4702 	int ret;
4703 
4704 	if (CHECK_ATTR(BPF_PROG_DETACH))
4705 		return -EINVAL;
4706 
4707 	ptype = attach_type_to_prog_type(attr->attach_type);
4708 	if (bpf_mprog_supported(ptype)) {
4709 		if (ptype == BPF_PROG_TYPE_UNSPEC)
4710 			return -EINVAL;
4711 		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
4712 			return -EINVAL;
4713 		if (attr->attach_bpf_fd) {
4714 			prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
4715 			if (IS_ERR(prog))
4716 				return PTR_ERR(prog);
4717 		} else if (!bpf_mprog_detach_empty(ptype)) {
4718 			return -EPERM;
4719 		}
4720 	} else if (is_cgroup_prog_type(ptype, 0, false)) {
4721 		if (attr->attach_flags || attr->relative_fd)
4722 			return -EINVAL;
4723 	} else if (attr->attach_flags ||
4724 		   attr->relative_fd ||
4725 		   attr->expected_revision) {
4726 		return -EINVAL;
4727 	}
4728 
4729 	switch (ptype) {
4730 	case BPF_PROG_TYPE_SK_MSG:
4731 	case BPF_PROG_TYPE_SK_SKB:
4732 		ret = sock_map_prog_detach(attr, ptype);
4733 		break;
4734 	case BPF_PROG_TYPE_LIRC_MODE2:
4735 		ret = lirc_prog_detach(attr);
4736 		break;
4737 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
4738 		ret = netns_bpf_prog_detach(attr, ptype);
4739 		break;
4740 	case BPF_PROG_TYPE_CGROUP_DEVICE:
4741 	case BPF_PROG_TYPE_CGROUP_SKB:
4742 	case BPF_PROG_TYPE_CGROUP_SOCK:
4743 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4744 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4745 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
4746 	case BPF_PROG_TYPE_SOCK_OPS:
4747 	case BPF_PROG_TYPE_LSM:
4748 		ret = cgroup_bpf_prog_detach(attr, ptype);
4749 		break;
4750 	case BPF_PROG_TYPE_SCHED_CLS:
4751 		if (attr->attach_type == BPF_TCX_INGRESS ||
4752 		    attr->attach_type == BPF_TCX_EGRESS)
4753 			ret = tcx_prog_detach(attr, prog);
4754 		else
4755 			ret = netkit_prog_detach(attr, prog);
4756 		break;
4757 	default:
4758 		ret = -EINVAL;
4759 	}
4760 
4761 	if (prog)
4762 		bpf_prog_put(prog);
4763 	return ret;
4764 }
4765 
4766 #define BPF_PROG_QUERY_LAST_FIELD query.revision
4767 
4768 static int bpf_prog_query(const union bpf_attr *attr,
4769 			  union bpf_attr __user *uattr, u32 uattr_size)
4770 {
4771 	if (!bpf_net_capable())
4772 		return -EPERM;
4773 	if (CHECK_ATTR(BPF_PROG_QUERY))
4774 		return -EINVAL;
4775 	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
4776 		return -EINVAL;
4777 
4778 	switch (attr->query.attach_type) {
4779 	case BPF_CGROUP_INET_INGRESS:
4780 	case BPF_CGROUP_INET_EGRESS:
4781 	case BPF_CGROUP_INET_SOCK_CREATE:
4782 	case BPF_CGROUP_INET_SOCK_RELEASE:
4783 	case BPF_CGROUP_INET4_BIND:
4784 	case BPF_CGROUP_INET6_BIND:
4785 	case BPF_CGROUP_INET4_POST_BIND:
4786 	case BPF_CGROUP_INET6_POST_BIND:
4787 	case BPF_CGROUP_INET4_CONNECT:
4788 	case BPF_CGROUP_INET6_CONNECT:
4789 	case BPF_CGROUP_UNIX_CONNECT:
4790 	case BPF_CGROUP_INET4_GETPEERNAME:
4791 	case BPF_CGROUP_INET6_GETPEERNAME:
4792 	case BPF_CGROUP_UNIX_GETPEERNAME:
4793 	case BPF_CGROUP_INET4_GETSOCKNAME:
4794 	case BPF_CGROUP_INET6_GETSOCKNAME:
4795 	case BPF_CGROUP_UNIX_GETSOCKNAME:
4796 	case BPF_CGROUP_UDP4_SENDMSG:
4797 	case BPF_CGROUP_UDP6_SENDMSG:
4798 	case BPF_CGROUP_UNIX_SENDMSG:
4799 	case BPF_CGROUP_UDP4_RECVMSG:
4800 	case BPF_CGROUP_UDP6_RECVMSG:
4801 	case BPF_CGROUP_UNIX_RECVMSG:
4802 	case BPF_CGROUP_SOCK_OPS:
4803 	case BPF_CGROUP_DEVICE:
4804 	case BPF_CGROUP_SYSCTL:
4805 	case BPF_CGROUP_GETSOCKOPT:
4806 	case BPF_CGROUP_SETSOCKOPT:
4807 	case BPF_LSM_CGROUP:
4808 		return cgroup_bpf_prog_query(attr, uattr, uattr_size);
4809 	case BPF_LIRC_MODE2:
4810 		return lirc_prog_query(attr, uattr);
4811 	case BPF_FLOW_DISSECTOR:
4812 	case BPF_SK_LOOKUP:
4813 		return netns_bpf_prog_query(attr, uattr);
4814 	case BPF_SK_SKB_STREAM_PARSER:
4815 	case BPF_SK_SKB_STREAM_VERDICT:
4816 	case BPF_SK_MSG_VERDICT:
4817 	case BPF_SK_SKB_VERDICT:
4818 		return sock_map_bpf_prog_query(attr, uattr);
4819 	case BPF_TCX_INGRESS:
4820 	case BPF_TCX_EGRESS:
4821 		return tcx_prog_query(attr, uattr);
4822 	case BPF_NETKIT_PRIMARY:
4823 	case BPF_NETKIT_PEER:
4824 		return netkit_prog_query(attr, uattr);
4825 	default:
4826 		return -EINVAL;
4827 	}
4828 }
4829 
4830 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size
4831 
4832 static int bpf_prog_test_run(const union bpf_attr *attr,
4833 			     union bpf_attr __user *uattr)
4834 {
4835 	struct bpf_prog *prog;
4836 	int ret = -ENOTSUPP;
4837 
4838 	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
4839 		return -EINVAL;
4840 
4841 	if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
4842 	    (!attr->test.ctx_size_in && attr->test.ctx_in))
4843 		return -EINVAL;
4844 
4845 	if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
4846 	    (!attr->test.ctx_size_out && attr->test.ctx_out))
4847 		return -EINVAL;
4848 
4849 	prog = bpf_prog_get(attr->test.prog_fd);
4850 	if (IS_ERR(prog))
4851 		return PTR_ERR(prog);
4852 
4853 	if (prog->aux->ops->test_run)
4854 		ret = prog->aux->ops->test_run(prog, attr, uattr);
4855 
4856 	bpf_prog_put(prog);
4857 	return ret;
4858 }
4859 
4860 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
4861 
4862 static int bpf_obj_get_next_id(const union bpf_attr *attr,
4863 			       union bpf_attr __user *uattr,
4864 			       struct idr *idr,
4865 			       spinlock_t *lock)
4866 {
4867 	u32 next_id = attr->start_id;
4868 	int err = 0;
4869 
4870 	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
4871 		return -EINVAL;
4872 
4873 	if (!capable(CAP_SYS_ADMIN))
4874 		return -EPERM;
4875 
4876 	next_id++;
4877 	spin_lock_bh(lock);
4878 	if (!idr_get_next(idr, &next_id))
4879 		err = -ENOENT;
4880 	spin_unlock_bh(lock);
4881 
4882 	if (!err)
4883 		err = put_user(next_id, &uattr->next_id);
4884 
4885 	return err;
4886 }
4887 
4888 struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
4889 {
4890 	struct bpf_map *map;
4891 
4892 	spin_lock_bh(&map_idr_lock);
4893 again:
4894 	map = idr_get_next(&map_idr, id);
4895 	if (map) {
4896 		map = __bpf_map_inc_not_zero(map, false);
4897 		if (IS_ERR(map)) {
4898 			(*id)++;
4899 			goto again;
4900 		}
4901 	}
4902 	spin_unlock_bh(&map_idr_lock);
4903 
4904 	return map;
4905 }
4906 
4907 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
4908 {
4909 	struct bpf_prog *prog;
4910 
4911 	spin_lock_bh(&prog_idr_lock);
4912 again:
4913 	prog = idr_get_next(&prog_idr, id);
4914 	if (prog) {
4915 		prog = bpf_prog_inc_not_zero(prog);
4916 		if (IS_ERR(prog)) {
4917 			(*id)++;
4918 			goto again;
4919 		}
4920 	}
4921 	spin_unlock_bh(&prog_idr_lock);
4922 
4923 	return prog;
4924 }
4925 
4926 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
4927 
4928 struct bpf_prog *bpf_prog_by_id(u32 id)
4929 {
4930 	struct bpf_prog *prog;
4931 
4932 	if (!id)
4933 		return ERR_PTR(-ENOENT);
4934 
4935 	spin_lock_bh(&prog_idr_lock);
4936 	prog = idr_find(&prog_idr, id);
4937 	if (prog)
4938 		prog = bpf_prog_inc_not_zero(prog);
4939 	else
4940 		prog = ERR_PTR(-ENOENT);
4941 	spin_unlock_bh(&prog_idr_lock);
4942 	return prog;
4943 }
4944 
4945 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
4946 {
4947 	struct bpf_prog *prog;
4948 	u32 id = attr->prog_id;
4949 	int fd;
4950 
4951 	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
4952 		return -EINVAL;
4953 
4954 	if (!capable(CAP_SYS_ADMIN))
4955 		return -EPERM;
4956 
4957 	prog = bpf_prog_by_id(id);
4958 	if (IS_ERR(prog))
4959 		return PTR_ERR(prog);
4960 
4961 	fd = bpf_prog_new_fd(prog);
4962 	if (fd < 0)
4963 		bpf_prog_put(prog);
4964 
4965 	return fd;
4966 }
4967 
4968 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
4969 
4970 static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
4971 {
4972 	struct bpf_map *map;
4973 	u32 id = attr->map_id;
4974 	int f_flags;
4975 	int fd;
4976 
4977 	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
4978 	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
4979 		return -EINVAL;
4980 
4981 	if (!capable(CAP_SYS_ADMIN))
4982 		return -EPERM;
4983 
4984 	f_flags = bpf_get_file_flag(attr->open_flags);
4985 	if (f_flags < 0)
4986 		return f_flags;
4987 
4988 	spin_lock_bh(&map_idr_lock);
4989 	map = idr_find(&map_idr, id);
4990 	if (map)
4991 		map = __bpf_map_inc_not_zero(map, true);
4992 	else
4993 		map = ERR_PTR(-ENOENT);
4994 	spin_unlock_bh(&map_idr_lock);
4995 
4996 	if (IS_ERR(map))
4997 		return PTR_ERR(map);
4998 
4999 	fd = bpf_map_new_fd(map, f_flags);
5000 	if (fd < 0)
5001 		bpf_map_put_with_uref(map);
5002 
5003 	return fd;
5004 }
5005 
5006 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
5007 					      unsigned long addr, u32 *off,
5008 					      u32 *type)
5009 {
5010 	const struct bpf_map *map;
5011 	int i;
5012 
5013 	mutex_lock(&prog->aux->used_maps_mutex);
5014 	for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
5015 		map = prog->aux->used_maps[i];
5016 		if (map == (void *)addr) {
5017 			*type = BPF_PSEUDO_MAP_FD;
5018 			goto out;
5019 		}
5020 		if (!map->ops->map_direct_value_meta)
5021 			continue;
5022 		if (!map->ops->map_direct_value_meta(map, addr, off)) {
5023 			*type = BPF_PSEUDO_MAP_VALUE;
5024 			goto out;
5025 		}
5026 	}
5027 	map = NULL;
5028 
5029 out:
5030 	mutex_unlock(&prog->aux->used_maps_mutex);
5031 	return map;
5032 }
5033 
5034 static void prepare_dump_pseudo_call(struct bpf_insn *insn)
5035 {
5036 	s32 call_off = insn->imm;
5037 
5038 	/*
5039 	 * BPF_CALL_ARGS only exists for interpreter fallback.
5040 	 * 1. For interpreter (BPF_CALL_ARGS): insn->off is the index of
5041 	 *    interpreters_args array, so here using bpf_call_args_imm()
5042 	 *    to get the real address offset.
5043 	 * 2. For JIT (BPF_CALL): insn->off is the subprog id.
5044 	 */
5045 	if (insn->code == (BPF_JMP | BPF_CALL_ARGS))
5046 		insn->imm = bpf_call_args_imm(insn->off);
5047 	else
5048 		insn->imm = insn->off;
5049 
5050 	/* Avoid dumping a truncated and misleading pc-relative offset. */
5051 	if (call_off > S16_MAX || call_off < S16_MIN)
5052 		insn->off = 0;
5053 	else
5054 		insn->off = call_off;
5055 }
5056 
5057 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
5058 					      const struct cred *f_cred)
5059 {
5060 	const struct bpf_map *map;
5061 	struct bpf_insn *insns;
5062 	u32 off, type;
5063 	u64 imm;
5064 	u8 code;
5065 	int i;
5066 
5067 	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
5068 			GFP_USER);
5069 	if (!insns)
5070 		return insns;
5071 
5072 	for (i = 0; i < prog->len; i++) {
5073 		code = insns[i].code;
5074 
5075 		if (code == (BPF_JMP | BPF_TAIL_CALL)) {
5076 			insns[i].code = BPF_JMP | BPF_CALL;
5077 			insns[i].imm = BPF_FUNC_tail_call;
5078 			/* fall-through */
5079 		}
5080 		if (code == (BPF_JMP | BPF_CALL) ||
5081 		    code == (BPF_JMP | BPF_CALL_ARGS)) {
5082 			/* Restore the legacy xlated dump layout. */
5083 			if (insns[i].src_reg == BPF_PSEUDO_CALL)
5084 				prepare_dump_pseudo_call(&insns[i]);
5085 			if (code == (BPF_JMP | BPF_CALL_ARGS))
5086 				insns[i].code = BPF_JMP | BPF_CALL;
5087 			if (!bpf_dump_raw_ok(f_cred))
5088 				insns[i].imm = 0;
5089 			continue;
5090 		}
5091 		if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
5092 			insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
5093 			continue;
5094 		}
5095 
5096 		if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX ||
5097 		     BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) {
5098 			insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM;
5099 			continue;
5100 		}
5101 
5102 		if (code != (BPF_LD | BPF_IMM | BPF_DW))
5103 			continue;
5104 
5105 		imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
5106 		map = bpf_map_from_imm(prog, imm, &off, &type);
5107 		if (map) {
5108 			insns[i].src_reg = type;
5109 			insns[i].imm = map->id;
5110 			insns[i + 1].imm = off;
5111 			continue;
5112 		}
5113 	}
5114 
5115 	return insns;
5116 }
5117 
5118 static int set_info_rec_size(struct bpf_prog_info *info)
5119 {
5120 	/*
5121 	 * Ensure info.*_rec_size is the same as kernel expected size
5122 	 *
5123 	 * or
5124 	 *
5125 	 * Only allow zero *_rec_size if both _rec_size and _cnt are
5126 	 * zero.  In this case, the kernel will set the expected
5127 	 * _rec_size back to the info.
5128 	 */
5129 
5130 	if ((info->nr_func_info || info->func_info_rec_size) &&
5131 	    info->func_info_rec_size != sizeof(struct bpf_func_info))
5132 		return -EINVAL;
5133 
5134 	if ((info->nr_line_info || info->line_info_rec_size) &&
5135 	    info->line_info_rec_size != sizeof(struct bpf_line_info))
5136 		return -EINVAL;
5137 
5138 	if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
5139 	    info->jited_line_info_rec_size != sizeof(__u64))
5140 		return -EINVAL;
5141 
5142 	info->func_info_rec_size = sizeof(struct bpf_func_info);
5143 	info->line_info_rec_size = sizeof(struct bpf_line_info);
5144 	info->jited_line_info_rec_size = sizeof(__u64);
5145 
5146 	return 0;
5147 }
5148 
/*
 * Fill a struct bpf_prog_info describing @prog and copy it to the user
 * buffer referenced by attr->info.info.
 *
 * The user's struct is read in first.  For every array-style output, the
 * user-supplied count/length field is saved in ulen before the field is
 * overwritten with the program's real value; at most min(real, ulen)
 * entries are then written through the matching user pointer field.
 *
 * Callers without bpf_capable() get the dump-related length/count fields
 * zeroed and nothing beyond the basic fields is filled in.  Where
 * bpf_dump_raw_ok(file->f_cred) is false, the raw data is not copied and
 * the corresponding pointer field in the returned struct is set to 0.
 *
 * Returns 0 on success, the error from bpf_check_uarg_tail_zero(),
 * set_info_rec_size() or bpf_prog_offload_info_fill(), -ENOMEM if the
 * sanitized instruction dump cannot be prepared, or -EFAULT when a copy
 * to or from user space fails.
 */
static int bpf_prog_get_info_by_fd(struct file *file,
				   struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct btf *attach_btf = bpf_prog_get_target_btf(prog);
	struct bpf_prog_info info;
	u32 info_len = attr->info.info_len;
	struct bpf_prog_kstats stats;
	char __user *uinsns;
	u32 ulen, len;
	int err;

	/* Validate the part of the user struct that lies past attach_btf_id. */
	len = offsetofend(struct bpf_prog_info, attach_btf_id);
	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), len, info_len);
	if (err)
		return err;
	/* Never read or write more than this kernel's struct size. */
	info_len = min_t(u32, sizeof(info), info_len);

	/* Fields the user did not supply (short info_len) read as zero. */
	memset(&info, 0, sizeof(info));
	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);
	info.gpl_compatible = prog->gpl_compatible;

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	/* used_maps/used_map_cnt are read under used_maps_mutex. */
	mutex_lock(&prog->aux->used_maps_mutex);
	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i])) {
				mutex_unlock(&prog->aux->used_maps_mutex);
				return -EFAULT;
			}
	}
	mutex_unlock(&prog->aux->used_maps_mutex);

	err = set_info_rec_size(&info);
	if (err)
		return err;

	bpf_prog_get_stats(prog, &stats);
	info.run_time_ns = stats.nsecs;
	info.run_cnt = stats.cnt;
	info.recursion_misses = stats.misses;

	info.verified_insns = prog->aux->verified_insns;
	if (prog->aux->btf)
		info.btf_id = btf_obj_id(prog->aux->btf);

	/* Without bpf_capable(), report no dump data and skip to copy-out. */
	if (!bpf_capable()) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		info.nr_jited_ksyms = 0;
		info.nr_jited_func_lens = 0;
		info.nr_func_info = 0;
		info.nr_line_info = 0;
		info.nr_jited_line_info = 0;
		goto done;
	}

	/* Translated instructions: ulen is the user buffer size in bytes. */
	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		struct bpf_insn *insns_sanitized;
		bool fault;

		if (!prog->blinded || bpf_dump_raw_ok(file->f_cred)) {
			insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
			if (!insns_sanitized)
				return -ENOMEM;
			uinsns = u64_to_user_ptr(info.xlated_prog_insns);
			ulen = min_t(u32, info.xlated_prog_len, ulen);
			fault = copy_to_user(uinsns, insns_sanitized, ulen);
			/* The temporary dump is freed whether or not the copy faulted. */
			kfree(insns_sanitized);
			if (fault)
				return -EFAULT;
		} else {
			info.xlated_prog_insns = 0;
		}
	}

	if (bpf_prog_is_offloaded(prog->aux)) {
		err = bpf_prog_offload_info_fill(&info, prog);
		if (err)
			return err;
		goto done;
	}

	/* NOTE: the following code is supposed to be skipped for offload.
	 * bpf_prog_offload_info_fill() is the place to fill similar fields
	 * for offload.
	 */
	ulen = info.jited_prog_len;
	if (prog->aux->func_cnt) {
		u32 i;

		/* Total JITed length is the sum over all functions. */
		info.jited_prog_len = 0;
		for (i = 0; i < prog->aux->func_cnt; i++)
			info.jited_prog_len += prog->aux->func[i]->jited_len;
	} else {
		info.jited_prog_len = prog->jited_len;
	}

	if (info.jited_prog_len && ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			uinsns = u64_to_user_ptr(info.jited_prog_insns);
			ulen = min_t(u32, info.jited_prog_len, ulen);

			/* for multi-function programs, copy the JITed
			 * instructions for all the functions
			 */
			if (prog->aux->func_cnt) {
				u32 len, free, i;
				u8 *img;

				/* free tracks the bytes still available in the user buffer */
				free = ulen;
				for (i = 0; i < prog->aux->func_cnt; i++) {
					len = prog->aux->func[i]->jited_len;
					len = min_t(u32, len, free);
					img = (u8 *) prog->aux->func[i]->bpf_func;
					if (copy_to_user(uinsns, img, len))
						return -EFAULT;
					uinsns += len;
					free -= len;
					if (!free)
						break;
				}
			} else {
				if (copy_to_user(uinsns, prog->bpf_func, ulen))
					return -EFAULT;
			}
		} else {
			info.jited_prog_insns = 0;
		}
	}

	/* One ksym per function, or a single one when func_cnt is 0. */
	ulen = info.nr_jited_ksyms;
	info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
	if (ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			unsigned long ksym_addr;
			u64 __user *user_ksyms;
			u32 i;

			/* copy the address of the kernel symbol
			 * corresponding to each function
			 */
			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
			if (prog->aux->func_cnt) {
				for (i = 0; i < ulen; i++) {
					ksym_addr = (unsigned long)
						prog->aux->func[i]->bpf_func;
					if (put_user((u64) ksym_addr,
						     &user_ksyms[i]))
						return -EFAULT;
				}
			} else {
				ksym_addr = (unsigned long) prog->bpf_func;
				if (put_user((u64) ksym_addr, &user_ksyms[0]))
					return -EFAULT;
			}
		} else {
			info.jited_ksyms = 0;
		}
	}

	/* Same layout as the ksyms above, but reporting JITed lengths. */
	ulen = info.nr_jited_func_lens;
	info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
	if (ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			u32 __user *user_lens;
			u32 func_len, i;

			/* copy the JITed image lengths for each function */
			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
			user_lens = u64_to_user_ptr(info.jited_func_lens);
			if (prog->aux->func_cnt) {
				for (i = 0; i < ulen; i++) {
					func_len =
						prog->aux->func[i]->jited_len;
					if (put_user(func_len, &user_lens[i]))
						return -EFAULT;
				}
			} else {
				func_len = prog->jited_len;
				if (put_user(func_len, &user_lens[0]))
					return -EFAULT;
			}
		} else {
			info.jited_func_lens = 0;
		}
	}

	info.attach_btf_id = prog->aux->attach_btf_id;
	if (attach_btf)
		info.attach_btf_obj_id = btf_obj_id(attach_btf);

	/* func_info records: copied as one block of rec_size * count bytes. */
	ulen = info.nr_func_info;
	info.nr_func_info = prog->aux->func_info_cnt;
	if (info.nr_func_info && ulen) {
		char __user *user_finfo;

		user_finfo = u64_to_user_ptr(info.func_info);
		ulen = min_t(u32, info.nr_func_info, ulen);
		if (copy_to_user(user_finfo, prog->aux->func_info,
				 info.func_info_rec_size * ulen))
			return -EFAULT;
	}

	/* line_info records: same scheme as func_info. */
	ulen = info.nr_line_info;
	info.nr_line_info = prog->aux->nr_linfo;
	if (info.nr_line_info && ulen) {
		__u8 __user *user_linfo;

		user_linfo = u64_to_user_ptr(info.line_info);
		ulen = min_t(u32, info.nr_line_info, ulen);
		if (copy_to_user(user_linfo, prog->aux->linfo,
				 info.line_info_rec_size * ulen))
			return -EFAULT;
	}

	/* JITed line info is only reported when prog->aux->jited_linfo is set. */
	ulen = info.nr_jited_line_info;
	if (prog->aux->jited_linfo)
		info.nr_jited_line_info = prog->aux->nr_linfo;
	else
		info.nr_jited_line_info = 0;
	if (info.nr_jited_line_info && ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			unsigned long line_addr;
			__u64 __user *user_linfo;
			u32 i;

			user_linfo = u64_to_user_ptr(info.jited_line_info);
			ulen = min_t(u32, info.nr_jited_line_info, ulen);
			for (i = 0; i < ulen; i++) {
				line_addr = (unsigned long)prog->aux->jited_linfo[i];
				if (put_user((__u64)line_addr, &user_linfo[i]))
					return -EFAULT;
			}
		} else {
			info.jited_line_info = 0;
		}
	}

	/* One BPF_TAG_SIZE tag per function, or prog->tag when func_cnt is 0. */
	ulen = info.nr_prog_tags;
	info.nr_prog_tags = prog->aux->func_cnt ? : 1;
	if (ulen) {
		__u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
		u32 i;

		user_prog_tags = u64_to_user_ptr(info.prog_tags);
		ulen = min_t(u32, info.nr_prog_tags, ulen);
		if (prog->aux->func_cnt) {
			for (i = 0; i < ulen; i++) {
				if (copy_to_user(user_prog_tags[i],
						 prog->aux->func[i]->tag,
						 BPF_TAG_SIZE))
					return -EFAULT;
			}
		} else {
			if (copy_to_user(user_prog_tags[0],
					 prog->tag, BPF_TAG_SIZE))
				return -EFAULT;
		}
	}

done:
	/* Write back the struct and the number of bytes actually filled in. */
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}
5438 
5439 static int bpf_map_get_info_by_fd(struct file *file,
5440 				  struct bpf_map *map,
5441 				  const union bpf_attr *attr,
5442 				  union bpf_attr __user *uattr)
5443 {
5444 	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
5445 	struct bpf_map_info info;
5446 	u32 info_len = attr->info.info_len, len;
5447 	int err;
5448 
5449 	len = offsetofend(struct bpf_map_info, hash_size);
5450 	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), len, info_len);
5451 	if (err)
5452 		return err;
5453 	info_len = min_t(u32, sizeof(info), info_len);
5454 
5455 	memset(&info, 0, sizeof(info));
5456 	if (copy_from_user(&info, uinfo, info_len))
5457 		return -EFAULT;
5458 
5459 	info.type = map->map_type;
5460 	info.id = map->id;
5461 	info.key_size = map->key_size;
5462 	info.value_size = map->value_size;
5463 	info.max_entries = map->max_entries;
5464 	info.map_flags = map->map_flags;
5465 	info.map_extra = map->map_extra;
5466 	memcpy(info.name, map->name, sizeof(map->name));
5467 
5468 	if (map->btf) {
5469 		info.btf_id = btf_obj_id(map->btf);
5470 		info.btf_key_type_id = map->btf_key_type_id;
5471 		info.btf_value_type_id = map->btf_value_type_id;
5472 	}
5473 	info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
5474 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS)
5475 		bpf_map_struct_ops_info_fill(&info, map);
5476 
5477 	if (bpf_map_is_offloaded(map)) {
5478 		err = bpf_map_offload_info_fill(&info, map);
5479 		if (err)
5480 			return err;
5481 	}
5482 
5483 	if (info.hash) {
5484 		char __user *uhash = u64_to_user_ptr(info.hash);
5485 
5486 		if (!map->ops->map_get_hash)
5487 			return -EINVAL;
5488 		if (info.hash_size != sizeof(map->sha))
5489 			return -EINVAL;
5490 		if (!READ_ONCE(map->frozen))
5491 			return -EPERM;
5492 
5493 		err = map->ops->map_get_hash(map);
5494 		if (err != 0)
5495 			return err;
5496 
5497 		if (copy_to_user(uhash, map->sha, sizeof(map->sha)) != 0)
5498 			return -EFAULT;
5499 	} else if (info.hash_size) {
5500 		return -EINVAL;
5501 	}
5502 
5503 	if (copy_to_user(uinfo, &info, info_len) ||
5504 	    put_user(info_len, &uattr->info.info_len))
5505 		return -EFAULT;
5506 
5507 	return 0;
5508 }
5509 
5510 static int bpf_btf_get_info_by_fd(struct file *file,
5511 				  struct btf *btf,
5512 				  const union bpf_attr *attr,
5513 				  union bpf_attr __user *uattr)
5514 {
5515 	struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
5516 	u32 info_len = attr->info.info_len;
5517 	int err;
5518 
5519 	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len);
5520 	if (err)
5521 		return err;
5522 
5523 	return btf_get_info_by_fd(btf, attr, uattr);
5524 }
5525 
5526 static int bpf_link_get_info_by_fd(struct file *file,
5527 				  struct bpf_link *link,
5528 				  const union bpf_attr *attr,
5529 				  union bpf_attr __user *uattr)
5530 {
5531 	struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
5532 	struct bpf_link_info info;
5533 	u32 info_len = attr->info.info_len;
5534 	int err;
5535 
5536 	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
5537 	if (err)
5538 		return err;
5539 	info_len = min_t(u32, sizeof(info), info_len);
5540 
5541 	memset(&info, 0, sizeof(info));
5542 	if (copy_from_user(&info, uinfo, info_len))
5543 		return -EFAULT;
5544 
5545 	info.type = link->type;
5546 	info.id = link->id;
5547 	if (link->prog)
5548 		info.prog_id = link->prog->aux->id;
5549 
5550 	if (link->ops->fill_link_info) {
5551 		err = link->ops->fill_link_info(link, &info);
5552 		if (err)
5553 			return err;
5554 	}
5555 
5556 	if (copy_to_user(uinfo, &info, info_len) ||
5557 	    put_user(info_len, &uattr->info.info_len))
5558 		return -EFAULT;
5559 
5560 	return 0;
5561 }
5562 
5563 
5564 static int token_get_info_by_fd(struct file *file,
5565 				struct bpf_token *token,
5566 				const union bpf_attr *attr,
5567 				union bpf_attr __user *uattr)
5568 {
5569 	struct bpf_token_info __user *uinfo = u64_to_user_ptr(attr->info.info);
5570 	u32 info_len = attr->info.info_len;
5571 	int err;
5572 
5573 	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len);
5574 	if (err)
5575 		return err;
5576 	return bpf_token_get_info_by_fd(token, attr, uattr);
5577 }
5578 
5579 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
5580 
5581 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
5582 				  union bpf_attr __user *uattr)
5583 {
5584 	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
5585 		return -EINVAL;
5586 
5587 	CLASS(fd, f)(attr->info.bpf_fd);
5588 	if (fd_empty(f))
5589 		return -EBADFD;
5590 
5591 	if (fd_file(f)->f_op == &bpf_prog_fops)
5592 		return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
5593 					      uattr);
5594 	else if (fd_file(f)->f_op == &bpf_map_fops)
5595 		return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
5596 					     uattr);
5597 	else if (fd_file(f)->f_op == &btf_fops)
5598 		return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr);
5599 	else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll)
5600 		return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data,
5601 					      attr, uattr);
5602 	else if (fd_file(f)->f_op == &bpf_token_fops)
5603 		return token_get_info_by_fd(fd_file(f), fd_file(f)->private_data,
5604 					    attr, uattr);
5605 	return -EINVAL;
5606 }
5607 
5608 #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd
5609 
5610 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log)
5611 {
5612 	struct bpf_token *token = NULL;
5613 
5614 	if (CHECK_ATTR(BPF_BTF_LOAD))
5615 		return -EINVAL;
5616 
5617 	if (attr->btf_flags & ~BPF_F_TOKEN_FD)
5618 		return -EINVAL;
5619 
5620 	if (attr->btf_flags & BPF_F_TOKEN_FD) {
5621 		token = bpf_token_get_from_fd(attr->btf_token_fd);
5622 		if (IS_ERR(token))
5623 			return PTR_ERR(token);
5624 		if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) {
5625 			bpf_token_put(token);
5626 			token = NULL;
5627 		}
5628 	}
5629 
5630 	if (!bpf_token_capable(token, CAP_BPF)) {
5631 		bpf_token_put(token);
5632 		return -EPERM;
5633 	}
5634 
5635 	bpf_token_put(token);
5636 
5637 	return btf_new_fd(attr, uattr, attr_log);
5638 }
5639 
5640 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd
5641 
5642 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
5643 {
5644 	struct bpf_token *token = NULL;
5645 
5646 	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
5647 		return -EINVAL;
5648 
5649 	if (attr->open_flags & ~BPF_F_TOKEN_FD)
5650 		return -EINVAL;
5651 
5652 	if (attr->open_flags & BPF_F_TOKEN_FD) {
5653 		token = bpf_token_get_from_fd(attr->fd_by_id_token_fd);
5654 		if (IS_ERR(token))
5655 			return PTR_ERR(token);
5656 		if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) {
5657 			bpf_token_put(token);
5658 			token = NULL;
5659 		}
5660 	}
5661 
5662 	if (!bpf_token_capable(token, CAP_SYS_ADMIN)) {
5663 		bpf_token_put(token);
5664 		return -EPERM;
5665 	}
5666 
5667 	bpf_token_put(token);
5668 
5669 	return btf_get_fd_by_id(attr->btf_id);
5670 }
5671 
5672 static int bpf_task_fd_query_copy(const union bpf_attr *attr,
5673 				    union bpf_attr __user *uattr,
5674 				    u32 prog_id, u32 fd_type,
5675 				    const char *buf, u64 probe_offset,
5676 				    u64 probe_addr)
5677 {
5678 	char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
5679 	u32 len = buf ? strlen(buf) : 0, input_len;
5680 	int err = 0;
5681 
5682 	if (put_user(len, &uattr->task_fd_query.buf_len))
5683 		return -EFAULT;
5684 	input_len = attr->task_fd_query.buf_len;
5685 	if (input_len && ubuf) {
5686 		if (!len) {
5687 			/* nothing to copy, just make ubuf NULL terminated */
5688 			char zero = '\0';
5689 
5690 			if (put_user(zero, ubuf))
5691 				return -EFAULT;
5692 		} else {
5693 			err = bpf_copy_to_user(ubuf, buf, input_len, len);
5694 			if (err == -EFAULT)
5695 				return err;
5696 		}
5697 	}
5698 
5699 	if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
5700 	    put_user(fd_type, &uattr->task_fd_query.fd_type) ||
5701 	    put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
5702 	    put_user(probe_addr, &uattr->task_fd_query.probe_addr))
5703 		return -EFAULT;
5704 
5705 	return err;
5706 }
5707 
5708 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
5709 
5710 static int bpf_task_fd_query(const union bpf_attr *attr,
5711 			     union bpf_attr __user *uattr)
5712 {
5713 	pid_t pid = attr->task_fd_query.pid;
5714 	u32 fd = attr->task_fd_query.fd;
5715 	const struct perf_event *event;
5716 	struct task_struct *task;
5717 	struct file *file;
5718 	int err;
5719 
5720 	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
5721 		return -EINVAL;
5722 
5723 	if (!capable(CAP_SYS_ADMIN))
5724 		return -EPERM;
5725 
5726 	if (attr->task_fd_query.flags != 0)
5727 		return -EINVAL;
5728 
5729 	rcu_read_lock();
5730 	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
5731 	rcu_read_unlock();
5732 	if (!task)
5733 		return -ENOENT;
5734 
5735 	err = 0;
5736 	file = fget_task(task, fd);
5737 	put_task_struct(task);
5738 	if (!file)
5739 		return -EBADF;
5740 
5741 	if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) {
5742 		struct bpf_link *link = file->private_data;
5743 
5744 		if (link->ops == &bpf_raw_tp_link_lops) {
5745 			struct bpf_raw_tp_link *raw_tp =
5746 				container_of(link, struct bpf_raw_tp_link, link);
5747 			struct bpf_raw_event_map *btp = raw_tp->btp;
5748 
5749 			err = bpf_task_fd_query_copy(attr, uattr,
5750 						     raw_tp->link.prog->aux->id,
5751 						     BPF_FD_TYPE_RAW_TRACEPOINT,
5752 						     btp->tp->name, 0, 0);
5753 			goto put_file;
5754 		}
5755 		goto out_not_supp;
5756 	}
5757 
5758 	event = perf_get_event(file);
5759 	if (!IS_ERR(event)) {
5760 		u64 probe_offset, probe_addr;
5761 		u32 prog_id, fd_type;
5762 		const char *buf;
5763 
5764 		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
5765 					      &buf, &probe_offset,
5766 					      &probe_addr, NULL);
5767 		if (!err)
5768 			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
5769 						     fd_type, buf,
5770 						     probe_offset,
5771 						     probe_addr);
5772 		goto put_file;
5773 	}
5774 
5775 out_not_supp:
5776 	err = -ENOTSUPP;
5777 put_file:
5778 	fput(file);
5779 	return err;
5780 }
5781 
5782 #define BPF_MAP_BATCH_LAST_FIELD batch.flags
5783 
5784 #define BPF_DO_BATCH(fn, ...)			\
5785 	do {					\
5786 		if (!fn) {			\
5787 			err = -ENOTSUPP;	\
5788 			goto err_put;		\
5789 		}				\
5790 		err = fn(__VA_ARGS__);		\
5791 	} while (0)
5792 
5793 static int bpf_map_do_batch(const union bpf_attr *attr,
5794 			    union bpf_attr __user *uattr,
5795 			    int cmd)
5796 {
5797 	bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
5798 			 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
5799 	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
5800 	struct bpf_map *map;
5801 	int err;
5802 
5803 	if (CHECK_ATTR(BPF_MAP_BATCH))
5804 		return -EINVAL;
5805 
5806 	CLASS(fd, f)(attr->batch.map_fd);
5807 
5808 	map = __bpf_map_get(f);
5809 	if (IS_ERR(map))
5810 		return PTR_ERR(map);
5811 	if (has_write)
5812 		bpf_map_write_active_inc(map);
5813 	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
5814 		err = -EPERM;
5815 		goto err_put;
5816 	}
5817 	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
5818 		err = -EPERM;
5819 		goto err_put;
5820 	}
5821 
5822 	if (cmd == BPF_MAP_LOOKUP_BATCH)
5823 		BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr);
5824 	else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
5825 		BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr);
5826 	else if (cmd == BPF_MAP_UPDATE_BATCH)
5827 		BPF_DO_BATCH(map->ops->map_update_batch, map, fd_file(f), attr, uattr);
5828 	else
5829 		BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
5830 err_put:
5831 	if (has_write) {
5832 		maybe_wait_bpf_programs(map);
5833 		bpf_map_write_active_dec(map);
5834 	}
5835 	return err;
5836 }
5837 
5838 #define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid
5839 static int link_create(union bpf_attr *attr, bpfptr_t uattr)
5840 {
5841 	struct bpf_prog *prog;
5842 	int ret;
5843 
5844 	if (CHECK_ATTR(BPF_LINK_CREATE))
5845 		return -EINVAL;
5846 
5847 	if (attr->link_create.attach_type == BPF_STRUCT_OPS)
5848 		return bpf_struct_ops_link_create(attr);
5849 
5850 	prog = bpf_prog_get(attr->link_create.prog_fd);
5851 	if (IS_ERR(prog))
5852 		return PTR_ERR(prog);
5853 
5854 	ret = bpf_prog_attach_check_attach_type(prog,
5855 						attr->link_create.attach_type);
5856 	if (ret)
5857 		goto out;
5858 
5859 	switch (prog->type) {
5860 	case BPF_PROG_TYPE_CGROUP_SKB:
5861 	case BPF_PROG_TYPE_CGROUP_SOCK:
5862 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
5863 	case BPF_PROG_TYPE_SOCK_OPS:
5864 	case BPF_PROG_TYPE_CGROUP_DEVICE:
5865 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
5866 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
5867 		ret = cgroup_bpf_link_attach(attr, prog);
5868 		break;
5869 	case BPF_PROG_TYPE_EXT:
5870 		ret = bpf_tracing_prog_attach(prog,
5871 					      attr->link_create.target_fd,
5872 					      attr->link_create.target_btf_id,
5873 					      attr->link_create.tracing.cookie,
5874 					      attr->link_create.attach_type);
5875 		break;
5876 	case BPF_PROG_TYPE_LSM:
5877 	case BPF_PROG_TYPE_TRACING:
5878 		if (attr->link_create.attach_type != prog->expected_attach_type) {
5879 			ret = -EINVAL;
5880 			goto out;
5881 		}
5882 		if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
5883 			ret = bpf_raw_tp_link_attach(prog, NULL, attr->link_create.tracing.cookie,
5884 						     attr->link_create.attach_type);
5885 		else if (prog->expected_attach_type == BPF_TRACE_ITER)
5886 			ret = bpf_iter_link_attach(attr, uattr, prog);
5887 		else if (prog->expected_attach_type == BPF_LSM_CGROUP)
5888 			ret = cgroup_bpf_link_attach(attr, prog);
5889 		else if (is_tracing_multi(prog->expected_attach_type))
5890 			ret = bpf_tracing_multi_attach(prog, attr);
5891 		else
5892 			ret = bpf_tracing_prog_attach(prog,
5893 						      attr->link_create.target_fd,
5894 						      attr->link_create.target_btf_id,
5895 						      attr->link_create.tracing.cookie,
5896 						      attr->link_create.attach_type);
5897 		break;
5898 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
5899 	case BPF_PROG_TYPE_SK_LOOKUP:
5900 		ret = netns_bpf_link_create(attr, prog);
5901 		break;
5902 	case BPF_PROG_TYPE_SK_MSG:
5903 	case BPF_PROG_TYPE_SK_SKB:
5904 		ret = sock_map_link_create(attr, prog);
5905 		break;
5906 #ifdef CONFIG_NET
5907 	case BPF_PROG_TYPE_XDP:
5908 		ret = bpf_xdp_link_attach(attr, prog);
5909 		break;
5910 	case BPF_PROG_TYPE_SCHED_CLS:
5911 		if (attr->link_create.attach_type == BPF_TCX_INGRESS ||
5912 		    attr->link_create.attach_type == BPF_TCX_EGRESS)
5913 			ret = tcx_link_attach(attr, prog);
5914 		else
5915 			ret = netkit_link_attach(attr, prog);
5916 		break;
5917 	case BPF_PROG_TYPE_NETFILTER:
5918 		ret = bpf_nf_link_attach(attr, prog);
5919 		break;
5920 #endif
5921 	case BPF_PROG_TYPE_PERF_EVENT:
5922 	case BPF_PROG_TYPE_TRACEPOINT:
5923 		ret = bpf_perf_link_attach(attr, prog);
5924 		break;
5925 	case BPF_PROG_TYPE_KPROBE:
5926 		if (attr->link_create.attach_type == BPF_PERF_EVENT)
5927 			ret = bpf_perf_link_attach(attr, prog);
5928 		else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI ||
5929 			 attr->link_create.attach_type == BPF_TRACE_KPROBE_SESSION)
5930 			ret = bpf_kprobe_multi_link_attach(attr, prog);
5931 		else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI ||
5932 			 attr->link_create.attach_type == BPF_TRACE_UPROBE_SESSION)
5933 			ret = bpf_uprobe_multi_link_attach(attr, prog);
5934 		break;
5935 	default:
5936 		ret = -EINVAL;
5937 	}
5938 
5939 out:
5940 	if (ret < 0)
5941 		bpf_prog_put(prog);
5942 	return ret;
5943 }
5944 
5945 static int link_update_map(struct bpf_link *link, union bpf_attr *attr)
5946 {
5947 	struct bpf_map *new_map, *old_map = NULL;
5948 	int ret;
5949 
5950 	new_map = bpf_map_get(attr->link_update.new_map_fd);
5951 	if (IS_ERR(new_map))
5952 		return PTR_ERR(new_map);
5953 
5954 	if (attr->link_update.flags & BPF_F_REPLACE) {
5955 		old_map = bpf_map_get(attr->link_update.old_map_fd);
5956 		if (IS_ERR(old_map)) {
5957 			ret = PTR_ERR(old_map);
5958 			goto out_put;
5959 		}
5960 	} else if (attr->link_update.old_map_fd) {
5961 		ret = -EINVAL;
5962 		goto out_put;
5963 	}
5964 
5965 	ret = link->ops->update_map(link, new_map, old_map);
5966 
5967 	if (old_map)
5968 		bpf_map_put(old_map);
5969 out_put:
5970 	bpf_map_put(new_map);
5971 	return ret;
5972 }
5973 
5974 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
5975 
5976 static int link_update(union bpf_attr *attr)
5977 {
5978 	struct bpf_prog *old_prog = NULL, *new_prog;
5979 	struct bpf_link *link;
5980 	u32 flags;
5981 	int ret;
5982 
5983 	if (CHECK_ATTR(BPF_LINK_UPDATE))
5984 		return -EINVAL;
5985 
5986 	flags = attr->link_update.flags;
5987 	if (flags & ~BPF_F_REPLACE)
5988 		return -EINVAL;
5989 
5990 	link = bpf_link_get_from_fd(attr->link_update.link_fd);
5991 	if (IS_ERR(link))
5992 		return PTR_ERR(link);
5993 
5994 	if (link->ops->update_map) {
5995 		ret = link_update_map(link, attr);
5996 		goto out_put_link;
5997 	}
5998 
5999 	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
6000 	if (IS_ERR(new_prog)) {
6001 		ret = PTR_ERR(new_prog);
6002 		goto out_put_link;
6003 	}
6004 
6005 	if (flags & BPF_F_REPLACE) {
6006 		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
6007 		if (IS_ERR(old_prog)) {
6008 			ret = PTR_ERR(old_prog);
6009 			old_prog = NULL;
6010 			goto out_put_progs;
6011 		}
6012 	} else if (attr->link_update.old_prog_fd) {
6013 		ret = -EINVAL;
6014 		goto out_put_progs;
6015 	}
6016 
6017 	if (link->ops->update_prog)
6018 		ret = link->ops->update_prog(link, new_prog, old_prog);
6019 	else
6020 		ret = -EINVAL;
6021 
6022 out_put_progs:
6023 	if (old_prog)
6024 		bpf_prog_put(old_prog);
6025 	if (ret)
6026 		bpf_prog_put(new_prog);
6027 out_put_link:
6028 	bpf_link_put_direct(link);
6029 	return ret;
6030 }
6031 
6032 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
6033 
6034 static int link_detach(union bpf_attr *attr)
6035 {
6036 	struct bpf_link *link;
6037 	int ret;
6038 
6039 	if (CHECK_ATTR(BPF_LINK_DETACH))
6040 		return -EINVAL;
6041 
6042 	link = bpf_link_get_from_fd(attr->link_detach.link_fd);
6043 	if (IS_ERR(link))
6044 		return PTR_ERR(link);
6045 
6046 	if (link->ops->detach)
6047 		ret = link->ops->detach(link);
6048 	else
6049 		ret = -EOPNOTSUPP;
6050 
6051 	bpf_link_put_direct(link);
6052 	return ret;
6053 }
6054 
6055 struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
6056 {
6057 	return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
6058 }
6059 EXPORT_SYMBOL(bpf_link_inc_not_zero);
6060 
6061 struct bpf_link *bpf_link_by_id(u32 id)
6062 {
6063 	struct bpf_link *link;
6064 
6065 	if (!id)
6066 		return ERR_PTR(-ENOENT);
6067 
6068 	spin_lock_bh(&link_idr_lock);
6069 	/* before link is "settled", ID is 0, pretend it doesn't exist yet */
6070 	link = idr_find(&link_idr, id);
6071 	if (link) {
6072 		if (link->id)
6073 			link = bpf_link_inc_not_zero(link);
6074 		else
6075 			link = ERR_PTR(-EAGAIN);
6076 	} else {
6077 		link = ERR_PTR(-ENOENT);
6078 	}
6079 	spin_unlock_bh(&link_idr_lock);
6080 	return link;
6081 }
6082 
6083 struct bpf_link *bpf_link_get_curr_or_next(u32 *id)
6084 {
6085 	struct bpf_link *link;
6086 
6087 	spin_lock_bh(&link_idr_lock);
6088 again:
6089 	link = idr_get_next(&link_idr, id);
6090 	if (link) {
6091 		link = bpf_link_inc_not_zero(link);
6092 		if (IS_ERR(link)) {
6093 			(*id)++;
6094 			goto again;
6095 		}
6096 	}
6097 	spin_unlock_bh(&link_idr_lock);
6098 
6099 	return link;
6100 }
6101 
6102 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
6103 
6104 static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
6105 {
6106 	struct bpf_link *link;
6107 	u32 id = attr->link_id;
6108 	int fd;
6109 
6110 	if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
6111 		return -EINVAL;
6112 
6113 	if (!capable(CAP_SYS_ADMIN))
6114 		return -EPERM;
6115 
6116 	link = bpf_link_by_id(id);
6117 	if (IS_ERR(link))
6118 		return PTR_ERR(link);
6119 
6120 	fd = bpf_link_new_fd(link);
6121 	if (fd < 0)
6122 		bpf_link_put_direct(link);
6123 
6124 	return fd;
6125 }
6126 
/* Held around the updates of bpf_stats_enabled_key made in this file. */
DEFINE_MUTEX(bpf_stats_enabled_mutex);

/*
 * .release handler of bpf_stats_fops: decrement bpf_stats_enabled_key
 * under bpf_stats_enabled_mutex.  Always returns 0.
 */
static int bpf_stats_release(struct inode *inode, struct file *file)
{
	mutex_lock(&bpf_stats_enabled_mutex);
	static_key_slow_dec(&bpf_stats_enabled_key.key);
	mutex_unlock(&bpf_stats_enabled_mutex);
	return 0;
}
6136 
/*
 * File operations for the "bpf-stats" anonymous inode: only .release is
 * provided, which drops the static key count again.
 */
static const struct file_operations bpf_stats_fops = {
	.release = bpf_stats_release,
};
6140 
6141 static int bpf_enable_runtime_stats(void)
6142 {
6143 	int fd;
6144 
6145 	mutex_lock(&bpf_stats_enabled_mutex);
6146 
6147 	/* Set a very high limit to avoid overflow */
6148 	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
6149 		mutex_unlock(&bpf_stats_enabled_mutex);
6150 		return -EBUSY;
6151 	}
6152 
6153 	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
6154 	if (fd >= 0)
6155 		static_key_slow_inc(&bpf_stats_enabled_key.key);
6156 
6157 	mutex_unlock(&bpf_stats_enabled_mutex);
6158 	return fd;
6159 }
6160 
6161 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
6162 
6163 static int bpf_enable_stats(union bpf_attr *attr)
6164 {
6165 
6166 	if (CHECK_ATTR(BPF_ENABLE_STATS))
6167 		return -EINVAL;
6168 
6169 	if (!capable(CAP_SYS_ADMIN))
6170 		return -EPERM;
6171 
6172 	switch (attr->enable_stats.type) {
6173 	case BPF_STATS_RUN_TIME:
6174 		return bpf_enable_runtime_stats();
6175 	default:
6176 		break;
6177 	}
6178 	return -EINVAL;
6179 }
6180 
6181 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
6182 
6183 static int bpf_iter_create(union bpf_attr *attr)
6184 {
6185 	struct bpf_link *link;
6186 	int err;
6187 
6188 	if (CHECK_ATTR(BPF_ITER_CREATE))
6189 		return -EINVAL;
6190 
6191 	if (attr->iter_create.flags)
6192 		return -EINVAL;
6193 
6194 	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
6195 	if (IS_ERR(link))
6196 		return PTR_ERR(link);
6197 
6198 	err = bpf_iter_new_fd(link);
6199 	bpf_link_put_direct(link);
6200 
6201 	return err;
6202 }
6203 
6204 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
6205 
6206 static int bpf_prog_bind_map(union bpf_attr *attr)
6207 {
6208 	struct bpf_prog *prog;
6209 	struct bpf_map *map;
6210 	struct bpf_map **used_maps_old, **used_maps_new;
6211 	int i, ret = 0;
6212 
6213 	if (CHECK_ATTR(BPF_PROG_BIND_MAP))
6214 		return -EINVAL;
6215 
6216 	if (attr->prog_bind_map.flags)
6217 		return -EINVAL;
6218 
6219 	prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
6220 	if (IS_ERR(prog))
6221 		return PTR_ERR(prog);
6222 
6223 	map = bpf_map_get(attr->prog_bind_map.map_fd);
6224 	if (IS_ERR(map)) {
6225 		ret = PTR_ERR(map);
6226 		goto out_prog_put;
6227 	}
6228 
6229 	mutex_lock(&prog->aux->used_maps_mutex);
6230 
6231 	used_maps_old = prog->aux->used_maps;
6232 
6233 	for (i = 0; i < prog->aux->used_map_cnt; i++)
6234 		if (used_maps_old[i] == map) {
6235 			bpf_map_put(map);
6236 			goto out_unlock;
6237 		}
6238 
6239 	used_maps_new = kmalloc_objs(used_maps_new[0],
6240 				     prog->aux->used_map_cnt + 1);
6241 	if (!used_maps_new) {
6242 		ret = -ENOMEM;
6243 		goto out_unlock;
6244 	}
6245 
6246 	/* The bpf program will not access the bpf map, but for the sake of
6247 	 * simplicity, increase sleepable_refcnt for sleepable program as well.
6248 	 */
6249 	if (prog->sleepable)
6250 		atomic64_inc(&map->sleepable_refcnt);
6251 	memcpy(used_maps_new, used_maps_old,
6252 	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
6253 	used_maps_new[prog->aux->used_map_cnt] = map;
6254 
6255 	prog->aux->used_map_cnt++;
6256 	prog->aux->used_maps = used_maps_new;
6257 
6258 	kfree(used_maps_old);
6259 
6260 out_unlock:
6261 	mutex_unlock(&prog->aux->used_maps_mutex);
6262 
6263 	if (ret)
6264 		bpf_map_put(map);
6265 out_prog_put:
6266 	bpf_prog_put(prog);
6267 	return ret;
6268 }
6269 
6270 #define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd
6271 
6272 static int token_create(union bpf_attr *attr)
6273 {
6274 	if (CHECK_ATTR(BPF_TOKEN_CREATE))
6275 		return -EINVAL;
6276 
6277 	/* no flags are supported yet */
6278 	if (attr->token_create.flags)
6279 		return -EINVAL;
6280 
6281 	return bpf_token_create(attr);
6282 }
6283 
6284 #define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd
6285 
6286 static int prog_stream_read(union bpf_attr *attr)
6287 {
6288 	char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf);
6289 	u32 len = attr->prog_stream_read.stream_buf_len;
6290 	struct bpf_prog *prog;
6291 	int ret;
6292 
6293 	if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD))
6294 		return -EINVAL;
6295 
6296 	prog = bpf_prog_get(attr->prog_stream_read.prog_fd);
6297 	if (IS_ERR(prog))
6298 		return PTR_ERR(prog);
6299 
6300 	ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len);
6301 	bpf_prog_put(prog);
6302 
6303 	return ret;
6304 }
6305 
6306 #define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd
6307 
6308 static int prog_assoc_struct_ops(union bpf_attr *attr)
6309 {
6310 	struct bpf_prog *prog;
6311 	struct bpf_map *map;
6312 	int ret;
6313 
6314 	if (CHECK_ATTR(BPF_PROG_ASSOC_STRUCT_OPS))
6315 		return -EINVAL;
6316 
6317 	if (attr->prog_assoc_struct_ops.flags)
6318 		return -EINVAL;
6319 
6320 	prog = bpf_prog_get(attr->prog_assoc_struct_ops.prog_fd);
6321 	if (IS_ERR(prog))
6322 		return PTR_ERR(prog);
6323 
6324 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
6325 		ret = -EINVAL;
6326 		goto put_prog;
6327 	}
6328 
6329 	map = bpf_map_get(attr->prog_assoc_struct_ops.map_fd);
6330 	if (IS_ERR(map)) {
6331 		ret = PTR_ERR(map);
6332 		goto put_prog;
6333 	}
6334 
6335 	if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
6336 		ret = -EINVAL;
6337 		goto put_map;
6338 	}
6339 
6340 	ret = bpf_prog_assoc_struct_ops(prog, map);
6341 
6342 put_map:
6343 	bpf_map_put(map);
6344 put_prog:
6345 	bpf_prog_put(prog);
6346 	return ret;
6347 }
6348 
6349 static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
6350 		     bpfptr_t uattr_common, unsigned int size_common)
6351 {
6352 	struct bpf_common_attr attr_common;
6353 	u32 offsetof_log_true_size = 0;
6354 	struct bpf_log_attr attr_log;
6355 	union bpf_attr attr;
6356 	int err;
6357 
6358 	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
6359 	if (err)
6360 		return err;
6361 	size = min_t(u32, size, sizeof(attr));
6362 
6363 	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
6364 	memset(&attr, 0, sizeof(attr));
6365 	if (copy_from_bpfptr(&attr, uattr, size) != 0)
6366 		return -EFAULT;
6367 
6368 	memset(&attr_common, 0, sizeof(attr_common));
6369 	if (cmd & BPF_COMMON_ATTRS) {
6370 		err = bpf_check_uarg_tail_zero(uattr_common,
6371 					       offsetofend(struct bpf_common_attr, log_true_size),
6372 					       size_common);
6373 		if (err)
6374 			return err;
6375 
6376 		cmd &= ~BPF_COMMON_ATTRS;
6377 		size_common = min_t(u32, size_common, sizeof(attr_common));
6378 		if (copy_from_bpfptr(&attr_common, uattr_common, size_common) != 0)
6379 			return -EFAULT;
6380 	} else {
6381 		size_common = 0;
6382 	}
6383 
6384 	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
6385 	if (err < 0)
6386 		return err;
6387 
6388 	switch (cmd) {
6389 	case BPF_MAP_CREATE:
6390 		err = map_create(&attr, uattr, &attr_common, uattr_common, size_common);
6391 		break;
6392 	case BPF_MAP_LOOKUP_ELEM:
6393 		err = map_lookup_elem(&attr);
6394 		break;
6395 	case BPF_MAP_UPDATE_ELEM:
6396 		err = map_update_elem(&attr, uattr);
6397 		break;
6398 	case BPF_MAP_DELETE_ELEM:
6399 		err = map_delete_elem(&attr, uattr);
6400 		break;
6401 	case BPF_MAP_GET_NEXT_KEY:
6402 		err = map_get_next_key(&attr);
6403 		break;
6404 	case BPF_MAP_FREEZE:
6405 		err = map_freeze(&attr);
6406 		break;
6407 	case BPF_PROG_LOAD:
6408 		if (size >= offsetofend(union bpf_attr, log_true_size))
6409 			offsetof_log_true_size = offsetof(union bpf_attr, log_true_size);
6410 		err = bpf_log_attr_init(&attr_log, attr.log_buf, attr.log_size, attr.log_level,
6411 					offsetof_log_true_size, uattr, &attr_common, uattr_common,
6412 					size_common);
6413 		err = err ?: bpf_prog_load(&attr, uattr, &attr_log);
6414 		break;
6415 	case BPF_OBJ_PIN:
6416 		err = bpf_obj_pin(&attr);
6417 		break;
6418 	case BPF_OBJ_GET:
6419 		err = bpf_obj_get(&attr);
6420 		break;
6421 	case BPF_PROG_ATTACH:
6422 		err = bpf_prog_attach(&attr);
6423 		break;
6424 	case BPF_PROG_DETACH:
6425 		err = bpf_prog_detach(&attr);
6426 		break;
6427 	case BPF_PROG_QUERY:
6428 		err = bpf_prog_query(&attr, uattr.user, size);
6429 		break;
6430 	case BPF_PROG_TEST_RUN:
6431 		err = bpf_prog_test_run(&attr, uattr.user);
6432 		break;
6433 	case BPF_PROG_GET_NEXT_ID:
6434 		err = bpf_obj_get_next_id(&attr, uattr.user,
6435 					  &prog_idr, &prog_idr_lock);
6436 		break;
6437 	case BPF_MAP_GET_NEXT_ID:
6438 		err = bpf_obj_get_next_id(&attr, uattr.user,
6439 					  &map_idr, &map_idr_lock);
6440 		break;
6441 	case BPF_BTF_GET_NEXT_ID:
6442 		err = bpf_obj_get_next_id(&attr, uattr.user,
6443 					  &btf_idr, &btf_idr_lock);
6444 		break;
6445 	case BPF_PROG_GET_FD_BY_ID:
6446 		err = bpf_prog_get_fd_by_id(&attr);
6447 		break;
6448 	case BPF_MAP_GET_FD_BY_ID:
6449 		err = bpf_map_get_fd_by_id(&attr);
6450 		break;
6451 	case BPF_OBJ_GET_INFO_BY_FD:
6452 		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
6453 		break;
6454 	case BPF_RAW_TRACEPOINT_OPEN:
6455 		err = bpf_raw_tracepoint_open(&attr);
6456 		break;
6457 	case BPF_BTF_LOAD:
6458 		if (size >= offsetofend(union bpf_attr, btf_log_true_size))
6459 			offsetof_log_true_size = offsetof(union bpf_attr, btf_log_true_size);
6460 		err = bpf_log_attr_init(&attr_log, attr.btf_log_buf, attr.btf_log_size,
6461 					attr.btf_log_level, offsetof_log_true_size, uattr,
6462 					&attr_common, uattr_common, size_common);
6463 		err = err ?: bpf_btf_load(&attr, uattr, &attr_log);
6464 		break;
6465 	case BPF_BTF_GET_FD_BY_ID:
6466 		err = bpf_btf_get_fd_by_id(&attr);
6467 		break;
6468 	case BPF_TASK_FD_QUERY:
6469 		err = bpf_task_fd_query(&attr, uattr.user);
6470 		break;
6471 	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
6472 		err = map_lookup_and_delete_elem(&attr);
6473 		break;
6474 	case BPF_MAP_LOOKUP_BATCH:
6475 		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
6476 		break;
6477 	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
6478 		err = bpf_map_do_batch(&attr, uattr.user,
6479 				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
6480 		break;
6481 	case BPF_MAP_UPDATE_BATCH:
6482 		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
6483 		break;
6484 	case BPF_MAP_DELETE_BATCH:
6485 		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
6486 		break;
6487 	case BPF_LINK_CREATE:
6488 		err = link_create(&attr, uattr);
6489 		break;
6490 	case BPF_LINK_UPDATE:
6491 		err = link_update(&attr);
6492 		break;
6493 	case BPF_LINK_GET_FD_BY_ID:
6494 		err = bpf_link_get_fd_by_id(&attr);
6495 		break;
6496 	case BPF_LINK_GET_NEXT_ID:
6497 		err = bpf_obj_get_next_id(&attr, uattr.user,
6498 					  &link_idr, &link_idr_lock);
6499 		break;
6500 	case BPF_ENABLE_STATS:
6501 		err = bpf_enable_stats(&attr);
6502 		break;
6503 	case BPF_ITER_CREATE:
6504 		err = bpf_iter_create(&attr);
6505 		break;
6506 	case BPF_LINK_DETACH:
6507 		err = link_detach(&attr);
6508 		break;
6509 	case BPF_PROG_BIND_MAP:
6510 		err = bpf_prog_bind_map(&attr);
6511 		break;
6512 	case BPF_TOKEN_CREATE:
6513 		err = token_create(&attr);
6514 		break;
6515 	case BPF_PROG_STREAM_READ_BY_FD:
6516 		err = prog_stream_read(&attr);
6517 		break;
6518 	case BPF_PROG_ASSOC_STRUCT_OPS:
6519 		err = prog_assoc_struct_ops(&attr);
6520 		break;
6521 	default:
6522 		err = -EINVAL;
6523 		break;
6524 	}
6525 
6526 	return err;
6527 }
6528 
6529 SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size,
6530 		struct bpf_common_attr __user *, uattr_common, unsigned int, size_common)
6531 {
6532 	return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common);
6533 }
6534 
6535 static bool syscall_prog_is_valid_access(int off, int size,
6536 					 enum bpf_access_type type,
6537 					 const struct bpf_prog *prog,
6538 					 struct bpf_insn_access_aux *info)
6539 {
6540 	if (off < 0 || off >= U16_MAX)
6541 		return false;
6542 	/* No alignment requirements for syscall ctx accesses. */
6543 	return true;
6544 }
6545 
6546 BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
6547 {
6548 	switch (cmd) {
6549 	case BPF_MAP_CREATE:
6550 	case BPF_MAP_DELETE_ELEM:
6551 	case BPF_MAP_UPDATE_ELEM:
6552 	case BPF_MAP_FREEZE:
6553 	case BPF_MAP_GET_FD_BY_ID:
6554 	case BPF_PROG_LOAD:
6555 	case BPF_BTF_LOAD:
6556 	case BPF_LINK_CREATE:
6557 	case BPF_RAW_TRACEPOINT_OPEN:
6558 		break;
6559 	default:
6560 		return -EINVAL;
6561 	}
6562 	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0);
6563 }
6564 
6565 
6566 /* To shut up -Wmissing-prototypes.
6567  * This function is used by the kernel light skeleton
6568  * to load bpf programs when modules are loaded or during kernel boot.
6569  * See tools/lib/bpf/skel_internal.h
6570  */
6571 int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
6572 
6573 int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
6574 {
6575 	struct bpf_prog * __maybe_unused prog;
6576 	struct bpf_tramp_run_ctx __maybe_unused run_ctx;
6577 
6578 	switch (cmd) {
6579 #ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
6580 	case BPF_PROG_TEST_RUN:
6581 		if (attr->test.data_in || attr->test.data_out ||
6582 		    attr->test.ctx_out || attr->test.duration ||
6583 		    attr->test.repeat || attr->test.flags)
6584 			return -EINVAL;
6585 
6586 		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
6587 		if (IS_ERR(prog))
6588 			return PTR_ERR(prog);
6589 
6590 		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
6591 		    attr->test.ctx_size_in > U16_MAX) {
6592 			bpf_prog_put(prog);
6593 			return -EINVAL;
6594 		}
6595 
6596 		run_ctx.bpf_cookie = 0;
6597 		if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
6598 			/* recursion detected */
6599 			__bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
6600 			bpf_prog_put(prog);
6601 			return -EBUSY;
6602 		}
6603 		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
6604 		__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
6605 						&run_ctx);
6606 		bpf_prog_put(prog);
6607 		return 0;
6608 #endif
6609 	default:
6610 		return ____bpf_sys_bpf(cmd, attr, size);
6611 	}
6612 }
6613 EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL");
6614 
6615 static const struct bpf_func_proto bpf_sys_bpf_proto = {
6616 	.func		= bpf_sys_bpf,
6617 	.gpl_only	= false,
6618 	.ret_type	= RET_INTEGER,
6619 	.arg1_type	= ARG_ANYTHING,
6620 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6621 	.arg3_type	= ARG_CONST_SIZE,
6622 };
6623 
6624 const struct bpf_func_proto * __weak
6625 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6626 {
6627 	return bpf_base_func_proto(func_id, prog);
6628 }
6629 
/* bpf_sys_close() helper body: closes @fd via close_fd() and returns its
 * result.
 */
BPF_CALL_1(bpf_sys_close, u32, fd)
{
	int ret;

	/* When bpf program calls this helper there should not be
	 * an fdget() without matching completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	ret = close_fd(fd);

	return ret;
}
6639 
6640 static const struct bpf_func_proto bpf_sys_close_proto = {
6641 	.func		= bpf_sys_close,
6642 	.gpl_only	= false,
6643 	.ret_type	= RET_INTEGER,
6644 	.arg1_type	= ARG_ANYTHING,
6645 };
6646 
6647 BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
6648 {
6649 	*res = 0;
6650 	if (flags)
6651 		return -EINVAL;
6652 
6653 	if (name_sz <= 1 || name[name_sz - 1])
6654 		return -EINVAL;
6655 
6656 	if (!bpf_dump_raw_ok(current_cred()))
6657 		return -EPERM;
6658 
6659 	*res = kallsyms_lookup_name(name);
6660 	return *res ? 0 : -ENOENT;
6661 }
6662 
6663 static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
6664 	.func		= bpf_kallsyms_lookup_name,
6665 	.gpl_only	= false,
6666 	.ret_type	= RET_INTEGER,
6667 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6668 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
6669 	.arg3_type	= ARG_ANYTHING,
6670 	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
6671 	.arg4_size	= sizeof(u64),
6672 };
6673 
6674 static const struct bpf_func_proto *
6675 syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6676 {
6677 	switch (func_id) {
6678 	case BPF_FUNC_sys_bpf:
6679 		return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
6680 		       ? NULL : &bpf_sys_bpf_proto;
6681 	case BPF_FUNC_btf_find_by_name_kind:
6682 		return &bpf_btf_find_by_name_kind_proto;
6683 	case BPF_FUNC_sys_close:
6684 		return &bpf_sys_close_proto;
6685 	case BPF_FUNC_kallsyms_lookup_name:
6686 		return &bpf_kallsyms_lookup_name_proto;
6687 	default:
6688 		return tracing_prog_func_proto(func_id, prog);
6689 	}
6690 }
6691 
6692 const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
6693 	.get_func_proto  = syscall_prog_func_proto,
6694 	.is_valid_access = syscall_prog_is_valid_access,
6695 };
6696 
6697 const struct bpf_prog_ops bpf_syscall_prog_ops = {
6698 	.test_run = bpf_prog_test_run_syscall,
6699 };
6700 
6701 #ifdef CONFIG_SYSCTL
6702 static int bpf_stats_handler(const struct ctl_table *table, int write,
6703 			     void *buffer, size_t *lenp, loff_t *ppos)
6704 {
6705 	struct static_key *key = (struct static_key *)table->data;
6706 	static int saved_val;
6707 	int val, ret;
6708 	struct ctl_table tmp = {
6709 		.data   = &val,
6710 		.maxlen = sizeof(val),
6711 		.mode   = table->mode,
6712 		.extra1 = SYSCTL_ZERO,
6713 		.extra2 = SYSCTL_ONE,
6714 	};
6715 
6716 	if (write && !capable(CAP_SYS_ADMIN))
6717 		return -EPERM;
6718 
6719 	mutex_lock(&bpf_stats_enabled_mutex);
6720 	val = saved_val;
6721 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
6722 	if (write && !ret && val != saved_val) {
6723 		if (val)
6724 			static_key_slow_inc(key);
6725 		else
6726 			static_key_slow_dec(key);
6727 		saved_val = val;
6728 	}
6729 	mutex_unlock(&bpf_stats_enabled_mutex);
6730 	return ret;
6731 }
6732 
6733 void __weak unpriv_ebpf_notify(int new_state)
6734 {
6735 }
6736 
6737 static int bpf_unpriv_handler(const struct ctl_table *table, int write,
6738 			      void *buffer, size_t *lenp, loff_t *ppos)
6739 {
6740 	int ret, unpriv_enable = *(int *)table->data;
6741 	bool locked_state = unpriv_enable == 1;
6742 	struct ctl_table tmp = *table;
6743 
6744 	if (write && !capable(CAP_SYS_ADMIN))
6745 		return -EPERM;
6746 
6747 	tmp.data = &unpriv_enable;
6748 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
6749 	if (write && !ret) {
6750 		if (locked_state && unpriv_enable != 1)
6751 			return -EPERM;
6752 		*(int *)table->data = unpriv_enable;
6753 	}
6754 
6755 	if (write)
6756 		unpriv_ebpf_notify(unpriv_enable);
6757 
6758 	return ret;
6759 }
6760 
6761 static const struct ctl_table bpf_syscall_table[] = {
6762 	{
6763 		.procname	= "unprivileged_bpf_disabled",
6764 		.data		= &sysctl_unprivileged_bpf_disabled,
6765 		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
6766 		.mode		= 0644,
6767 		.proc_handler	= bpf_unpriv_handler,
6768 		.extra1		= SYSCTL_ZERO,
6769 		.extra2		= SYSCTL_TWO,
6770 	},
6771 	{
6772 		.procname	= "bpf_stats_enabled",
6773 		.data		= &bpf_stats_enabled_key.key,
6774 		.mode		= 0644,
6775 		.proc_handler	= bpf_stats_handler,
6776 	},
6777 };
6778 
6779 static int __init bpf_syscall_sysctl_init(void)
6780 {
6781 	register_sysctl_init("kernel", bpf_syscall_table);
6782 	return 0;
6783 }
6784 late_initcall(bpf_syscall_sysctl_init);
6785 #endif /* CONFIG_SYSCTL */
6786