xref: /linux/net/bpf/test_run.c (revision 1f8d99de1d1b4b3764203ae02db57041475dab84)
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/rcupdate_trace.h>
#include <linux/sched/signal.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
#include <net/xdp.h>

#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>

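/* Timing helper for BPF_PROG_TEST_RUN loops: while iterating it keeps either
 * preemption or CPU migration disabled (depending on "mode"), counts the
 * iterations and accumulates the elapsed time so that an average per-run
 * duration can be reported back to user space.
 */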
struct bpf_test_timer {
	enum { NO_PREEMPT, NO_MIGRATE } mode;
	u32 i;
	u64 time_start, time_spent;
};

static void bpf_test_timer_enter(struct bpf_test_timer *t)
	__acquires(rcu)
{
	rcu_read_lock();
	if (t->mode == NO_PREEMPT)
		preempt_disable();
	else
		migrate_disable();

	t->time_start = ktime_get_ns();
}

static void bpf_test_timer_leave(struct bpf_test_timer *t)
	__releases(rcu)
{
	t->time_start = 0;

	if (t->mode == NO_PREEMPT)
		preempt_enable();
	else
		migrate_enable();
	rcu_read_unlock();
}

static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
	__must_hold(rcu)
{
	t->i++;
	if (t->i >= repeat) {
		/* We're done. */
		t->time_spent += ktime_get_ns() - t->time_start;
		do_div(t->time_spent, t->i);
		*duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
		*err = 0;
		goto reset;
	}

	if (signal_pending(current)) {
		/* During iteration: we've been cancelled, abort. */
		*err = -EINTR;
		goto reset;
	}

	if (need_resched()) {
		/* During iteration: we need to reschedule between runs. */
		t->time_spent += ktime_get_ns() - t->time_start;
		bpf_test_timer_leave(t);
		cond_resched();
		bpf_test_timer_enter(t);
	}

	/* Do another round. */
	return true;

reset:
	t->i = 0;
	return false;
}

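/* Core test-run loop: executes @prog @repeat times on @ctx under the test
 * timer, with per-cgroup storage allocated for the duration of the run and a
 * bpf_cg_run_ctx installed so that helpers relying on the run context work.
 * The last return value is stored in *retval and the average runtime in ns
 * in *time; @xdp selects bpf_prog_run_xdp() over plain bpf_prog_run().
 */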
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
			u32 *retval, u32 *time, bool xdp)
{
	struct bpf_prog_array_item item = {.prog = prog};
	struct bpf_run_ctx *old_ctx;
	struct bpf_cg_run_ctx run_ctx;
	struct bpf_test_timer t = { NO_MIGRATE };
	enum bpf_cgroup_storage_type stype;
	int ret;

	for_each_cgroup_storage_type(stype) {
		item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(item.cgroup_storage[stype])) {
			item.cgroup_storage[stype] = NULL;
			for_each_cgroup_storage_type(stype)
				bpf_cgroup_storage_free(item.cgroup_storage[stype]);
			return -ENOMEM;
		}
	}

	if (!repeat)
		repeat = 1;

	bpf_test_timer_enter(&t);
	old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	do {
		run_ctx.prog_item = &item;
		if (xdp)
			*retval = bpf_prog_run_xdp(prog, ctx);
		else
			*retval = bpf_prog_run(prog, ctx);
	} while (bpf_test_timer_continue(&t, repeat, &ret, time));
	bpf_reset_run_ctx(old_ctx);
	bpf_test_timer_leave(&t);

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_free(item.cgroup_storage[stype]);

	return ret;
}

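/* Copy the test results back to user space: the (possibly fragmented) packet
 * data, its size, the program's return value and the measured duration.
 * If the user-supplied output buffer is too small the copy is truncated and
 * -ENOSPC is returned, but retval/duration are still reported.
 */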
static int bpf_test_finish(const union bpf_attr *kattr,
			   union bpf_attr __user *uattr, const void *data,
			   struct skb_shared_info *sinfo, u32 size,
			   u32 retval, u32 duration)
{
	void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
	int err = -EFAULT;
	u32 copy_size = size;

	/* Clamp the copy if the user has provided a size hint, but copy the
	 * full buffer if not, to retain the old behaviour.
	 */
	if (kattr->test.data_size_out &&
	    copy_size > kattr->test.data_size_out) {
		copy_size = kattr->test.data_size_out;
		err = -ENOSPC;
	}

	if (data_out) {
		int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;

		if (copy_to_user(data_out, data, len))
			goto out;

		if (sinfo) {
			int i, offset = len, data_len;

			for (i = 0; i < sinfo->nr_frags; i++) {
				skb_frag_t *frag = &sinfo->frags[i];

				if (offset >= copy_size) {
					err = -ENOSPC;
					break;
				}

				data_len = min_t(int, copy_size - offset,
						 skb_frag_size(frag));

				if (copy_to_user(data_out + offset,
						 skb_frag_address(frag),
						 data_len))
					goto out;

				offset += data_len;
			}
		}
	}

	if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
		goto out;
	if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
		goto out;
	if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
		goto out;
	if (err != -ENOSPC)
		err = 0;
out:
	trace_bpf_test_finish(&err);
	return err;
}

/* Integer types of various sizes and pointer combinations cover a variety of
 * architecture-dependent calling conventions. Seven or more arguments can be
 * supported in the future.
 */
__diag_push();
__diag_ignore(GCC, 8, "-Wmissing-prototypes",
	      "Global functions as their definitions will be in vmlinux BTF");
int noinline bpf_fentry_test1(int a)
{
	return a + 1;
}
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);

int noinline bpf_fentry_test2(int a, u64 b)
{
	return a + b;
}

int noinline bpf_fentry_test3(char a, int b, u64 c)
{
	return a + b + c;
}

int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
{
	return (long)a + b + c + d;
}

int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
{
	return a + (long)b + c + d + e;
}

int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
{
	return a + (long)b + c + d + (long)e + f;
}

struct bpf_fentry_test_t {
	struct bpf_fentry_test_t *a;
};

int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
	return (long)arg;
}

int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
{
	return (long)arg->a;
}

int noinline bpf_modify_return_test(int a, int *b)
{
	*b += 1;
	return a + *b;
}

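/* The bpf_kfunc_call_test*() functions below are dummy kernel functions that
 * exist purely so that BPF selftests can exercise direct kernel function
 * calls (kfuncs). They are registered for BPF_PROG_TYPE_SCHED_CLS programs
 * through the BTF ID sets further down in this file.
 */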
u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
{
	return a + b + c + d;
}

int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
{
	return a + b;
}

struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
{
	return sk;
}

struct prog_test_ref_kfunc {
	int a;
	int b;
	struct prog_test_ref_kfunc *next;
};

static struct prog_test_ref_kfunc prog_test_struct = {
	.a = 42,
	.b = 108,
	.next = &prog_test_struct,
};

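/* The acquire/release pair below models a reference-counted kernel object:
 * bpf_kfunc_call_test_acquire() hands out a pointer to prog_test_struct (or
 * NULL), and the selftests check that programs release it again through
 * bpf_kfunc_call_test_release(). The BTF ID sets at the end of this file mark
 * which kfunc acquires, which releases and which may return NULL.
 */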
noinline struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
{
	/* randomly return NULL */
	if (get_jiffies_64() % 2)
		return NULL;
	return &prog_test_struct;
}

noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
{
}

struct prog_test_pass1 {
	int x0;
	struct {
		int x1;
		struct {
			int x2;
			struct {
				int x3;
			};
		};
	};
};

struct prog_test_pass2 {
	int len;
	short arr1[4];
	struct {
		char arr2[4];
		unsigned long arr3[8];
	} x;
};

struct prog_test_fail1 {
	void *p;
	int x;
};

struct prog_test_fail2 {
	int x8;
	struct prog_test_pass1 x;
};

struct prog_test_fail3 {
	int len;
	char arr1[2];
	char arr2[];
};

noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
{
}

noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
{
}

noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
{
}

noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
{
}

noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
{
}

noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
{
}

noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
{
}

noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
{
}

noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
{
}

__diag_pop();

ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);

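/* BTF ID sets describing the test kfuncs above: which of them programs may
 * call at all, which acquire a reference, which release one, and which may
 * return NULL. bpf_prog_test_run_init() registers them for SCHED_CLS programs.
 */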
BTF_SET_START(test_sk_check_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test1)
BTF_ID(func, bpf_kfunc_call_test2)
BTF_ID(func, bpf_kfunc_call_test3)
BTF_ID(func, bpf_kfunc_call_test_acquire)
BTF_ID(func, bpf_kfunc_call_test_release)
BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
BTF_ID(func, bpf_kfunc_call_test_pass1)
BTF_ID(func, bpf_kfunc_call_test_pass2)
BTF_ID(func, bpf_kfunc_call_test_fail1)
BTF_ID(func, bpf_kfunc_call_test_fail2)
BTF_ID(func, bpf_kfunc_call_test_fail3)
BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
BTF_SET_END(test_sk_check_kfunc_ids)

BTF_SET_START(test_sk_acquire_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_acquire)
BTF_SET_END(test_sk_acquire_kfunc_ids)

BTF_SET_START(test_sk_release_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_release)
BTF_SET_END(test_sk_release_kfunc_ids)

BTF_SET_START(test_sk_ret_null_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_acquire)
BTF_SET_END(test_sk_ret_null_kfunc_ids)

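/* Allocate the packet buffer for a test run and fill it from user space:
 * @user_size bytes are copied from kattr->test.data_in into a zeroed buffer
 * of @size bytes plus the requested headroom and tailroom. Packets smaller
 * than ETH_HLEN or larger than a page (minus head/tailroom) are rejected.
 */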
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
			   u32 size, u32 headroom, u32 tailroom)
{
	void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
	void *data;

	if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
		return ERR_PTR(-EINVAL);

	if (user_size > size)
		return ERR_PTR(-EMSGSIZE);

	data = kzalloc(size + headroom + tailroom, GFP_USER);
	if (!data)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(data + headroom, data_in, user_size)) {
		kfree(data);
		return ERR_PTR(-EFAULT);
	}

	return data;
}

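/* Test run for tracing programs (fentry/fexit/fmod_ret): instead of feeding
 * packet data to the program, the kernel calls the bpf_fentry_test*() and
 * bpf_modify_return_test() functions above so that programs attached to them
 * are triggered. The reported retval packs the observed side effect in the
 * upper 16 bits and the traced function's return value in the lower 16 bits.
 */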
int bpf_prog_test_run_tracing(struct bpf_prog *prog,
			      const union bpf_attr *kattr,
			      union bpf_attr __user *uattr)
{
	struct bpf_fentry_test_t arg = {};
	u16 side_effect = 0, ret = 0;
	int b = 2, err = -EFAULT;
	u32 retval = 0;

	if (kattr->test.flags || kattr->test.cpu)
		return -EINVAL;

	switch (prog->expected_attach_type) {
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		if (bpf_fentry_test1(1) != 2 ||
		    bpf_fentry_test2(2, 3) != 5 ||
		    bpf_fentry_test3(4, 5, 6) != 15 ||
		    bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
		    bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
		    bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
		    bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
		    bpf_fentry_test8(&arg) != 0)
			goto out;
		break;
	case BPF_MODIFY_RETURN:
		ret = bpf_modify_return_test(1, &b);
		if (b != 2)
			side_effect = 1;
		break;
	default:
		goto out;
	}

	retval = ((u32)side_effect << 16) | ret;
	if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
		goto out;

	err = 0;
out:
	trace_bpf_test_finish(&err);
	return err;
}

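/* Test run for raw tracepoint programs: the user-supplied context is passed
 * to the program as-is, and BPF_F_TEST_RUN_ON_CPU allows the run to be
 * forced onto a particular CPU via smp_call_function_single().
 */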
struct bpf_raw_tp_test_run_info {
	struct bpf_prog *prog;
	void *ctx;
	u32 retval;
};

static void
__bpf_prog_test_run_raw_tp(void *data)
{
	struct bpf_raw_tp_test_run_info *info = data;

	rcu_read_lock();
	info->retval = bpf_prog_run(info->prog, info->ctx);
	rcu_read_unlock();
}

int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
			     const union bpf_attr *kattr,
			     union bpf_attr __user *uattr)
{
	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
	__u32 ctx_size_in = kattr->test.ctx_size_in;
	struct bpf_raw_tp_test_run_info info;
	int cpu = kattr->test.cpu, err = 0;
	int current_cpu;

	/* doesn't support data_in/out, ctx_out, duration, or repeat */
	if (kattr->test.data_in || kattr->test.data_out ||
	    kattr->test.ctx_out || kattr->test.duration ||
	    kattr->test.repeat)
		return -EINVAL;

	if (ctx_size_in < prog->aux->max_ctx_offset ||
	    ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64))
		return -EINVAL;

	if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
		return -EINVAL;

	if (ctx_size_in) {
		info.ctx = memdup_user(ctx_in, ctx_size_in);
		if (IS_ERR(info.ctx))
			return PTR_ERR(info.ctx);
	} else {
		info.ctx = NULL;
	}

	info.prog = prog;

	current_cpu = get_cpu();
	if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
	    cpu == current_cpu) {
		__bpf_prog_test_run_raw_tp(&info);
	} else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
		/* smp_call_function_single() also checks cpu_online()
		 * after csd_lock(). However, since cpu is from user
		 * space, let's do an extra quick check to filter out
		 * invalid value before smp_call_function_single().
		 */
		err = -ENXIO;
	} else {
		err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
					       &info, 1);
	}
	put_cpu();

	if (!err &&
	    copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
		err = -EFAULT;

	kfree(info.ctx);
	return err;
}

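/* Copy the optional user-provided context object (ctx_in) into a zeroed
 * kernel buffer of @max_size bytes. If the user struct is larger than the
 * kernel's, the trailing bytes must be zero (checked with
 * bpf_check_uarg_tail_zero()); the result is NULL when no context was given.
 */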
static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
{
	void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
	void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
	u32 size = kattr->test.ctx_size_in;
	void *data;
	int err;

	if (!data_in && !data_out)
		return NULL;

	data = kzalloc(max_size, GFP_USER);
	if (!data)
		return ERR_PTR(-ENOMEM);

	if (data_in) {
		err = bpf_check_uarg_tail_zero(USER_BPFPTR(data_in), max_size, size);
		if (err) {
			kfree(data);
			return ERR_PTR(err);
		}

		size = min_t(u32, max_size, size);
		if (copy_from_user(data, data_in, size)) {
			kfree(data);
			return ERR_PTR(-EFAULT);
		}
	}
	return data;
}

static int bpf_ctx_finish(const union bpf_attr *kattr,
			  union bpf_attr __user *uattr, const void *data,
			  u32 size)
{
	void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
	int err = -EFAULT;
	u32 copy_size = size;

	if (!data || !data_out)
		return 0;

	if (copy_size > kattr->test.ctx_size_out) {
		copy_size = kattr->test.ctx_size_out;
		err = -ENOSPC;
	}

	if (copy_to_user(data_out, data, copy_size))
		goto out;
	if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size)))
		goto out;
	if (err != -ENOSPC)
		err = 0;
out:
	return err;
}

/**
 * range_is_zero - test whether buffer is initialized
 * @buf: buffer to check
 * @from: check from this position
 * @to: check up until (excluding) this position
 *
 * This function returns true if there is no non-zero byte
 * in buf in the range [from, to).
 */
static inline bool range_is_zero(void *buf, size_t from, size_t to)
{
	return !memchr_inv((u8 *)buf + from, 0, to - from);
}

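/* Apply a user-provided struct __sk_buff to the skb used for the test run.
 * Only the fields called out as allowed below may be non-zero; all other
 * bytes of the user struct must be zero, which is what the range_is_zero()
 * checks enforce field range by field range.
 */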
static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
{
	struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;

	if (!__skb)
		return 0;

	/* make sure the fields we don't use are zeroed */
	if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark)))
		return -EINVAL;

	/* mark is allowed */

	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark),
			   offsetof(struct __sk_buff, priority)))
		return -EINVAL;

	/* priority is allowed */
	/* ingress_ifindex is allowed */
	/* ifindex is allowed */

	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
			   offsetof(struct __sk_buff, cb)))
		return -EINVAL;

	/* cb is allowed */

	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
			   offsetof(struct __sk_buff, tstamp)))
		return -EINVAL;

	/* tstamp is allowed */
	/* wire_len is allowed */
	/* gso_segs is allowed */

	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
			   offsetof(struct __sk_buff, gso_size)))
		return -EINVAL;

	/* gso_size is allowed */

	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
			   offsetof(struct __sk_buff, hwtstamp)))
		return -EINVAL;

	/* hwtstamp is allowed */

	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, hwtstamp),
			   sizeof(struct __sk_buff)))
		return -EINVAL;

	skb->mark = __skb->mark;
	skb->priority = __skb->priority;
	skb->skb_iif = __skb->ingress_ifindex;
	skb->tstamp = __skb->tstamp;
	memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);

	if (__skb->wire_len == 0) {
		cb->pkt_len = skb->len;
	} else {
		if (__skb->wire_len < skb->len ||
		    __skb->wire_len > GSO_MAX_SIZE)
			return -EINVAL;
		cb->pkt_len = __skb->wire_len;
	}

	if (__skb->gso_segs > GSO_MAX_SEGS)
		return -EINVAL;
	skb_shinfo(skb)->gso_segs = __skb->gso_segs;
	skb_shinfo(skb)->gso_size = __skb->gso_size;
	skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp;

	return 0;
}

static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
{
	struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;

	if (!__skb)
		return;

	__skb->mark = skb->mark;
	__skb->priority = skb->priority;
	__skb->ingress_ifindex = skb->skb_iif;
	__skb->ifindex = skb->dev->ifindex;
	__skb->tstamp = skb->tstamp;
	memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
	__skb->wire_len = cb->pkt_len;
	__skb->gso_segs = skb_shinfo(skb)->gso_segs;
	__skb->hwtstamp = skb_shinfo(skb)->hwtstamps.hwtstamp;
}

static struct proto bpf_dummy_proto = {
	.name   = "bpf_dummy",
	.owner  = THIS_MODULE,
	.obj_size = sizeof(struct sock),
};

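/* Test run for skb-based program types (SCHED_CLS/ACT, LWT_*): the user
 * packet is wrapped in a real sk_buff owned by a dummy socket, the optional
 * struct __sk_buff context is applied, and the program is run "repeat" times
 * before data, retval and duration are copied back.
 *
 * For orientation only: from user space this path is typically reached
 * through the BPF_PROG_TEST_RUN command, e.g. via libbpf (rough sketch, not
 * part of this file):
 *
 *	LIBBPF_OPTS(bpf_test_run_opts, opts,
 *		.data_in = pkt,
 *		.data_size_in = sizeof(pkt),
 *		.data_out = out,
 *		.data_size_out = sizeof(out),
 *		.repeat = 1000,
 *	);
 *	err = bpf_prog_test_run_opts(prog_fd, &opts);
 *	(opts.retval, opts.duration and opts.data_size_out are filled in)
 */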
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr)
{
	bool is_l2 = false, is_direct_pkt_access = false;
	struct net *net = current->nsproxy->net_ns;
	struct net_device *dev = net->loopback_dev;
	u32 size = kattr->test.data_size_in;
	u32 repeat = kattr->test.repeat;
	struct __sk_buff *ctx = NULL;
	u32 retval, duration;
	int hh_len = ETH_HLEN;
	struct sk_buff *skb;
	struct sock *sk;
	void *data;
	int ret;

	if (kattr->test.flags || kattr->test.cpu)
		return -EINVAL;

	data = bpf_test_init(kattr, kattr->test.data_size_in,
			     size, NET_SKB_PAD + NET_IP_ALIGN,
			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
	if (IS_ERR(data))
		return PTR_ERR(data);

	ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
	if (IS_ERR(ctx)) {
		kfree(data);
		return PTR_ERR(ctx);
	}

	switch (prog->type) {
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
		is_l2 = true;
		fallthrough;
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_XMIT:
		is_direct_pkt_access = true;
		break;
	default:
		break;
	}

	sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
	if (!sk) {
		kfree(data);
		kfree(ctx);
		return -ENOMEM;
	}
	sock_init_data(NULL, sk);

	skb = build_skb(data, 0);
	if (!skb) {
		kfree(data);
		kfree(ctx);
		sk_free(sk);
		return -ENOMEM;
	}
	skb->sk = sk;

	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	__skb_put(skb, size);
	if (ctx && ctx->ifindex > 1) {
		dev = dev_get_by_index(net, ctx->ifindex);
		if (!dev) {
			ret = -ENODEV;
			goto out;
		}
	}
	skb->protocol = eth_type_trans(skb, dev);
	skb_reset_network_header(skb);

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		sk->sk_family = AF_INET;
		if (sizeof(struct iphdr) <= skb_headlen(skb)) {
			sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
			sk->sk_daddr = ip_hdr(skb)->daddr;
		}
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6):
		sk->sk_family = AF_INET6;
		if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
			sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
			sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
		}
		break;
#endif
	default:
		break;
	}

	if (is_l2)
		__skb_push(skb, hh_len);
	if (is_direct_pkt_access)
		bpf_compute_data_pointers(skb);
	ret = convert___skb_to_skb(skb, ctx);
	if (ret)
		goto out;
	ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
	if (ret)
		goto out;
	if (!is_l2) {
		if (skb_headroom(skb) < hh_len) {
			int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));

			if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
				ret = -ENOMEM;
				goto out;
			}
		}
		memset(__skb_push(skb, hh_len), 0, hh_len);
	}
	convert_skb_to___skb(skb, ctx);

	size = skb->len;
	/* bpf program can never convert linear skb to non-linear */
	if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
		size = skb_headlen(skb);
	ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
			      duration);
	if (!ret)
		ret = bpf_ctx_finish(kattr, uattr, ctx,
				     sizeof(struct __sk_buff));
out:
	if (dev && dev != net->loopback_dev)
		dev_put(dev);
	kfree_skb(skb);
	sk_free(sk);
	kfree(ctx);
	return ret;
}

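/* Apply a user-provided struct xdp_md to the xdp_buff used for the test run:
 * the data offset becomes metadata space, and ingress_ifindex/rx_queue_index
 * select a real RX queue. A reference on the ingress device is taken here and
 * dropped again in xdp_convert_buff_to_md().
 */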
static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
{
	unsigned int ingress_ifindex, rx_queue_index;
	struct netdev_rx_queue *rxqueue;
	struct net_device *device;

	if (!xdp_md)
		return 0;

	if (xdp_md->egress_ifindex != 0)
		return -EINVAL;

	ingress_ifindex = xdp_md->ingress_ifindex;
	rx_queue_index = xdp_md->rx_queue_index;

	if (!ingress_ifindex && rx_queue_index)
		return -EINVAL;

	if (ingress_ifindex) {
		device = dev_get_by_index(current->nsproxy->net_ns,
					  ingress_ifindex);
		if (!device)
			return -ENODEV;

		if (rx_queue_index >= device->real_num_rx_queues)
			goto free_dev;

		rxqueue = __netif_get_rx_queue(device, rx_queue_index);

		if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq))
			goto free_dev;

		xdp->rxq = &rxqueue->xdp_rxq;
		/* The device is now tracked in the xdp->rxq for later
		 * dev_put()
		 */
	}

	xdp->data = xdp->data_meta + xdp_md->data;
	return 0;

free_dev:
	dev_put(device);
	return -EINVAL;
}

static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
{
	if (!xdp_md)
		return;

	xdp_md->data = xdp->data - xdp->data_meta;
	xdp_md->data_end = xdp->data_end - xdp->data_meta;

	if (xdp_md->ingress_ifindex)
		dev_put(xdp->rxq->dev);
}

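/* Test run for XDP programs: an xdp_buff is built with XDP_PACKET_HEADROOM
 * (minus any requested metadata space) on queue 0 of the loopback device by
 * default, and input data beyond the linear area is spilled into page
 * fragments so that multi-buffer (frags) programs can be exercised as well.
 */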
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr)
{
	u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	u32 size = kattr->test.data_size_in;
	u32 headroom = XDP_PACKET_HEADROOM;
	u32 retval, duration, max_data_sz;
	u32 repeat = kattr->test.repeat;
	struct netdev_rx_queue *rxqueue;
	struct skb_shared_info *sinfo;
	struct xdp_buff xdp = {};
	int i, ret = -EINVAL;
	struct xdp_md *ctx;
	void *data;

	if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
	    prog->expected_attach_type == BPF_XDP_CPUMAP)
		return -EINVAL;

	ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (ctx) {
		/* There can't be user provided data before the meta data */
		if (ctx->data_meta || ctx->data_end != size ||
		    ctx->data > ctx->data_end ||
		    unlikely(xdp_metalen_invalid(ctx->data)))
			goto free_ctx;
		/* Meta data is allocated from the headroom */
		headroom -= ctx->data;
	}

	max_data_sz = 4096 - headroom - tailroom;
	size = min_t(u32, size, max_data_sz);

	data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
	if (IS_ERR(data)) {
		ret = PTR_ERR(data);
		goto free_ctx;
	}

	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
	rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
	xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
	xdp_prepare_buff(&xdp, data, headroom, size, true);
	sinfo = xdp_get_shared_info_from_buff(&xdp);

	ret = xdp_convert_md_to_buff(ctx, &xdp);
	if (ret)
		goto free_data;

	if (unlikely(kattr->test.data_size_in > size)) {
		void __user *data_in = u64_to_user_ptr(kattr->test.data_in);

		while (size < kattr->test.data_size_in) {
			struct page *page;
			skb_frag_t *frag;
			int data_len;

			page = alloc_page(GFP_KERNEL);
			if (!page) {
				ret = -ENOMEM;
				goto out;
			}

			frag = &sinfo->frags[sinfo->nr_frags++];
			__skb_frag_set_page(frag, page);

			data_len = min_t(int, kattr->test.data_size_in - size,
					 PAGE_SIZE);
			skb_frag_size_set(frag, data_len);

			if (copy_from_user(page_address(page), data_in + size,
					   data_len)) {
				ret = -EFAULT;
				goto out;
			}
			sinfo->xdp_frags_size += data_len;
			size += data_len;
		}
		xdp_buff_set_frags_flag(&xdp);
	}

	if (repeat > 1)
		bpf_prog_change_xdp(NULL, prog);

	ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
	/* We convert the xdp_buff back to an xdp_md before checking the return
	 * code so the reference count of any held netdevice will be decremented
	 * even if the test run failed.
	 */
	xdp_convert_buff_to_md(&xdp, ctx);
	if (ret)
		goto out;

	size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
	ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
			      retval, duration);
	if (!ret)
		ret = bpf_ctx_finish(kattr, uattr, ctx,
				     sizeof(struct xdp_md));

out:
	if (repeat > 1)
		bpf_prog_change_xdp(prog, NULL);
free_data:
	for (i = 0; i < sinfo->nr_frags; i++)
		__free_page(skb_frag_page(&sinfo->frags[i]));
	kfree(data);
free_ctx:
	kfree(ctx);
	return ret;
}

static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
{
	/* make sure the fields we don't use are zeroed */
	if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags)))
		return -EINVAL;

	/* flags is allowed */

	if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags),
			   sizeof(struct bpf_flow_keys)))
		return -EINVAL;

	return 0;
}

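/* Test run for flow dissector programs: the program is invoked directly via
 * bpf_flow_dissect() on the supplied packet, repeatedly under the test timer,
 * and the resulting bpf_flow_keys are returned as the output data.
 */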
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	struct bpf_test_timer t = { NO_PREEMPT };
	u32 size = kattr->test.data_size_in;
	struct bpf_flow_dissector ctx = {};
	u32 repeat = kattr->test.repeat;
	struct bpf_flow_keys *user_ctx;
	struct bpf_flow_keys flow_keys;
	const struct ethhdr *eth;
	unsigned int flags = 0;
	u32 retval, duration;
	void *data;
	int ret;

	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
		return -EINVAL;

	if (kattr->test.flags || kattr->test.cpu)
		return -EINVAL;

	if (size < ETH_HLEN)
		return -EINVAL;

	data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
	if (IS_ERR(data))
		return PTR_ERR(data);

	eth = (struct ethhdr *)data;

	if (!repeat)
		repeat = 1;

	user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys));
	if (IS_ERR(user_ctx)) {
		kfree(data);
		return PTR_ERR(user_ctx);
	}
	if (user_ctx) {
		ret = verify_user_bpf_flow_keys(user_ctx);
		if (ret)
			goto out;
		flags = user_ctx->flags;
	}

	ctx.flow_keys = &flow_keys;
	ctx.data = data;
	ctx.data_end = (__u8 *)data + size;

	bpf_test_timer_enter(&t);
	do {
		retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
					  size, flags);
	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
	bpf_test_timer_leave(&t);

	if (ret < 0)
		goto out;

	ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
			      sizeof(flow_keys), retval, duration);
	if (!ret)
		ret = bpf_ctx_finish(kattr, uattr, user_ctx,
				     sizeof(struct bpf_flow_keys));

out:
	kfree(user_ctx);
	kfree(data);
	return ret;
}

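/* Test run for sk_lookup programs: a bpf_sk_lookup_kern context is built from
 * the mandatory user-provided struct bpf_sk_lookup and the program is run
 * through a one-element program array, mirroring how the real attach point
 * dispatches. The cookie of any selected socket is reported back in the
 * context.
 */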
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
				union bpf_attr __user *uattr)
{
	struct bpf_test_timer t = { NO_PREEMPT };
	struct bpf_prog_array *progs = NULL;
	struct bpf_sk_lookup_kern ctx = {};
	u32 repeat = kattr->test.repeat;
	struct bpf_sk_lookup *user_ctx;
	u32 retval, duration;
	int ret = -EINVAL;

	if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
		return -EINVAL;

	if (kattr->test.flags || kattr->test.cpu)
		return -EINVAL;

	if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
	    kattr->test.data_size_out)
		return -EINVAL;

	if (!repeat)
		repeat = 1;

	user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
	if (IS_ERR(user_ctx))
		return PTR_ERR(user_ctx);

	if (!user_ctx)
		return -EINVAL;

	if (user_ctx->sk)
		goto out;

	if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
		goto out;

	if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
		ret = -ERANGE;
		goto out;
	}

	ctx.family = (u16)user_ctx->family;
	ctx.protocol = (u16)user_ctx->protocol;
	ctx.dport = (u16)user_ctx->local_port;
	ctx.sport = (__force __be16)user_ctx->remote_port;

	switch (ctx.family) {
	case AF_INET:
		ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
		ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
		break;

#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
		ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
		break;
#endif

	default:
		ret = -EAFNOSUPPORT;
		goto out;
	}

	progs = bpf_prog_array_alloc(1, GFP_KERNEL);
	if (!progs) {
		ret = -ENOMEM;
		goto out;
	}

	progs->items[0].prog = prog;

	bpf_test_timer_enter(&t);
	do {
		ctx.selected_sk = NULL;
		retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run);
	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
	bpf_test_timer_leave(&t);

	if (ret < 0)
		goto out;

	user_ctx->cookie = 0;
	if (ctx.selected_sk) {
		if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
			ret = -EOPNOTSUPP;
			goto out;
		}

		user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
	}

	ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
	if (!ret)
		ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));

out:
	bpf_prog_array_free(progs);
	kfree(user_ctx);
	return ret;
}

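/* Test run for BPF_PROG_TYPE_SYSCALL programs: the program is run exactly
 * once, pinned to the current CPU and under rcu_read_lock_trace(), with the
 * copied-in context; the (possibly modified) context is then copied back to
 * the same user buffer.
 */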
int bpf_prog_test_run_syscall(struct bpf_prog *prog,
			      const union bpf_attr *kattr,
			      union bpf_attr __user *uattr)
{
	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
	__u32 ctx_size_in = kattr->test.ctx_size_in;
	void *ctx = NULL;
	u32 retval;
	int err = 0;

	/* doesn't support data_in/out, ctx_out, duration, repeat or flags */
	if (kattr->test.data_in || kattr->test.data_out ||
	    kattr->test.ctx_out || kattr->test.duration ||
	    kattr->test.repeat || kattr->test.flags)
		return -EINVAL;

	if (ctx_size_in < prog->aux->max_ctx_offset ||
	    ctx_size_in > U16_MAX)
		return -EINVAL;

	if (ctx_size_in) {
		ctx = memdup_user(ctx_in, ctx_size_in);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	}

	rcu_read_lock_trace();
	retval = bpf_prog_run_pin_on_cpu(prog, ctx);
	rcu_read_unlock_trace();

	if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
		err = -EFAULT;
		goto out;
	}
	if (ctx_size_in)
		if (copy_to_user(ctx_in, ctx, ctx_size_in))
			err = -EFAULT;
out:
	kfree(ctx);
	return err;
}

static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
	.owner        = THIS_MODULE,
	.check_set    = &test_sk_check_kfunc_ids,
	.acquire_set  = &test_sk_acquire_kfunc_ids,
	.release_set  = &test_sk_release_kfunc_ids,
	.ret_null_set = &test_sk_ret_null_kfunc_ids,
};

static int __init bpf_prog_test_run_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
}
late_initcall(bpf_prog_test_run_init);