xref: /linux/kernel/bpf/net_namespace.c (revision 6015fb905d89063231ed33bc15be19ef0fc339b8)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/bpf.h>
4 #include <linux/bpf-netns.h>
5 #include <linux/filter.h>
6 #include <net/net_namespace.h>
7 
8 /*
9  * Functions to manage BPF programs attached to netns
10  */
11 
/**
 * struct bpf_netns_link - BPF link bound to a network namespace
 * @link:       generic BPF link object embedded in this structure; the
 *              link callbacks recover the container with container_of()
 * @type:       attach type as supplied by user space at link creation
 * @netns_type: netns-specific attach type that @type was translated to
 * @net:        namespace the link is attached to, or NULL once the link
 *              has been detached (see below)
 * @node:       entry in the per-netns, per-attach-type list of links
 */
struct bpf_netns_link {
	struct bpf_link	link;
	enum bpf_attach_type type;
	enum netns_bpf_attach_type netns_type;

	/* We don't hold a ref to net in order to auto-detach the link
	 * when netns is going away. Instead we rely on pernet
	 * pre_exit callback to clear this pointer. Must be accessed
	 * with netns_bpf_mutex held.
	 */
	struct net *net;
	struct list_head node; /* node in list of links attached to net */
};
25 
/* Protects updates to netns_bpf: every function in this file that changes
 * net->bpf (run arrays, directly attached progs, link lists) or reads or
 * writes bpf_netns_link::net does so with this mutex held.
 */
DEFINE_MUTEX(netns_bpf_mutex);
28 
29 static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
30 {
31 	switch (type) {
32 #ifdef CONFIG_INET
33 	case NETNS_BPF_SK_LOOKUP:
34 		static_branch_dec(&bpf_sk_lookup_enabled);
35 		break;
36 #endif
37 	default:
38 		break;
39 	}
40 }
41 
42 static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
43 {
44 	switch (type) {
45 #ifdef CONFIG_INET
46 	case NETNS_BPF_SK_LOOKUP:
47 		static_branch_inc(&bpf_sk_lookup_enabled);
48 		break;
49 #endif
50 	default:
51 		break;
52 	}
53 }
54 
55 /* Must be called with netns_bpf_mutex held. */
56 static void netns_bpf_run_array_detach(struct net *net,
57 				       enum netns_bpf_attach_type type)
58 {
59 	struct bpf_prog_array *run_array;
60 
61 	run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
62 					lockdep_is_held(&netns_bpf_mutex));
63 	bpf_prog_array_free(run_array);
64 }
65 
66 static int link_index(struct net *net, enum netns_bpf_attach_type type,
67 		      struct bpf_netns_link *link)
68 {
69 	struct bpf_netns_link *pos;
70 	int i = 0;
71 
72 	list_for_each_entry(pos, &net->bpf.links[type], node) {
73 		if (pos == link)
74 			return i;
75 		i++;
76 	}
77 	return -ENOENT;
78 }
79 
80 static int link_count(struct net *net, enum netns_bpf_attach_type type)
81 {
82 	struct list_head *pos;
83 	int i = 0;
84 
85 	list_for_each(pos, &net->bpf.links[type])
86 		i++;
87 	return i;
88 }
89 
90 static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
91 			    struct bpf_prog_array *prog_array)
92 {
93 	struct bpf_netns_link *pos;
94 	unsigned int i = 0;
95 
96 	list_for_each_entry(pos, &net->bpf.links[type], node) {
97 		prog_array->items[i].prog = pos->link.prog;
98 		i++;
99 	}
100 }
101 
102 static void bpf_netns_link_release(struct bpf_link *link)
103 {
104 	struct bpf_netns_link *net_link =
105 		container_of(link, struct bpf_netns_link, link);
106 	enum netns_bpf_attach_type type = net_link->netns_type;
107 	struct bpf_prog_array *old_array, *new_array;
108 	struct net *net;
109 	int cnt, idx;
110 
111 	mutex_lock(&netns_bpf_mutex);
112 
113 	/* We can race with cleanup_net, but if we see a non-NULL
114 	 * struct net pointer, pre_exit has not run yet and wait for
115 	 * netns_bpf_mutex.
116 	 */
117 	net = net_link->net;
118 	if (!net)
119 		goto out_unlock;
120 
121 	/* Mark attach point as unused */
122 	netns_bpf_attach_type_unneed(type);
123 
124 	/* Remember link position in case of safe delete */
125 	idx = link_index(net, type, net_link);
126 	list_del(&net_link->node);
127 
128 	cnt = link_count(net, type);
129 	if (!cnt) {
130 		netns_bpf_run_array_detach(net, type);
131 		goto out_unlock;
132 	}
133 
134 	old_array = rcu_dereference_protected(net->bpf.run_array[type],
135 					      lockdep_is_held(&netns_bpf_mutex));
136 	new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
137 	if (!new_array) {
138 		WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
139 		goto out_unlock;
140 	}
141 	fill_prog_array(net, type, new_array);
142 	rcu_assign_pointer(net->bpf.run_array[type], new_array);
143 	bpf_prog_array_free(old_array);
144 
145 out_unlock:
146 	net_link->net = NULL;
147 	mutex_unlock(&netns_bpf_mutex);
148 }
149 
/* bpf_link_ops::detach callback.
 *
 * Detaching is the same operation as releasing the link, so this simply
 * delegates to bpf_netns_link_release(). Always returns 0.
 */
static int bpf_netns_link_detach(struct bpf_link *link)
{
	bpf_netns_link_release(link);
	return 0;
}
155 
156 static void bpf_netns_link_dealloc(struct bpf_link *link)
157 {
158 	struct bpf_netns_link *net_link =
159 		container_of(link, struct bpf_netns_link, link);
160 
161 	kfree(net_link);
162 }
163 
164 static int bpf_netns_link_update_prog(struct bpf_link *link,
165 				      struct bpf_prog *new_prog,
166 				      struct bpf_prog *old_prog)
167 {
168 	struct bpf_netns_link *net_link =
169 		container_of(link, struct bpf_netns_link, link);
170 	enum netns_bpf_attach_type type = net_link->netns_type;
171 	struct bpf_prog_array *run_array;
172 	struct net *net;
173 	int idx, ret;
174 
175 	if (old_prog && old_prog != link->prog)
176 		return -EPERM;
177 	if (new_prog->type != link->prog->type)
178 		return -EINVAL;
179 
180 	mutex_lock(&netns_bpf_mutex);
181 
182 	net = net_link->net;
183 	if (!net || !check_net(net)) {
184 		/* Link auto-detached or netns dying */
185 		ret = -ENOLINK;
186 		goto out_unlock;
187 	}
188 
189 	run_array = rcu_dereference_protected(net->bpf.run_array[type],
190 					      lockdep_is_held(&netns_bpf_mutex));
191 	idx = link_index(net, type, net_link);
192 	ret = bpf_prog_array_update_at(run_array, idx, new_prog);
193 	if (ret)
194 		goto out_unlock;
195 
196 	old_prog = xchg(&link->prog, new_prog);
197 	bpf_prog_put(old_prog);
198 
199 out_unlock:
200 	mutex_unlock(&netns_bpf_mutex);
201 	return ret;
202 }
203 
204 static int bpf_netns_link_fill_info(const struct bpf_link *link,
205 				    struct bpf_link_info *info)
206 {
207 	const struct bpf_netns_link *net_link =
208 		container_of(link, struct bpf_netns_link, link);
209 	unsigned int inum = 0;
210 	struct net *net;
211 
212 	mutex_lock(&netns_bpf_mutex);
213 	net = net_link->net;
214 	if (net && check_net(net))
215 		inum = net->ns.inum;
216 	mutex_unlock(&netns_bpf_mutex);
217 
218 	info->netns.netns_ino = inum;
219 	info->netns.attach_type = net_link->type;
220 	return 0;
221 }
222 
223 static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
224 				       struct seq_file *seq)
225 {
226 	struct bpf_link_info info = {};
227 
228 	bpf_netns_link_fill_info(link, &info);
229 	seq_printf(seq,
230 		   "netns_ino:\t%u\n"
231 		   "attach_type:\t%u\n",
232 		   info.netns.netns_ino,
233 		   info.netns.attach_type);
234 }
235 
/* Link callbacks for netns links; installed on every link by
 * bpf_link_init() in netns_bpf_link_create(). Note that .release and
 * .detach end up in the same code, as bpf_netns_link_detach() just calls
 * bpf_netns_link_release().
 */
static const struct bpf_link_ops bpf_netns_link_ops = {
	.release = bpf_netns_link_release,
	.dealloc = bpf_netns_link_dealloc,
	.detach = bpf_netns_link_detach,
	.update_prog = bpf_netns_link_update_prog,
	.fill_link_info = bpf_netns_link_fill_info,
	.show_fdinfo = bpf_netns_link_show_fdinfo,
};
244 
245 /* Must be called with netns_bpf_mutex held. */
246 static int __netns_bpf_prog_query(const union bpf_attr *attr,
247 				  union bpf_attr __user *uattr,
248 				  struct net *net,
249 				  enum netns_bpf_attach_type type)
250 {
251 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
252 	struct bpf_prog_array *run_array;
253 	u32 prog_cnt = 0, flags = 0;
254 
255 	run_array = rcu_dereference_protected(net->bpf.run_array[type],
256 					      lockdep_is_held(&netns_bpf_mutex));
257 	if (run_array)
258 		prog_cnt = bpf_prog_array_length(run_array);
259 
260 	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
261 		return -EFAULT;
262 	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
263 		return -EFAULT;
264 	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
265 		return 0;
266 
267 	return bpf_prog_array_copy_to_user(run_array, prog_ids,
268 					   attr->query.prog_cnt);
269 }
270 
271 int netns_bpf_prog_query(const union bpf_attr *attr,
272 			 union bpf_attr __user *uattr)
273 {
274 	enum netns_bpf_attach_type type;
275 	struct net *net;
276 	int ret;
277 
278 	if (attr->query.query_flags)
279 		return -EINVAL;
280 
281 	type = to_netns_bpf_attach_type(attr->query.attach_type);
282 	if (type < 0)
283 		return -EINVAL;
284 
285 	net = get_net_ns_by_fd(attr->query.target_fd);
286 	if (IS_ERR(net))
287 		return PTR_ERR(net);
288 
289 	mutex_lock(&netns_bpf_mutex);
290 	ret = __netns_bpf_prog_query(attr, uattr, net, type);
291 	mutex_unlock(&netns_bpf_mutex);
292 
293 	put_net(net);
294 	return ret;
295 }
296 
297 int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
298 {
299 	struct bpf_prog_array *run_array;
300 	enum netns_bpf_attach_type type;
301 	struct bpf_prog *attached;
302 	struct net *net;
303 	int ret;
304 
305 	if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
306 		return -EINVAL;
307 
308 	type = to_netns_bpf_attach_type(attr->attach_type);
309 	if (type < 0)
310 		return -EINVAL;
311 
312 	net = current->nsproxy->net_ns;
313 	mutex_lock(&netns_bpf_mutex);
314 
315 	/* Attaching prog directly is not compatible with links */
316 	if (!list_empty(&net->bpf.links[type])) {
317 		ret = -EEXIST;
318 		goto out_unlock;
319 	}
320 
321 	switch (type) {
322 	case NETNS_BPF_FLOW_DISSECTOR:
323 		ret = flow_dissector_bpf_prog_attach_check(net, prog);
324 		break;
325 	default:
326 		ret = -EINVAL;
327 		break;
328 	}
329 	if (ret)
330 		goto out_unlock;
331 
332 	attached = net->bpf.progs[type];
333 	if (attached == prog) {
334 		/* The same program cannot be attached twice */
335 		ret = -EINVAL;
336 		goto out_unlock;
337 	}
338 
339 	run_array = rcu_dereference_protected(net->bpf.run_array[type],
340 					      lockdep_is_held(&netns_bpf_mutex));
341 	if (run_array) {
342 		WRITE_ONCE(run_array->items[0].prog, prog);
343 	} else {
344 		run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
345 		if (!run_array) {
346 			ret = -ENOMEM;
347 			goto out_unlock;
348 		}
349 		run_array->items[0].prog = prog;
350 		rcu_assign_pointer(net->bpf.run_array[type], run_array);
351 	}
352 
353 	net->bpf.progs[type] = prog;
354 	if (attached)
355 		bpf_prog_put(attached);
356 
357 out_unlock:
358 	mutex_unlock(&netns_bpf_mutex);
359 
360 	return ret;
361 }
362 
363 /* Must be called with netns_bpf_mutex held. */
364 static int __netns_bpf_prog_detach(struct net *net,
365 				   enum netns_bpf_attach_type type,
366 				   struct bpf_prog *old)
367 {
368 	struct bpf_prog *attached;
369 
370 	/* Progs attached via links cannot be detached */
371 	if (!list_empty(&net->bpf.links[type]))
372 		return -EINVAL;
373 
374 	attached = net->bpf.progs[type];
375 	if (!attached || attached != old)
376 		return -ENOENT;
377 	netns_bpf_run_array_detach(net, type);
378 	net->bpf.progs[type] = NULL;
379 	bpf_prog_put(attached);
380 	return 0;
381 }
382 
383 int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
384 {
385 	enum netns_bpf_attach_type type;
386 	struct bpf_prog *prog;
387 	int ret;
388 
389 	if (attr->target_fd)
390 		return -EINVAL;
391 
392 	type = to_netns_bpf_attach_type(attr->attach_type);
393 	if (type < 0)
394 		return -EINVAL;
395 
396 	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
397 	if (IS_ERR(prog))
398 		return PTR_ERR(prog);
399 
400 	mutex_lock(&netns_bpf_mutex);
401 	ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
402 	mutex_unlock(&netns_bpf_mutex);
403 
404 	bpf_prog_put(prog);
405 
406 	return ret;
407 }
408 
409 static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
410 {
411 	switch (type) {
412 	case NETNS_BPF_FLOW_DISSECTOR:
413 		return 1;
414 	case NETNS_BPF_SK_LOOKUP:
415 		return 64;
416 	default:
417 		return 0;
418 	}
419 }
420 
421 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
422 				 enum netns_bpf_attach_type type)
423 {
424 	struct bpf_netns_link *net_link =
425 		container_of(link, struct bpf_netns_link, link);
426 	struct bpf_prog_array *run_array;
427 	int cnt, err;
428 
429 	mutex_lock(&netns_bpf_mutex);
430 
431 	cnt = link_count(net, type);
432 	if (cnt >= netns_bpf_max_progs(type)) {
433 		err = -E2BIG;
434 		goto out_unlock;
435 	}
436 	/* Links are not compatible with attaching prog directly */
437 	if (net->bpf.progs[type]) {
438 		err = -EEXIST;
439 		goto out_unlock;
440 	}
441 
442 	switch (type) {
443 	case NETNS_BPF_FLOW_DISSECTOR:
444 		err = flow_dissector_bpf_prog_attach_check(net, link->prog);
445 		break;
446 	case NETNS_BPF_SK_LOOKUP:
447 		err = 0; /* nothing to check */
448 		break;
449 	default:
450 		err = -EINVAL;
451 		break;
452 	}
453 	if (err)
454 		goto out_unlock;
455 
456 	run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
457 	if (!run_array) {
458 		err = -ENOMEM;
459 		goto out_unlock;
460 	}
461 
462 	list_add_tail(&net_link->node, &net->bpf.links[type]);
463 
464 	fill_prog_array(net, type, run_array);
465 	run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
466 					lockdep_is_held(&netns_bpf_mutex));
467 	bpf_prog_array_free(run_array);
468 
469 	/* Mark attach point as used */
470 	netns_bpf_attach_type_need(type);
471 
472 out_unlock:
473 	mutex_unlock(&netns_bpf_mutex);
474 	return err;
475 }
476 
477 int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
478 {
479 	enum netns_bpf_attach_type netns_type;
480 	struct bpf_link_primer link_primer;
481 	struct bpf_netns_link *net_link;
482 	enum bpf_attach_type type;
483 	struct net *net;
484 	int err;
485 
486 	if (attr->link_create.flags)
487 		return -EINVAL;
488 
489 	type = attr->link_create.attach_type;
490 	netns_type = to_netns_bpf_attach_type(type);
491 	if (netns_type < 0)
492 		return -EINVAL;
493 
494 	net = get_net_ns_by_fd(attr->link_create.target_fd);
495 	if (IS_ERR(net))
496 		return PTR_ERR(net);
497 
498 	net_link = kzalloc(sizeof(*net_link), GFP_USER);
499 	if (!net_link) {
500 		err = -ENOMEM;
501 		goto out_put_net;
502 	}
503 	bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
504 		      &bpf_netns_link_ops, prog);
505 	net_link->net = net;
506 	net_link->type = type;
507 	net_link->netns_type = netns_type;
508 
509 	err = bpf_link_prime(&net_link->link, &link_primer);
510 	if (err) {
511 		kfree(net_link);
512 		goto out_put_net;
513 	}
514 
515 	err = netns_bpf_link_attach(net, &net_link->link, netns_type);
516 	if (err) {
517 		bpf_link_cleanup(&link_primer);
518 		goto out_put_net;
519 	}
520 
521 	put_net(net);
522 	return bpf_link_settle(&link_primer);
523 
524 out_put_net:
525 	put_net(net);
526 	return err;
527 }
528 
529 static int __net_init netns_bpf_pernet_init(struct net *net)
530 {
531 	int type;
532 
533 	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
534 		INIT_LIST_HEAD(&net->bpf.links[type]);
535 
536 	return 0;
537 }
538 
539 static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
540 {
541 	enum netns_bpf_attach_type type;
542 	struct bpf_netns_link *net_link;
543 
544 	mutex_lock(&netns_bpf_mutex);
545 	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
546 		netns_bpf_run_array_detach(net, type);
547 		list_for_each_entry(net_link, &net->bpf.links[type], node) {
548 			net_link->net = NULL; /* auto-detach link */
549 			netns_bpf_attach_type_unneed(type);
550 		}
551 		if (net->bpf.progs[type])
552 			bpf_prog_put(net->bpf.progs[type]);
553 	}
554 	mutex_unlock(&netns_bpf_mutex);
555 }
556 
/* Per-netns hooks: .init prepares the per-type link lists of a new
 * netns, .pre_exit detaches programs and links from a netns that is
 * being torn down.
 */
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
	.init = netns_bpf_pernet_init,
	.pre_exit = netns_bpf_pernet_pre_exit,
};
561 
/* Register the per-netns hooks above as a pernet subsystem. Returns
 * whatever register_pernet_subsys() returns.
 */
static int __init netns_bpf_init(void)
{
	return register_pernet_subsys(&netns_bpf_pernet_ops);
}

/* Run the registration at subsys initcall level. */
subsys_initcall(netns_bpf_init);
568