xref: /linux/kernel/bpf/net_namespace.c (revision 93a3545d812ae7cfe4426374e00a7d8f64ac02e0)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/bpf.h>
4 #include <linux/filter.h>
5 #include <net/net_namespace.h>
6 
7 /*
8  * Functions to manage BPF programs attached to netns
9  */
10 
/* State for one BPF link attached to a network namespace. Embeds the
 * generic bpf_link; allocated in netns_bpf_link_create() and freed by
 * bpf_netns_link_dealloc().
 */
struct bpf_netns_link {
	struct bpf_link	link;
	enum bpf_attach_type type;		/* UAPI attach type, reported in link_info */
	enum netns_bpf_attach_type netns_type;	/* index into net->bpf per-type arrays */

	/* We don't hold a ref to net in order to auto-detach the link
	 * when netns is going away. Instead we rely on pernet
	 * pre_exit callback to clear this pointer. Must be accessed
	 * with netns_bpf_mutex held.
	 */
	struct net *net;
	struct list_head node; /* node in list of links attached to net */
};
24 
/* Protects updates to netns_bpf state (net->bpf.progs, run_array and
 * the per-type link lists) — a single global mutex for all netns.
 */
DEFINE_MUTEX(netns_bpf_mutex);
28 static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
29 {
30 	switch (type) {
31 	case NETNS_BPF_SK_LOOKUP:
32 		static_branch_dec(&bpf_sk_lookup_enabled);
33 		break;
34 	default:
35 		break;
36 	}
37 }
38 
39 static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
40 {
41 	switch (type) {
42 	case NETNS_BPF_SK_LOOKUP:
43 		static_branch_inc(&bpf_sk_lookup_enabled);
44 		break;
45 	default:
46 		break;
47 	}
48 }
49 
/* Detach the run_array for @type in @net by publishing a NULL pointer,
 * then free the old array. Must be called with netns_bpf_mutex held.
 */
static void netns_bpf_run_array_detach(struct net *net,
				       enum netns_bpf_attach_type type)
{
	struct bpf_prog_array *run_array;

	/* Swap in NULL under the mutex; readers dereference under RCU. */
	run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
					lockdep_is_held(&netns_bpf_mutex));
	bpf_prog_array_free(run_array);
}
60 
61 static int link_index(struct net *net, enum netns_bpf_attach_type type,
62 		      struct bpf_netns_link *link)
63 {
64 	struct bpf_netns_link *pos;
65 	int i = 0;
66 
67 	list_for_each_entry(pos, &net->bpf.links[type], node) {
68 		if (pos == link)
69 			return i;
70 		i++;
71 	}
72 	return -ENOENT;
73 }
74 
75 static int link_count(struct net *net, enum netns_bpf_attach_type type)
76 {
77 	struct list_head *pos;
78 	int i = 0;
79 
80 	list_for_each(pos, &net->bpf.links[type])
81 		i++;
82 	return i;
83 }
84 
85 static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
86 			    struct bpf_prog_array *prog_array)
87 {
88 	struct bpf_netns_link *pos;
89 	unsigned int i = 0;
90 
91 	list_for_each_entry(pos, &net->bpf.links[type], node) {
92 		prog_array->items[i].prog = pos->link.prog;
93 		i++;
94 	}
95 }
96 
/* bpf_link release callback: detach the link from its netns.
 *
 * Runs when the last reference to the link goes away. Removes the
 * link from the per-netns list and either drops the run_array (last
 * link) or publishes a rebuilt one without this link's program.
 */
static void bpf_netns_link_release(struct bpf_link *link)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct bpf_prog_array *old_array, *new_array;
	struct net *net;
	int cnt, idx;

	mutex_lock(&netns_bpf_mutex);

	/* We can race with cleanup_net, but if we see a non-NULL
	 * struct net pointer, pre_exit has not run yet and wait for
	 * netns_bpf_mutex.
	 */
	net = net_link->net;
	if (!net)
		goto out_unlock;

	/* Mark attach point as unused */
	netns_bpf_attach_type_unneed(type);

	/* Remember link position in case of safe delete */
	idx = link_index(net, type, net_link);
	list_del(&net_link->node);

	cnt = link_count(net, type);
	if (!cnt) {
		/* Last link gone: drop the whole run_array. */
		netns_bpf_run_array_detach(net, type);
		goto out_unlock;
	}

	old_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!new_array) {
		/* Allocation failed: delete in place instead. idx was
		 * captured before list_del(), so it still indexes this
		 * link's slot in the old array.
		 */
		WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
		goto out_unlock;
	}
	fill_prog_array(net, type, new_array);
	rcu_assign_pointer(net->bpf.run_array[type], new_array);
	bpf_prog_array_free(old_array);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
}
143 
144 static void bpf_netns_link_dealloc(struct bpf_link *link)
145 {
146 	struct bpf_netns_link *net_link =
147 		container_of(link, struct bpf_netns_link, link);
148 
149 	kfree(net_link);
150 }
151 
/* bpf_link update callback: replace the link's program with @new_prog.
 *
 * Returns -EPERM when @old_prog is given but is no longer the link's
 * program, -EINVAL on program type mismatch, and -ENOLINK once the
 * link has auto-detached or the netns is dying.
 */
static int bpf_netns_link_update_prog(struct bpf_link *link,
				      struct bpf_prog *new_prog,
				      struct bpf_prog *old_prog)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct bpf_prog_array *run_array;
	struct net *net;
	int idx, ret;

	if (old_prog && old_prog != link->prog)
		return -EPERM;
	if (new_prog->type != link->prog->type)
		return -EINVAL;

	mutex_lock(&netns_bpf_mutex);

	net = net_link->net;
	if (!net || !check_net(net)) {
		/* Link auto-detached or netns dying */
		ret = -ENOLINK;
		goto out_unlock;
	}

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	/* Update the run_array slot that belongs to this link. */
	idx = link_index(net, type, net_link);
	ret = bpf_prog_array_update_at(run_array, idx, new_prog);
	if (ret)
		goto out_unlock;

	/* Publish the new program on the link, then drop the old one. */
	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return ret;
}
191 
192 static int bpf_netns_link_fill_info(const struct bpf_link *link,
193 				    struct bpf_link_info *info)
194 {
195 	const struct bpf_netns_link *net_link =
196 		container_of(link, struct bpf_netns_link, link);
197 	unsigned int inum = 0;
198 	struct net *net;
199 
200 	mutex_lock(&netns_bpf_mutex);
201 	net = net_link->net;
202 	if (net && check_net(net))
203 		inum = net->ns.inum;
204 	mutex_unlock(&netns_bpf_mutex);
205 
206 	info->netns.netns_ino = inum;
207 	info->netns.attach_type = net_link->type;
208 	return 0;
209 }
210 
/* Print netns inode number and attach type for the link fd's
 * /proc fdinfo file.
 */
static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
				       struct seq_file *seq)
{
	struct bpf_link_info info = {};

	/* Reuse fill_info; it reports netns_ino 0 if already detached. */
	bpf_netns_link_fill_info(link, &info);
	seq_printf(seq,
		   "netns_ino:\t%u\n"
		   "attach_type:\t%u\n",
		   info.netns.netns_ino,
		   info.netns.attach_type);
}
223 
/* Operations backing BPF_LINK_TYPE_NETNS links. */
static const struct bpf_link_ops bpf_netns_link_ops = {
	.release = bpf_netns_link_release,
	.dealloc = bpf_netns_link_dealloc,
	.update_prog = bpf_netns_link_update_prog,
	.fill_link_info = bpf_netns_link_fill_info,
	.show_fdinfo = bpf_netns_link_show_fdinfo,
};
231 
/* Copy attach flags, program count and (optionally) the program IDs
 * for @type in @net out to user space. Must be called with
 * netns_bpf_mutex held.
 */
static int __netns_bpf_prog_query(const union bpf_attr *attr,
				  union bpf_attr __user *uattr,
				  struct net *net,
				  enum netns_bpf_attach_type type)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	struct bpf_prog_array *run_array;
	u32 prog_cnt = 0, flags = 0;

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	if (run_array)
		prog_cnt = bpf_prog_array_length(run_array);

	/* Flags and count are always reported, even when the caller
	 * did not ask for (or there are no) program IDs.
	 */
	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
		return -EFAULT;
	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
		return 0;

	return bpf_prog_array_copy_to_user(run_array, prog_ids,
					   attr->query.prog_cnt);
}
257 
/* BPF_PROG_QUERY handler for netns attach types. The target netns is
 * identified by the namespace fd in attr->query.target_fd.
 */
int netns_bpf_prog_query(const union bpf_attr *attr,
			 union bpf_attr __user *uattr)
{
	enum netns_bpf_attach_type type;
	struct net *net;
	int ret;

	if (attr->query.query_flags)
		return -EINVAL;

	type = to_netns_bpf_attach_type(attr->query.attach_type);
	if (type < 0)
		return -EINVAL;

	net = get_net_ns_by_fd(attr->query.target_fd);
	if (IS_ERR(net))
		return PTR_ERR(net);

	mutex_lock(&netns_bpf_mutex);
	ret = __netns_bpf_prog_query(attr, uattr, net, type);
	mutex_unlock(&netns_bpf_mutex);

	/* Drop the reference taken by get_net_ns_by_fd(). */
	put_net(net);
	return ret;
}
283 
/* BPF_PROG_ATTACH handler: attach @prog directly (without a link) to
 * the current task's netns. Direct attachment keeps a single program
 * per attach type; attaching a new program replaces the old one.
 */
int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_prog_array *run_array;
	enum netns_bpf_attach_type type;
	struct bpf_prog *attached;
	struct net *net;
	int ret;

	if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
		return -EINVAL;

	type = to_netns_bpf_attach_type(attr->attach_type);
	if (type < 0)
		return -EINVAL;

	net = current->nsproxy->net_ns;
	mutex_lock(&netns_bpf_mutex);

	/* Attaching prog directly is not compatible with links */
	if (!list_empty(&net->bpf.links[type])) {
		ret = -EEXIST;
		goto out_unlock;
	}

	/* Per-type attach-time validation; only the flow dissector
	 * supports direct (non-link) attachment here.
	 */
	switch (type) {
	case NETNS_BPF_FLOW_DISSECTOR:
		ret = flow_dissector_bpf_prog_attach_check(net, prog);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	if (ret)
		goto out_unlock;

	attached = net->bpf.progs[type];
	if (attached == prog) {
		/* The same program cannot be attached twice */
		ret = -EINVAL;
		goto out_unlock;
	}

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	if (run_array) {
		/* Replace the program in the existing single-slot array. */
		WRITE_ONCE(run_array->items[0].prog, prog);
	} else {
		/* First attachment: publish a fresh one-slot run_array. */
		run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
		if (!run_array) {
			ret = -ENOMEM;
			goto out_unlock;
		}
		run_array->items[0].prog = prog;
		rcu_assign_pointer(net->bpf.run_array[type], run_array);
	}

	/* Record the new program; drop the ref on the replaced one. */
	net->bpf.progs[type] = prog;
	if (attached)
		bpf_prog_put(attached);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);

	return ret;
}
349 
/* Detach the directly-attached program @old of @type from @net.
 * Returns -ENOENT unless @old is the currently attached program.
 * Must be called with netns_bpf_mutex held.
 */
static int __netns_bpf_prog_detach(struct net *net,
				   enum netns_bpf_attach_type type,
				   struct bpf_prog *old)
{
	struct bpf_prog *attached;

	/* Progs attached via links cannot be detached */
	if (!list_empty(&net->bpf.links[type]))
		return -EINVAL;

	attached = net->bpf.progs[type];
	if (!attached || attached != old)
		return -ENOENT;
	/* Unpublish the run_array first, then release the program. */
	netns_bpf_run_array_detach(net, type);
	net->bpf.progs[type] = NULL;
	bpf_prog_put(attached);
	return 0;
}
369 
/* BPF_PROG_DETACH handler: detach the program given by
 * attr->attach_bpf_fd from the current task's netns.
 */
int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
	enum netns_bpf_attach_type type;
	struct bpf_prog *prog;
	int ret;

	if (attr->target_fd)
		return -EINVAL;

	type = to_netns_bpf_attach_type(attr->attach_type);
	if (type < 0)
		return -EINVAL;

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	mutex_lock(&netns_bpf_mutex);
	ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
	mutex_unlock(&netns_bpf_mutex);

	/* Drop the reference taken by bpf_prog_get_type(). */
	bpf_prog_put(prog);

	return ret;
}
395 
396 static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
397 {
398 	switch (type) {
399 	case NETNS_BPF_FLOW_DISSECTOR:
400 		return 1;
401 	case NETNS_BPF_SK_LOOKUP:
402 		return 64;
403 	default:
404 		return 0;
405 	}
406 }
407 
/* Attach @link to @net for @type: enforce the per-type link limit,
 * validate the program, append the link to the list, and publish a
 * rebuilt run_array containing all linked programs.
 */
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
				 enum netns_bpf_attach_type type)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	struct bpf_prog_array *run_array;
	int cnt, err;

	mutex_lock(&netns_bpf_mutex);

	cnt = link_count(net, type);
	if (cnt >= netns_bpf_max_progs(type)) {
		err = -E2BIG;
		goto out_unlock;
	}
	/* Links are not compatible with attaching prog directly */
	if (net->bpf.progs[type]) {
		err = -EEXIST;
		goto out_unlock;
	}

	/* Per-type attach-time validation of the program. */
	switch (type) {
	case NETNS_BPF_FLOW_DISSECTOR:
		err = flow_dissector_bpf_prog_attach_check(net, link->prog);
		break;
	case NETNS_BPF_SK_LOOKUP:
		err = 0; /* nothing to check */
		break;
	default:
		err = -EINVAL;
		break;
	}
	if (err)
		goto out_unlock;

	/* Allocate before touching the list so failure leaves the
	 * existing state untouched.
	 */
	run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
	if (!run_array) {
		err = -ENOMEM;
		goto out_unlock;
	}

	list_add_tail(&net_link->node, &net->bpf.links[type]);

	fill_prog_array(net, type, run_array);
	run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
					lockdep_is_held(&netns_bpf_mutex));
	bpf_prog_array_free(run_array);

	/* Mark attach point as used */
	netns_bpf_attach_type_need(type);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return err;
}
463 
/* BPF_LINK_CREATE handler for netns attach types: allocate a link for
 * @prog, attach it to the netns identified by link_create.target_fd,
 * and return a new link fd (or a negative error).
 */
int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
{
	enum netns_bpf_attach_type netns_type;
	struct bpf_link_primer link_primer;
	struct bpf_netns_link *net_link;
	enum bpf_attach_type type;
	struct net *net;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	type = attr->link_create.attach_type;
	netns_type = to_netns_bpf_attach_type(type);
	if (netns_type < 0)
		return -EINVAL;

	net = get_net_ns_by_fd(attr->link_create.target_fd);
	if (IS_ERR(net))
		return PTR_ERR(net);

	net_link = kzalloc(sizeof(*net_link), GFP_USER);
	if (!net_link) {
		err = -ENOMEM;
		goto out_put_net;
	}
	bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
		      &bpf_netns_link_ops, prog);
	net_link->net = net;
	net_link->type = type;
	net_link->netns_type = netns_type;

	/* Reserve fd/anon-inode before attaching, so an attach failure
	 * can be unwound with bpf_link_cleanup().
	 */
	err = bpf_link_prime(&net_link->link, &link_primer);
	if (err) {
		kfree(net_link);
		goto out_put_net;
	}

	err = netns_bpf_link_attach(net, &net_link->link, netns_type);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_net;
	}

	/* The link deliberately holds no netns reference; drop ours. */
	put_net(net);
	return bpf_link_settle(&link_primer);

out_put_net:
	put_net(net);
	return err;
}
515 
516 static int __net_init netns_bpf_pernet_init(struct net *net)
517 {
518 	int type;
519 
520 	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
521 		INIT_LIST_HEAD(&net->bpf.links[type]);
522 
523 	return 0;
524 }
525 
/* Pernet pre_exit: auto-detach all netns-attached BPF state as the
 * namespace is torn down, under the global netns_bpf_mutex.
 */
static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
{
	enum netns_bpf_attach_type type;
	struct bpf_netns_link *net_link;

	mutex_lock(&netns_bpf_mutex);
	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
		netns_bpf_run_array_detach(net, type);
		list_for_each_entry(net_link, &net->bpf.links[type], node) {
			net_link->net = NULL; /* auto-detach link */
			netns_bpf_attach_type_unneed(type);
		}
		/* Release a directly-attached (non-link) program, if any. */
		if (net->bpf.progs[type])
			bpf_prog_put(net->bpf.progs[type]);
	}
	mutex_unlock(&netns_bpf_mutex);
}
543 
/* Pernet callbacks: init per-netns link lists, auto-detach on exit. */
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
	.init = netns_bpf_pernet_init,
	.pre_exit = netns_bpf_pernet_pre_exit,
};

/* Register the pernet callbacks during boot. */
static int __init netns_bpf_init(void)
{
	return register_pernet_subsys(&netns_bpf_pernet_ops);
}

subsys_initcall(netns_bpf_init);
555