1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2023 Isovalent */
3
4 #include <linux/netdevice.h>
5 #include <linux/ethtool.h>
6 #include <linux/etherdevice.h>
7 #include <linux/filter.h>
8 #include <linux/netfilter_netdev.h>
9 #include <linux/bpf_mprog.h>
10 #include <linux/indirect_call_wrapper.h>
11
12 #include <net/netkit.h>
13 #include <net/dst.h>
14 #include <net/tcx.h>
15
/* Driver name: reported via ethtool drvinfo and used as the rtnl link kind. */
#define DRV_NAME "netkit"
17
/* Per-device private state, reached through netkit_priv(). */
struct netkit {
	/* Needed in fast-path */
	/* The other device of the pair; NULL before pairing and after dellink. */
	struct net_device __rcu *peer;
	/* Program entry run by netkit_xmit(); NULL while nothing is attached. */
	struct bpf_mprog_entry __rcu *active;
	/* Default verdict netkit_xmit() starts from before running programs. */
	enum netkit_action policy;
	/* Entry storage; bundle.a is handed out while no entry is active. */
	struct bpf_mprog_bundle bundle;

	/* Needed in slow-path */
	/* Operating mode (NETKIT_L2 or NETKIT_L3), set when the link is created. */
	enum netkit_mode mode;
	/* True for the device newlink ran on, false for the peer it created. */
	bool primary;
	/* Headroom last requested for this device via ndo_set_rx_headroom. */
	u32 headroom;
};
30
/* BPF link wrapper tying a program to one netkit device. */
struct netkit_link {
	/* Embedded generic link; netkit_link() maps it back to this wrapper. */
	struct bpf_link link;
	/* Target device; set to NULL on release or when the device is torn down. */
	struct net_device *dev;
	/* Attach type taken from the link-create request. */
	u32 location;
};
36
37 static __always_inline int
netkit_run(const struct bpf_mprog_entry * entry,struct sk_buff * skb,enum netkit_action ret)38 netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
39 enum netkit_action ret)
40 {
41 const struct bpf_mprog_fp *fp;
42 const struct bpf_prog *prog;
43
44 bpf_mprog_foreach_prog(entry, fp, prog) {
45 bpf_compute_data_pointers(skb);
46 ret = bpf_prog_run(prog, skb);
47 if (ret != NETKIT_NEXT)
48 break;
49 }
50 return ret;
51 }
52
/*
 * Prepare @skb for hand-over to the peer device: scrub it, then reset the
 * MAC header offset, flag it to skip netfilter egress and clear its priority.
 *
 * @xnet is forwarded to skb_scrub_packet(); netkit_xmit() sets it when the
 * two devices sit in different network namespaces.
 */
static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
{
	skb_scrub_packet(skb, xnet);
	skb_reset_mac_header(skb);
	nf_skip_egress(skb, true);
	skb->priority = 0;
}
60
/* Return the netkit private area that belongs to @dev. */
static struct netkit *netkit_priv(const struct net_device *dev)
{
	struct netkit *nk = netdev_priv(dev);

	return nk;
}
65
netkit_xmit(struct sk_buff * skb,struct net_device * dev)66 static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
67 {
68 struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
69 struct netkit *nk = netkit_priv(dev);
70 enum netkit_action ret = READ_ONCE(nk->policy);
71 netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
72 const struct bpf_mprog_entry *entry;
73 struct net_device *peer;
74 int len = skb->len;
75
76 bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
77 rcu_read_lock();
78 peer = rcu_dereference(nk->peer);
79 if (unlikely(!peer || !(peer->flags & IFF_UP) ||
80 !pskb_may_pull(skb, ETH_HLEN) ||
81 skb_orphan_frags(skb, GFP_ATOMIC)))
82 goto drop;
83 netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
84 eth_skb_pkt_type(skb, peer);
85 skb->dev = peer;
86 entry = rcu_dereference(nk->active);
87 if (entry)
88 ret = netkit_run(entry, skb, ret);
89 switch (ret) {
90 case NETKIT_NEXT:
91 case NETKIT_PASS:
92 eth_skb_pull_mac(skb);
93 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
94 if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
95 dev_sw_netstats_tx_add(dev, 1, len);
96 dev_sw_netstats_rx_add(peer, len);
97 } else {
98 goto drop_stats;
99 }
100 break;
101 case NETKIT_REDIRECT:
102 dev_sw_netstats_tx_add(dev, 1, len);
103 skb_do_redirect(skb);
104 break;
105 case NETKIT_DROP:
106 default:
107 drop:
108 kfree_skb(skb);
109 drop_stats:
110 dev_core_stats_tx_dropped_inc(dev);
111 ret_dev = NET_XMIT_DROP;
112 break;
113 }
114 rcu_read_unlock();
115 bpf_net_ctx_clear(bpf_net_ctx);
116 return ret_dev;
117 }
118
netkit_open(struct net_device * dev)119 static int netkit_open(struct net_device *dev)
120 {
121 struct netkit *nk = netkit_priv(dev);
122 struct net_device *peer = rtnl_dereference(nk->peer);
123
124 if (!peer)
125 return -ENOTCONN;
126 if (peer->flags & IFF_UP) {
127 netif_carrier_on(dev);
128 netif_carrier_on(peer);
129 }
130 return 0;
131 }
132
netkit_close(struct net_device * dev)133 static int netkit_close(struct net_device *dev)
134 {
135 struct netkit *nk = netkit_priv(dev);
136 struct net_device *peer = rtnl_dereference(nk->peer);
137
138 netif_carrier_off(dev);
139 if (peer)
140 netif_carrier_off(peer);
141 return 0;
142 }
143
netkit_get_iflink(const struct net_device * dev)144 static int netkit_get_iflink(const struct net_device *dev)
145 {
146 struct netkit *nk = netkit_priv(dev);
147 struct net_device *peer;
148 int iflink = 0;
149
150 rcu_read_lock();
151 peer = rcu_dereference(nk->peer);
152 if (peer)
153 iflink = READ_ONCE(peer->ifindex);
154 rcu_read_unlock();
155 return iflink;
156 }
157
/* ndo_set_rx_mode handler; deliberately a no-op. */
static void netkit_set_multicast(struct net_device *dev)
{
	/* Nothing to do, we receive whatever gets pushed to us! */
}
162
netkit_set_macaddr(struct net_device * dev,void * sa)163 static int netkit_set_macaddr(struct net_device *dev, void *sa)
164 {
165 struct netkit *nk = netkit_priv(dev);
166
167 if (nk->mode != NETKIT_L2)
168 return -EOPNOTSUPP;
169
170 return eth_mac_addr(dev, sa);
171 }
172
netkit_set_headroom(struct net_device * dev,int headroom)173 static void netkit_set_headroom(struct net_device *dev, int headroom)
174 {
175 struct netkit *nk = netkit_priv(dev), *nk2;
176 struct net_device *peer;
177
178 if (headroom < 0)
179 headroom = NET_SKB_PAD;
180
181 rcu_read_lock();
182 peer = rcu_dereference(nk->peer);
183 if (unlikely(!peer))
184 goto out;
185
186 nk2 = netkit_priv(peer);
187 nk->headroom = headroom;
188 headroom = max(nk->headroom, nk2->headroom);
189
190 peer->needed_headroom = headroom;
191 dev->needed_headroom = headroom;
192 out:
193 rcu_read_unlock();
194 }
195
netkit_peer_dev(struct net_device * dev)196 INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
197 {
198 return rcu_dereference(netkit_priv(dev)->peer);
199 }
200
/*
 * ndo_get_stats64 handler: fill @stats from dev->tstats, then set tx_dropped
 * from the device's tx_dropped counter.
 */
static void netkit_get_stats(struct net_device *dev,
			     struct rtnl_link_stats64 *stats)
{
	dev_fetch_sw_netstats(stats, dev->tstats);
	stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
}
207
/* Defined further down; declared here because the ops table refers to it. */
static void netkit_uninit(struct net_device *dev);

/*
 * Net device callbacks. netkit_dev_fetch() also compares a device's
 * netdev_ops against this table to recognise netkit devices.
 */
static const struct net_device_ops netkit_netdev_ops = {
	.ndo_open		= netkit_open,
	.ndo_stop		= netkit_close,
	.ndo_start_xmit		= netkit_xmit,
	.ndo_set_rx_mode	= netkit_set_multicast,
	.ndo_set_rx_headroom	= netkit_set_headroom,
	.ndo_set_mac_address	= netkit_set_macaddr,
	.ndo_get_iflink		= netkit_get_iflink,
	.ndo_get_peer_dev	= netkit_peer_dev,
	.ndo_get_stats64	= netkit_get_stats,
	.ndo_uninit		= netkit_uninit,
	.ndo_features_check	= passthru_features_check,
};
223
/* ethtool get_drvinfo handler: report DRV_NAME as the driver name. */
static void netkit_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
}
229
/* ethtool callbacks; only the driver info query is implemented. */
static const struct ethtool_ops netkit_ethtool_ops = {
	.get_drvinfo		= netkit_get_drvinfo,
};
233
netkit_setup(struct net_device * dev)234 static void netkit_setup(struct net_device *dev)
235 {
236 static const netdev_features_t netkit_features_hw_vlan =
237 NETIF_F_HW_VLAN_CTAG_TX |
238 NETIF_F_HW_VLAN_CTAG_RX |
239 NETIF_F_HW_VLAN_STAG_TX |
240 NETIF_F_HW_VLAN_STAG_RX;
241 static const netdev_features_t netkit_features =
242 netkit_features_hw_vlan |
243 NETIF_F_SG |
244 NETIF_F_FRAGLIST |
245 NETIF_F_HW_CSUM |
246 NETIF_F_RXCSUM |
247 NETIF_F_SCTP_CRC |
248 NETIF_F_HIGHDMA |
249 NETIF_F_GSO_SOFTWARE |
250 NETIF_F_GSO_ENCAP_ALL;
251
252 ether_setup(dev);
253 dev->max_mtu = ETH_MAX_MTU;
254 dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
255
256 dev->flags |= IFF_NOARP;
257 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
258 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
259 dev->priv_flags |= IFF_PHONY_HEADROOM;
260 dev->priv_flags |= IFF_NO_QUEUE;
261 dev->priv_flags |= IFF_DISABLE_NETPOLL;
262 dev->lltx = true;
263
264 dev->ethtool_ops = &netkit_ethtool_ops;
265 dev->netdev_ops = &netkit_netdev_ops;
266
267 dev->features |= netkit_features;
268 dev->hw_features = netkit_features;
269 dev->hw_enc_features = netkit_features;
270 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
271 dev->vlan_features = dev->features & ~netkit_features_hw_vlan;
272
273 dev->needs_free_netdev = true;
274
275 netif_set_tso_max_size(dev, GSO_MAX_SIZE);
276 }
277
netkit_get_link_net(const struct net_device * dev)278 static struct net *netkit_get_link_net(const struct net_device *dev)
279 {
280 struct netkit *nk = netkit_priv(dev);
281 struct net_device *peer = rtnl_dereference(nk->peer);
282
283 return peer ? dev_net(peer) : dev_net(dev);
284 }
285
netkit_check_policy(int policy,struct nlattr * tb,struct netlink_ext_ack * extack)286 static int netkit_check_policy(int policy, struct nlattr *tb,
287 struct netlink_ext_ack *extack)
288 {
289 switch (policy) {
290 case NETKIT_PASS:
291 case NETKIT_DROP:
292 return 0;
293 default:
294 NL_SET_ERR_MSG_ATTR(extack, tb,
295 "Provided default xmit policy not supported");
296 return -EINVAL;
297 }
298 }
299
netkit_check_mode(int mode,struct nlattr * tb,struct netlink_ext_ack * extack)300 static int netkit_check_mode(int mode, struct nlattr *tb,
301 struct netlink_ext_ack *extack)
302 {
303 switch (mode) {
304 case NETKIT_L2:
305 case NETKIT_L3:
306 return 0;
307 default:
308 NL_SET_ERR_MSG_ATTR(extack, tb,
309 "Provided device mode can only be L2 or L3");
310 return -EINVAL;
311 }
312 }
313
netkit_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)314 static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
315 struct netlink_ext_ack *extack)
316 {
317 struct nlattr *attr = tb[IFLA_ADDRESS];
318
319 if (!attr)
320 return 0;
321 if (nla_len(attr) != ETH_ALEN)
322 return -EINVAL;
323 if (!is_valid_ether_addr(nla_data(attr)))
324 return -EADDRNOTAVAIL;
325 return 0;
326 }
327
/* Defined near the end of the file; netkit_new_link() needs it to create the peer. */
static struct rtnl_link_ops netkit_link_ops;
329
/*
 * rtnl newlink callback: create a netkit device pair.
 *
 * @dev is the device being created by the caller and becomes the primary.
 * The peer is created here, registered first, and the two are cross-linked
 * through their private peer pointers once both are registered.
 *
 * Returns 0 on success or a negative errno. On failure the peer is freed or
 * unregistered, depending on how far its setup got.
 */
static int netkit_new_link(struct net *src_net, struct net_device *dev,
			   struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	/* tbp is the attribute table used for the peer; defaults to @tb. */
	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
	enum netkit_action default_prim = NETKIT_PASS;
	enum netkit_action default_peer = NETKIT_PASS;
	enum netkit_mode mode = NETKIT_L3;
	unsigned char ifname_assign_type;
	struct ifinfomsg *ifmp = NULL;
	struct net_device *peer;
	char ifname[IFNAMSIZ];
	struct netkit *nk;
	struct net *net;
	int err;

	if (data) {
		if (data[IFLA_NETKIT_MODE]) {
			attr = data[IFLA_NETKIT_MODE];
			mode = nla_get_u32(attr);
			err = netkit_check_mode(mode, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_PEER_INFO]) {
			/* Peer-specific attributes replace @tb for the peer. */
			attr = data[IFLA_NETKIT_PEER_INFO];
			ifmp = nla_data(attr);
			err = rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack);
			if (err < 0)
				return err;
			err = netkit_validate(peer_tb, NULL, extack);
			if (err < 0)
				return err;
			tbp = peer_tb;
		}
		if (data[IFLA_NETKIT_POLICY]) {
			attr = data[IFLA_NETKIT_POLICY];
			default_prim = nla_get_u32(attr);
			err = netkit_check_policy(default_prim, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_PEER_POLICY]) {
			attr = data[IFLA_NETKIT_PEER_POLICY];
			default_peer = nla_get_u32(attr);
			err = netkit_check_policy(default_peer, attr, extack);
			if (err < 0)
				return err;
		}
	}

	/* Peer name: taken from the peer info if given, else the "nk%d" template. */
	if (ifmp && tbp[IFLA_IFNAME]) {
		nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
		ifname_assign_type = NET_NAME_USER;
	} else {
		strscpy(ifname, "nk%d", IFNAMSIZ);
		ifname_assign_type = NET_NAME_ENUM;
	}
	/* An explicit MAC address on either device is accepted in L2 mode only. */
	if (mode != NETKIT_L2 &&
	    (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
		return -EOPNOTSUPP;

	net = rtnl_link_get_net(src_net, tbp);
	if (IS_ERR(net))
		return PTR_ERR(net);

	peer = rtnl_create_link(net, ifname, ifname_assign_type,
				&netkit_link_ops, tbp, extack);
	if (IS_ERR(peer)) {
		put_net(net);
		return PTR_ERR(peer);
	}

	netif_inherit_tso_max(peer, dev);

	/* L2 peer without a user-supplied address gets a random one. */
	if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
		eth_hw_addr_random(peer);
	if (ifmp && dev->ifindex)
		peer->ifindex = ifmp->ifi_index;

	nk = netkit_priv(peer);
	nk->primary = false;
	nk->policy = default_peer;
	nk->mode = mode;
	bpf_mprog_bundle_init(&nk->bundle);

	err = register_netdevice(peer);
	/* The namespace obtained above is released on success and failure alike. */
	put_net(net);
	if (err < 0)
		goto err_register_peer;
	netif_carrier_off(peer);
	if (mode == NETKIT_L2)
		dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);

	err = rtnl_configure_link(peer, NULL, 0, NULL);
	if (err < 0)
		goto err_configure_peer;

	/* Now set up and register the primary device. */
	if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);
	if (tb[IFLA_IFNAME])
		nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
	else
		strscpy(dev->name, "nk%d", IFNAMSIZ);

	nk = netkit_priv(dev);
	nk->primary = true;
	nk->policy = default_prim;
	nk->mode = mode;
	bpf_mprog_bundle_init(&nk->bundle);

	err = register_netdevice(dev);
	if (err < 0)
		goto err_configure_peer;
	netif_carrier_off(dev);
	if (mode == NETKIT_L2)
		dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);

	/* Both devices are registered: publish the peer pointers. */
	rcu_assign_pointer(netkit_priv(dev)->peer, peer);
	rcu_assign_pointer(netkit_priv(peer)->peer, dev);
	return 0;
err_configure_peer:
	/* The peer is registered by now, so it has to be unregistered. */
	unregister_netdevice(peer);
	return err;
err_register_peer:
	/* Peer registration failed; only its allocation is released. */
	free_netdev(peer);
	return err;
}
458
/*
 * Return the active program entry of @dev. Requires RTNL.
 *
 * When no entry is active, the result is the bundle's first entry if
 * @bundle_fallback is set and NULL otherwise.
 */
static struct bpf_mprog_entry *netkit_entry_fetch(struct net_device *dev,
						  bool bundle_fallback)
{
	struct netkit *nk = netkit_priv(dev);
	struct bpf_mprog_entry *active;

	ASSERT_RTNL();
	active = rcu_dereference_rtnl(nk->active);
	if (!active && bundle_fallback)
		active = &nk->bundle.a;
	return active;
}
473
netkit_entry_update(struct net_device * dev,struct bpf_mprog_entry * entry)474 static void netkit_entry_update(struct net_device *dev,
475 struct bpf_mprog_entry *entry)
476 {
477 struct netkit *nk = netkit_priv(dev);
478
479 ASSERT_RTNL();
480 rcu_assign_pointer(nk->active, entry);
481 }
482
/*
 * Wait for an RCU grace period. Callers invoke this right after
 * netkit_entry_update() and before committing or releasing the old entry.
 */
static void netkit_entry_sync(void)
{
	synchronize_rcu();
}
487
/*
 * Resolve the netkit device a BPF attach/detach/query request refers to.
 * Requires RTNL.
 *
 * @ifindex must name the primary device of a pair in @net. @which selects
 * whether that device (BPF_NETKIT_PRIMARY) or its peer (BPF_NETKIT_PEER) is
 * returned.
 *
 * Returns the device, or ERR_PTR(): -EINVAL for an unknown @which, -ENODEV
 * when the device or the requested peer does not exist, -ENXIO when the
 * device is not a netkit device, -EACCES when it is not the primary.
 */
static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 which)
{
	struct net_device *dev, *peer;

	ASSERT_RTNL();

	if (which != BPF_NETKIT_PRIMARY && which != BPF_NETKIT_PEER)
		return ERR_PTR(-EINVAL);

	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		return ERR_PTR(-ENODEV);
	if (dev->netdev_ops != &netkit_netdev_ops)
		return ERR_PTR(-ENXIO);
	if (!netkit_priv(dev)->primary)
		return ERR_PTR(-EACCES);
	if (which == BPF_NETKIT_PRIMARY)
		return dev;

	peer = rcu_dereference_rtnl(netkit_priv(dev)->peer);
	return peer ? peer : ERR_PTR(-ENODEV);
}
519
netkit_prog_attach(const union bpf_attr * attr,struct bpf_prog * prog)520 int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
521 {
522 struct bpf_mprog_entry *entry, *entry_new;
523 struct bpf_prog *replace_prog = NULL;
524 struct net_device *dev;
525 int ret;
526
527 rtnl_lock();
528 dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex,
529 attr->attach_type);
530 if (IS_ERR(dev)) {
531 ret = PTR_ERR(dev);
532 goto out;
533 }
534 entry = netkit_entry_fetch(dev, true);
535 if (attr->attach_flags & BPF_F_REPLACE) {
536 replace_prog = bpf_prog_get_type(attr->replace_bpf_fd,
537 prog->type);
538 if (IS_ERR(replace_prog)) {
539 ret = PTR_ERR(replace_prog);
540 replace_prog = NULL;
541 goto out;
542 }
543 }
544 ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, replace_prog,
545 attr->attach_flags, attr->relative_fd,
546 attr->expected_revision);
547 if (!ret) {
548 if (entry != entry_new) {
549 netkit_entry_update(dev, entry_new);
550 netkit_entry_sync();
551 }
552 bpf_mprog_commit(entry);
553 }
554 out:
555 if (replace_prog)
556 bpf_prog_put(replace_prog);
557 rtnl_unlock();
558 return ret;
559 }
560
netkit_prog_detach(const union bpf_attr * attr,struct bpf_prog * prog)561 int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog)
562 {
563 struct bpf_mprog_entry *entry, *entry_new;
564 struct net_device *dev;
565 int ret;
566
567 rtnl_lock();
568 dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex,
569 attr->attach_type);
570 if (IS_ERR(dev)) {
571 ret = PTR_ERR(dev);
572 goto out;
573 }
574 entry = netkit_entry_fetch(dev, false);
575 if (!entry) {
576 ret = -ENOENT;
577 goto out;
578 }
579 ret = bpf_mprog_detach(entry, &entry_new, prog, NULL, attr->attach_flags,
580 attr->relative_fd, attr->expected_revision);
581 if (!ret) {
582 if (!bpf_mprog_total(entry_new))
583 entry_new = NULL;
584 netkit_entry_update(dev, entry_new);
585 netkit_entry_sync();
586 bpf_mprog_commit(entry);
587 }
588 out:
589 rtnl_unlock();
590 return ret;
591 }
592
/*
 * Answer a BPF program query for the netkit device described by
 * @attr->query; the result goes to @uattr through bpf_mprog_query().
 *
 * Takes RTNL internally. Returns 0 on success or a negative errno.
 */
int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
{
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns,
			       attr->query.target_ifindex,
			       attr->query.attach_type);
	if (IS_ERR(dev))
		ret = PTR_ERR(dev);
	else
		ret = bpf_mprog_query(attr, uattr,
				      netkit_entry_fetch(dev, false));
	rtnl_unlock();
	return ret;
}
611
netkit_link(const struct bpf_link * link)612 static struct netkit_link *netkit_link(const struct bpf_link *link)
613 {
614 return container_of(link, struct netkit_link, link);
615 }
616
/*
 * Attach the program of @link to the link's device. Requires RTNL.
 *
 * @flags, @id_or_fd and @revision are handed to bpf_mprog_attach() as is.
 * Returns 0 on success or the error from bpf_mprog_attach().
 */
static int netkit_link_prog_attach(struct bpf_link *link, u32 flags,
				   u32 id_or_fd, u64 revision)
{
	struct net_device *dev = netkit_link(link)->dev;
	struct bpf_mprog_entry *cur, *next;
	int ret;

	ASSERT_RTNL();
	cur = netkit_entry_fetch(dev, true);
	ret = bpf_mprog_attach(cur, &next, link->prog, link, NULL, flags,
			       id_or_fd, revision);
	if (ret)
		return ret;

	/* Publish the new entry only when the attach produced a different one. */
	if (cur != next) {
		netkit_entry_update(dev, next);
		netkit_entry_sync();
	}
	bpf_mprog_commit(cur);
	return 0;
}
638
netkit_link_release(struct bpf_link * link)639 static void netkit_link_release(struct bpf_link *link)
640 {
641 struct netkit_link *nkl = netkit_link(link);
642 struct bpf_mprog_entry *entry, *entry_new;
643 struct net_device *dev;
644 int ret = 0;
645
646 rtnl_lock();
647 dev = nkl->dev;
648 if (!dev)
649 goto out;
650 entry = netkit_entry_fetch(dev, false);
651 if (!entry) {
652 ret = -ENOENT;
653 goto out;
654 }
655 ret = bpf_mprog_detach(entry, &entry_new, link->prog, link, 0, 0, 0);
656 if (!ret) {
657 if (!bpf_mprog_total(entry_new))
658 entry_new = NULL;
659 netkit_entry_update(dev, entry_new);
660 netkit_entry_sync();
661 bpf_mprog_commit(entry);
662 nkl->dev = NULL;
663 }
664 out:
665 WARN_ON_ONCE(ret);
666 rtnl_unlock();
667 }
668
netkit_link_update(struct bpf_link * link,struct bpf_prog * nprog,struct bpf_prog * oprog)669 static int netkit_link_update(struct bpf_link *link, struct bpf_prog *nprog,
670 struct bpf_prog *oprog)
671 {
672 struct netkit_link *nkl = netkit_link(link);
673 struct bpf_mprog_entry *entry, *entry_new;
674 struct net_device *dev;
675 int ret = 0;
676
677 rtnl_lock();
678 dev = nkl->dev;
679 if (!dev) {
680 ret = -ENOLINK;
681 goto out;
682 }
683 if (oprog && link->prog != oprog) {
684 ret = -EPERM;
685 goto out;
686 }
687 oprog = link->prog;
688 if (oprog == nprog) {
689 bpf_prog_put(nprog);
690 goto out;
691 }
692 entry = netkit_entry_fetch(dev, false);
693 if (!entry) {
694 ret = -ENOENT;
695 goto out;
696 }
697 ret = bpf_mprog_attach(entry, &entry_new, nprog, link, oprog,
698 BPF_F_REPLACE | BPF_F_ID,
699 link->prog->aux->id, 0);
700 if (!ret) {
701 WARN_ON_ONCE(entry != entry_new);
702 oprog = xchg(&link->prog, nprog);
703 bpf_prog_put(oprog);
704 bpf_mprog_commit(entry);
705 }
706 out:
707 rtnl_unlock();
708 return ret;
709 }
710
/* bpf_link dealloc callback: free the netkit_link that embeds @link. */
static void netkit_link_dealloc(struct bpf_link *link)
{
	struct netkit_link *nkl = netkit_link(link);

	kfree(nkl);
}
715
netkit_link_fdinfo(const struct bpf_link * link,struct seq_file * seq)716 static void netkit_link_fdinfo(const struct bpf_link *link, struct seq_file *seq)
717 {
718 const struct netkit_link *nkl = netkit_link(link);
719 u32 ifindex = 0;
720
721 rtnl_lock();
722 if (nkl->dev)
723 ifindex = nkl->dev->ifindex;
724 rtnl_unlock();
725
726 seq_printf(seq, "ifindex:\t%u\n", ifindex);
727 seq_printf(seq, "attach_type:\t%u (%s)\n",
728 nkl->location,
729 nkl->location == BPF_NETKIT_PRIMARY ? "primary" : "peer");
730 }
731
netkit_link_fill_info(const struct bpf_link * link,struct bpf_link_info * info)732 static int netkit_link_fill_info(const struct bpf_link *link,
733 struct bpf_link_info *info)
734 {
735 const struct netkit_link *nkl = netkit_link(link);
736 u32 ifindex = 0;
737
738 rtnl_lock();
739 if (nkl->dev)
740 ifindex = nkl->dev->ifindex;
741 rtnl_unlock();
742
743 info->netkit.ifindex = ifindex;
744 info->netkit.attach_type = nkl->location;
745 return 0;
746 }
747
/* bpf_link detach callback: same work as release; always reports success. */
static int netkit_link_detach(struct bpf_link *link)
{
	netkit_link_release(link);
	return 0;
}
753
/* BPF link callbacks for netkit links, installed by netkit_link_init(). */
static const struct bpf_link_ops netkit_link_lops = {
	.release	= netkit_link_release,
	.detach		= netkit_link_detach,
	.dealloc	= netkit_link_dealloc,
	.update_prog	= netkit_link_update,
	.show_fdinfo	= netkit_link_fdinfo,
	.fill_link_info	= netkit_link_fill_info,
};
762
netkit_link_init(struct netkit_link * nkl,struct bpf_link_primer * link_primer,const union bpf_attr * attr,struct net_device * dev,struct bpf_prog * prog)763 static int netkit_link_init(struct netkit_link *nkl,
764 struct bpf_link_primer *link_primer,
765 const union bpf_attr *attr,
766 struct net_device *dev,
767 struct bpf_prog *prog)
768 {
769 bpf_link_init(&nkl->link, BPF_LINK_TYPE_NETKIT,
770 &netkit_link_lops, prog);
771 nkl->location = attr->link_create.attach_type;
772 nkl->dev = dev;
773 return bpf_link_prime(&nkl->link, link_primer);
774 }
775
netkit_link_attach(const union bpf_attr * attr,struct bpf_prog * prog)776 int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
777 {
778 struct bpf_link_primer link_primer;
779 struct netkit_link *nkl;
780 struct net_device *dev;
781 int ret;
782
783 rtnl_lock();
784 dev = netkit_dev_fetch(current->nsproxy->net_ns,
785 attr->link_create.target_ifindex,
786 attr->link_create.attach_type);
787 if (IS_ERR(dev)) {
788 ret = PTR_ERR(dev);
789 goto out;
790 }
791 nkl = kzalloc(sizeof(*nkl), GFP_KERNEL_ACCOUNT);
792 if (!nkl) {
793 ret = -ENOMEM;
794 goto out;
795 }
796 ret = netkit_link_init(nkl, &link_primer, attr, dev, prog);
797 if (ret) {
798 kfree(nkl);
799 goto out;
800 }
801 ret = netkit_link_prog_attach(&nkl->link,
802 attr->link_create.flags,
803 attr->link_create.netkit.relative_fd,
804 attr->link_create.netkit.expected_revision);
805 if (ret) {
806 nkl->dev = NULL;
807 bpf_link_cleanup(&link_primer);
808 goto out;
809 }
810 ret = bpf_link_settle(&link_primer);
811 out:
812 rtnl_unlock();
813 return ret;
814 }
815
netkit_release_all(struct net_device * dev)816 static void netkit_release_all(struct net_device *dev)
817 {
818 struct bpf_mprog_entry *entry;
819 struct bpf_tuple tuple = {};
820 struct bpf_mprog_fp *fp;
821 struct bpf_mprog_cp *cp;
822
823 entry = netkit_entry_fetch(dev, false);
824 if (!entry)
825 return;
826 netkit_entry_update(dev, NULL);
827 netkit_entry_sync();
828 bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
829 if (tuple.link)
830 netkit_link(tuple.link)->dev = NULL;
831 else
832 bpf_prog_put(tuple.prog);
833 }
834 }
835
/* ndo_uninit handler: release all programs still attached to @dev. */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);
}
840
netkit_del_link(struct net_device * dev,struct list_head * head)841 static void netkit_del_link(struct net_device *dev, struct list_head *head)
842 {
843 struct netkit *nk = netkit_priv(dev);
844 struct net_device *peer = rtnl_dereference(nk->peer);
845
846 RCU_INIT_POINTER(nk->peer, NULL);
847 unregister_netdevice_queue(dev, head);
848 if (peer) {
849 nk = netkit_priv(peer);
850 RCU_INIT_POINTER(nk->peer, NULL);
851 unregister_netdevice_queue(peer, head);
852 }
853 }
854
netkit_change_link(struct net_device * dev,struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)855 static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
856 struct nlattr *data[],
857 struct netlink_ext_ack *extack)
858 {
859 struct netkit *nk = netkit_priv(dev);
860 struct net_device *peer = rtnl_dereference(nk->peer);
861 enum netkit_action policy;
862 struct nlattr *attr;
863 int err;
864
865 if (!nk->primary) {
866 NL_SET_ERR_MSG(extack,
867 "netkit link settings can be changed only through the primary device");
868 return -EACCES;
869 }
870
871 if (data[IFLA_NETKIT_MODE]) {
872 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE],
873 "netkit link operating mode cannot be changed after device creation");
874 return -EACCES;
875 }
876
877 if (data[IFLA_NETKIT_PEER_INFO]) {
878 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
879 "netkit peer info cannot be changed after device creation");
880 return -EINVAL;
881 }
882
883 if (data[IFLA_NETKIT_POLICY]) {
884 attr = data[IFLA_NETKIT_POLICY];
885 policy = nla_get_u32(attr);
886 err = netkit_check_policy(policy, attr, extack);
887 if (err)
888 return err;
889 WRITE_ONCE(nk->policy, policy);
890 }
891
892 if (data[IFLA_NETKIT_PEER_POLICY]) {
893 err = -EOPNOTSUPP;
894 attr = data[IFLA_NETKIT_PEER_POLICY];
895 policy = nla_get_u32(attr);
896 if (peer)
897 err = netkit_check_policy(policy, attr, extack);
898 if (err)
899 return err;
900 nk = netkit_priv(peer);
901 WRITE_ONCE(nk->policy, policy);
902 }
903
904 return 0;
905 }
906
netkit_get_size(const struct net_device * dev)907 static size_t netkit_get_size(const struct net_device *dev)
908 {
909 return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
910 nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */
911 nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
912 nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
913 0;
914 }
915
netkit_fill_info(struct sk_buff * skb,const struct net_device * dev)916 static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
917 {
918 struct netkit *nk = netkit_priv(dev);
919 struct net_device *peer = rtnl_dereference(nk->peer);
920
921 if (nla_put_u8(skb, IFLA_NETKIT_PRIMARY, nk->primary))
922 return -EMSGSIZE;
923 if (nla_put_u32(skb, IFLA_NETKIT_POLICY, nk->policy))
924 return -EMSGSIZE;
925 if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode))
926 return -EMSGSIZE;
927
928 if (peer) {
929 nk = netkit_priv(peer);
930 if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy))
931 return -EMSGSIZE;
932 }
933
934 return 0;
935 }
936
/*
 * Netlink attribute policy for the IFLA_NETKIT_* attributes. The primary
 * flag is rejected on input with an explanatory message.
 */
static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
	[IFLA_NETKIT_PEER_INFO]		= { .len = sizeof(struct ifinfomsg) },
	[IFLA_NETKIT_POLICY]		= { .type = NLA_U32 },
	[IFLA_NETKIT_MODE]		= { .type = NLA_U32 },
	[IFLA_NETKIT_PEER_POLICY]	= { .type = NLA_U32 },
	[IFLA_NETKIT_PRIMARY]		= { .type = NLA_REJECT,
					    .reject_message = "Primary attribute is read-only" },
};
945
/* rtnetlink link operations for the "netkit" kind; registered in netkit_init(). */
static struct rtnl_link_ops netkit_link_ops = {
	.kind		= DRV_NAME,
	.priv_size	= sizeof(struct netkit),
	.setup		= netkit_setup,
	.newlink	= netkit_new_link,
	.dellink	= netkit_del_link,
	.changelink	= netkit_change_link,
	.get_link_net	= netkit_get_link_net,
	.get_size	= netkit_get_size,
	.fill_info	= netkit_fill_info,
	.policy		= netkit_policy,
	.validate	= netkit_validate,
	.maxtype	= IFLA_NETKIT_MAX,
};
960
netkit_init(void)961 static __init int netkit_init(void)
962 {
963 BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT ||
964 (int)NETKIT_PASS != (int)TCX_PASS ||
965 (int)NETKIT_DROP != (int)TCX_DROP ||
966 (int)NETKIT_REDIRECT != (int)TCX_REDIRECT);
967
968 return rtnl_link_register(&netkit_link_ops);
969 }
970
/* Module exit: unregister the netkit rtnl link kind. */
static __exit void netkit_exit(void)
{
	rtnl_link_unregister(&netkit_link_ops);
}
975
/* Module entry/exit points and metadata; the rtnl-link alias uses DRV_NAME. */
module_init(netkit_init);
module_exit(netkit_exit);

MODULE_DESCRIPTION("BPF-programmable network device");
MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
984