xref: /linux/drivers/net/netkit.c (revision 7a5f1cd22d47f8ca4b760b6334378ae42c1bd24b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2023 Isovalent */
3 
4 #include <linux/netdevice.h>
5 #include <linux/ethtool.h>
6 #include <linux/etherdevice.h>
7 #include <linux/filter.h>
8 #include <linux/netfilter_netdev.h>
9 #include <linux/bpf_mprog.h>
10 #include <linux/indirect_call_wrapper.h>
11 
12 #include <net/netdev_lock.h>
13 #include <net/netdev_queues.h>
14 #include <net/netdev_rx_queue.h>
15 #include <net/xdp_sock_drv.h>
16 #include <net/netkit.h>
17 #include <net/dst.h>
18 #include <net/tcx.h>
19 
20 #define NETKIT_DRV_NAME	"netkit"
21 
22 #define NETKIT_NUM_RX_QUEUES_MAX  1024
23 #define NETKIT_NUM_TX_QUEUES_MAX  1
24 
25 #define NETKIT_NUM_RX_QUEUES_REAL 1
26 #define NETKIT_NUM_TX_QUEUES_REAL 1
27 
/* Per-device private state of a netkit device. Fields accessed on every
 * packet are grouped into the fastpath cacheline group; configuration
 * state that is only read/written on control operations lives in the
 * slowpath group.
 */
struct netkit {
	__cacheline_group_begin(netkit_fastpath);
	/* Other end of the pair (RCU-protected), NULL in single-device
	 * mode or once the link has been torn down.
	 */
	struct net_device __rcu *peer;
	/* Currently active BPF mprog entry, NULL when no programs are
	 * attached.
	 */
	struct bpf_mprog_entry __rcu *active;
	/* Default xmit verdict applied when no BPF program decides. */
	enum netkit_action policy;
	/* Scrub behavior applied when crossing network namespaces. */
	enum netkit_scrub scrub;
	/* Backing storage for the mprog entries flipped via 'active'. */
	struct bpf_mprog_bundle	bundle;
	__cacheline_group_end(netkit_fastpath);

	__cacheline_group_begin(netkit_slowpath);
	enum netkit_mode mode;		/* NETKIT_L2 or NETKIT_L3 */
	enum netkit_pairing pair;	/* paired or single device */
	bool primary;			/* management side of a pair */
	u32 headroom;			/* requested needed_headroom */
	__cacheline_group_end(netkit_slowpath);
};
44 
/* BPF link wrapper binding a bpf_link to the netkit device it is
 * attached to. 'dev' is set to NULL once the link is released or the
 * device goes away (see netkit_release_all()).
 */
struct netkit_link {
	struct bpf_link link;
	struct net_device *dev;
};
49 
50 static struct rtnl_link_ops netkit_link_ops;
51 
/* Run the attached BPF program chain over @skb. @ret carries the
 * device's default policy and is returned unchanged when the entry
 * holds no programs. Iteration stops at the first program returning
 * anything other than NETKIT_NEXT, and that verdict wins.
 */
static __always_inline int
netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
	   enum netkit_action ret)
{
	const struct bpf_mprog_fp *fp;
	const struct bpf_prog *prog;

	bpf_mprog_foreach_prog(entry, fp, prog) {
		/* Data pointers may have been invalidated by a prior
		 * program; recompute before each run.
		 */
		bpf_compute_data_pointers(skb);
		ret = bpf_prog_run(prog, skb);
		if (ret != NETKIT_NEXT)
			break;
	}
	return ret;
}
67 
68 static void netkit_xnet(struct sk_buff *skb)
69 {
70 	skb->priority = 0;
71 	skb->mark = 0;
72 }
73 
74 static void netkit_prep_forward(struct sk_buff *skb,
75 				bool xnet, bool xnet_scrub)
76 {
77 	skb_scrub_packet(skb, false);
78 	nf_skip_egress(skb, true);
79 	skb_reset_mac_header(skb);
80 	if (!xnet)
81 		return;
82 	skb_clear_tstamp(skb);
83 	if (xnet_scrub)
84 		netkit_xnet(skb);
85 }
86 
/* Accessor for the netkit private area embedded in @dev. */
static struct netkit *netkit_priv(const struct net_device *dev)
{
	return netdev_priv(dev);
}
91 
/* ndo_start_xmit: forward @skb to the peer device, running any attached
 * BPF programs on the way. The program chain (or the default policy
 * when no programs are attached) decides between delivering to the
 * peer (PASS/NEXT), redirecting elsewhere (REDIRECT) or dropping.
 */
static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
	struct netkit *nk = netkit_priv(dev);
	enum netkit_action ret = READ_ONCE(nk->policy);
	netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
	const struct bpf_mprog_entry *entry;
	struct net_device *peer;
	int len = skb->len;

	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
	rcu_read_lock();
	peer = rcu_dereference(nk->peer);
	/* Drop early when there is no live peer, or the skb cannot be
	 * safely handed over (no pullable Ethernet header, or frags
	 * backed by userspace memory that cannot be orphaned).
	 */
	if (unlikely(!peer || !(peer->flags & IFF_UP) ||
		     !pskb_may_pull(skb, ETH_HLEN) ||
		     skb_orphan_frags(skb, GFP_ATOMIC)))
		goto drop;
	netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)),
			    nk->scrub);
	eth_skb_pkt_type(skb, peer);
	skb->dev = peer;
	entry = rcu_dereference(nk->active);
	if (entry)
		ret = netkit_run(entry, skb, ret);
	switch (ret) {
	case NETKIT_NEXT:
	case NETKIT_PASS:
		eth_skb_pull_mac(skb);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
			/* Account tx on this side and rx on the peer. */
			dev_sw_netstats_tx_add(dev, 1, len);
			dev_sw_netstats_rx_add(peer, len);
		} else {
			goto drop_stats;
		}
		break;
	case NETKIT_REDIRECT:
		dev_sw_netstats_tx_add(dev, 1, len);
		skb_do_redirect(skb);
		break;
	case NETKIT_DROP:
	default:
drop:
		kfree_skb(skb);
drop_stats:
		dev_core_stats_tx_dropped_inc(dev);
		ret_dev = NET_XMIT_DROP;
		break;
	}
	rcu_read_unlock();
	bpf_net_ctx_clear(bpf_net_ctx);
	return ret_dev;
}
145 
146 static int netkit_open(struct net_device *dev)
147 {
148 	struct netkit *nk = netkit_priv(dev);
149 	struct net_device *peer = rtnl_dereference(nk->peer);
150 
151 	if (nk->pair == NETKIT_DEVICE_SINGLE) {
152 		netif_carrier_on(dev);
153 		return 0;
154 	}
155 	if (!peer)
156 		return -ENOTCONN;
157 	if (peer->flags & IFF_UP) {
158 		netif_carrier_on(dev);
159 		netif_carrier_on(peer);
160 	}
161 	return 0;
162 }
163 
164 static int netkit_close(struct net_device *dev)
165 {
166 	struct netkit *nk = netkit_priv(dev);
167 	struct net_device *peer = rtnl_dereference(nk->peer);
168 
169 	netif_carrier_off(dev);
170 	if (peer)
171 		netif_carrier_off(peer);
172 	return 0;
173 }
174 
175 static int netkit_get_iflink(const struct net_device *dev)
176 {
177 	struct netkit *nk = netkit_priv(dev);
178 	struct net_device *peer;
179 	int iflink = 0;
180 
181 	rcu_read_lock();
182 	peer = rcu_dereference(nk->peer);
183 	if (peer)
184 		iflink = READ_ONCE(peer->ifindex);
185 	rcu_read_unlock();
186 	return iflink;
187 }
188 
/* ndo_set_rx_mode_async stub: netkit has no address filtering. */
static void netkit_set_multicast(struct net_device *dev,
				 struct netdev_hw_addr_list *uc,
				 struct netdev_hw_addr_list *mc)
{
	/* Nothing to do, we receive whatever gets pushed to us! */
}
195 
196 static int netkit_set_macaddr(struct net_device *dev, void *sa)
197 {
198 	struct netkit *nk = netkit_priv(dev);
199 
200 	if (nk->mode != NETKIT_L2)
201 		return -EOPNOTSUPP;
202 
203 	return eth_mac_addr(dev, sa);
204 }
205 
/* ndo_set_rx_headroom: record the requested headroom for this side and
 * propagate the maximum of both sides to needed_headroom on both
 * devices so either direction can satisfy the larger requirement.
 * A negative request resets to the default NET_SKB_PAD.
 */
static void netkit_set_headroom(struct net_device *dev, int headroom)
{
	struct netkit *nk = netkit_priv(dev), *nk2;
	struct net_device *peer;

	if (headroom < 0)
		headroom = NET_SKB_PAD;

	rcu_read_lock();
	peer = rcu_dereference(nk->peer);
	if (!peer) {
		nk->headroom = headroom;
		dev->needed_headroom = headroom;
	} else {
		nk2 = netkit_priv(peer);
		nk->headroom = headroom;
		/* Both sides must accommodate the larger request. */
		headroom = max(nk->headroom, nk2->headroom);

		peer->needed_headroom = headroom;
		dev->needed_headroom = headroom;
	}
	rcu_read_unlock();
}
229 
/* ndo_get_peer_dev: return the RCU-protected peer pointer. Exposed
 * with INDIRECT_CALLABLE_SCOPE so callers can avoid a retpoline via
 * INDIRECT_CALL on the ops table. Caller must hold rcu_read_lock().
 */
INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
{
	return rcu_dereference(netkit_priv(dev)->peer);
}
234 
/* ndo_get_stats64: fold the per-CPU tstats into @stats and add the
 * core tx_dropped counter maintained in netkit_xmit().
 */
static void netkit_get_stats(struct net_device *dev,
			     struct rtnl_link_stats64 *stats)
{
	dev_fetch_sw_netstats(stats, dev->tstats);
	stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
}
241 
242 static bool netkit_xsk_supported_at_phys(const struct net_device *dev)
243 {
244 	if (!dev->netdev_ops->ndo_bpf ||
245 	    !dev->netdev_ops->ndo_xdp_xmit ||
246 	    !dev->netdev_ops->ndo_xsk_wakeup)
247 		return false;
248 	return true;
249 }
250 
/* ndo_bpf: support XSK pool setup on single-mode netkit devices by
 * translating the request onto the physical device backing the leased
 * rx queue. XDP program attachment is rejected; pool setup on paired
 * devices or non-leased queues is rejected as well.
 */
static int netkit_xsk(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct netkit *nk = netkit_priv(dev);
	struct netdev_bpf xdp_lower;
	struct netdev_rx_queue *rxq;
	struct net_device *phys;
	bool create = false;
	int ret = -EBUSY;

	switch (xdp->command) {
	case XDP_SETUP_XSK_POOL:
		if (nk->pair == NETKIT_DEVICE_PAIR)
			return -EOPNOTSUPP;
		if (xdp->xsk.queue_id >= dev->real_num_rx_queues)
			return -EINVAL;

		rxq = __netif_get_rx_queue(dev, xdp->xsk.queue_id);
		if (!rxq->lease)
			return -EOPNOTSUPP;

		phys = rxq->lease->dev;
		if (!netkit_xsk_supported_at_phys(phys))
			return -EOPNOTSUPP;

		/* Non-NULL pool means pool creation, NULL means teardown. */
		create = xdp->xsk.pool;
		/* Forward the request, but rewrite the queue id to the
		 * physical device's queue backing the lease.
		 */
		memcpy(&xdp_lower, xdp, sizeof(xdp_lower));
		xdp_lower.xsk.queue_id = get_netdev_rx_queue_index(rxq->lease);
		break;
	case XDP_SETUP_PROG:
		return -EOPNOTSUPP;
	default:
		return -EINVAL;
	}

	netdev_lock(phys);
	if (create &&
	    (phys->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) {
		ret = -EOPNOTSUPP;
		goto out;
	}
	/* Teardown always proceeds; creation is refused with -EBUSY while
	 * memory-provider channels are active on the physical device.
	 */
	if (!create || !dev_get_min_mp_channel_count(phys))
		ret = phys->netdev_ops->ndo_bpf(phys, &xdp_lower);
out:
	netdev_unlock(phys);
	return ret;
}
297 
/* ndo_xsk_wakeup: forward the wakeup to the physical device backing
 * the leased rx queue, translating the queue id accordingly.
 */
static int netkit_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct netdev_rx_queue *rxq, *rxq_lease;
	struct net_device *phys;

	if (queue_id >= dev->real_num_rx_queues)
		return -EINVAL;

	rxq = __netif_get_rx_queue(dev, queue_id);
	rxq_lease = READ_ONCE(rxq->lease);
	if (unlikely(!rxq_lease))
		return -EOPNOTSUPP;

	/* netkit_xsk already validated full xsk support, hence it's
	 * fine to call into ndo_xsk_wakeup right away given this
	 * was a prerequisite to get here in the first place. The
	 * phys xsk support cannot change without tearing down the
	 * device (which clears the lease first).
	 */
	phys = rxq_lease->dev;
	return phys->netdev_ops->ndo_xsk_wakeup(phys,
			get_netdev_rx_queue_index(rxq_lease), flags);
}
321 
/* ndo_init: give netkit its own lockdep classes to avoid false-positive
 * deadlock reports when stacking devices.
 */
static int netkit_init(struct net_device *dev)
{
	netdev_lockdep_set_classes(dev);
	return 0;
}
327 
328 static void netkit_uninit(struct net_device *dev);
329 
/* Netdev callbacks; also used as the identity check for netkit devices
 * in netkit_dev_fetch() and netkit_check_lease_unregister().
 */
static const struct net_device_ops netkit_netdev_ops = {
	.ndo_init		= netkit_init,
	.ndo_open		= netkit_open,
	.ndo_stop		= netkit_close,
	.ndo_start_xmit		= netkit_xmit,
	.ndo_set_rx_mode_async	= netkit_set_multicast,
	.ndo_set_rx_headroom	= netkit_set_headroom,
	.ndo_set_mac_address	= netkit_set_macaddr,
	.ndo_get_iflink		= netkit_get_iflink,
	.ndo_get_peer_dev	= netkit_peer_dev,
	.ndo_get_stats64	= netkit_get_stats,
	.ndo_uninit		= netkit_uninit,
	.ndo_bpf		= netkit_xsk,
	.ndo_xsk_wakeup		= netkit_xsk_wakeup,
	.ndo_features_check	= passthru_features_check,
};
346 
/* ethtool get_drvinfo: report the driver name only. */
static void netkit_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver));
}
352 
/* Minimal ethtool support: driver identification only. */
static const struct ethtool_ops netkit_ethtool_ops = {
	.get_drvinfo		= netkit_get_drvinfo,
};
356 
/* ndo_queue_create: grow the real rx queue count by one to make room
 * for a leased queue. Returns the index of the newly usable queue
 * (the previous real count) on success, negative errno on failure.
 */
static int netkit_queue_create(struct net_device *dev,
			       struct netlink_ext_ack *extack)
{
	struct netkit *nk = netkit_priv(dev);
	u32 rxq_count_old, rxq_count_new;
	int err;

	rxq_count_old = dev->real_num_rx_queues;
	rxq_count_new = rxq_count_old + 1;

	/* In paired mode, only the non-primary (peer) device can
	 * create leased queues since the primary is the management
	 * side. In single device mode, leasing is always allowed.
	 */
	if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary) {
		NL_SET_ERR_MSG(extack,
			       "netkit can only lease against the peer device");
		return -EOPNOTSUPP;
	}

	err = netif_set_real_num_rx_queues(dev, rxq_count_new);
	if (err) {
		/* Distinguish hitting the allocated maximum from other
		 * failures for a clearer extack message.
		 */
		if (rxq_count_new > dev->num_rx_queues)
			NL_SET_ERR_MSG(extack,
				       "netkit maximum queue limit reached");
		else
			NL_SET_ERR_MSG_FMT(extack,
					   "netkit cannot create more queues err=%d", err);
		return err;
	}

	return rxq_count_old;
}
390 
/* Queue management: only dynamic queue creation for leasing is offered. */
static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = {
	.ndo_queue_create	= netkit_queue_create,
};
394 
395 static struct net_device *netkit_alloc(struct nlattr *tb[],
396 				       const char *ifname,
397 				       unsigned char name_assign_type,
398 				       unsigned int num_tx_queues,
399 				       unsigned int num_rx_queues)
400 {
401 	const struct rtnl_link_ops *ops = &netkit_link_ops;
402 	struct net_device *dev;
403 
404 	if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX ||
405 	    num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX)
406 		return ERR_PTR(-EOPNOTSUPP);
407 
408 	dev = alloc_netdev_mqs(ops->priv_size, ifname,
409 			       name_assign_type, ops->setup,
410 			       num_tx_queues, num_rx_queues);
411 	if (dev) {
412 		dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL;
413 		dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL;
414 	}
415 	return dev;
416 }
417 
/* Tear down all leased rx queues on @dev (queues 1..n; queue 0 is the
 * device's own and never leased). Each unlease is done with both the
 * netkit device and the leasing physical device locked.
 */
static void netkit_queue_unlease(struct net_device *dev)
{
	struct netdev_rx_queue *rxq, *rxq_lease;
	struct net_device *dev_lease;
	int i;

	/* Only the built-in queue present, nothing was ever leased. */
	if (dev->real_num_rx_queues == 1)
		return;

	netdev_lock(dev);
	for (i = 1; i < dev->real_num_rx_queues; i++) {
		rxq = __netif_get_rx_queue(dev, i);
		rxq_lease = rxq->lease;
		dev_lease = rxq_lease->dev;

		netdev_lock(dev_lease);
		netdev_rx_queue_unlease(rxq, rxq_lease);
		netdev_unlock(dev_lease);
	}
	netdev_unlock(dev);
}
439 
/* rtnl_link_ops setup hook: configure netkit device defaults — an
 * Ethernet device without a queue, without ARP (L3 default mode), with
 * software offload features enabled and per-CPU tstats accounting.
 */
static void netkit_setup(struct net_device *dev)
{
	static const netdev_features_t netkit_features_hw_vlan =
		NETIF_F_HW_VLAN_CTAG_TX |
		NETIF_F_HW_VLAN_CTAG_RX |
		NETIF_F_HW_VLAN_STAG_TX |
		NETIF_F_HW_VLAN_STAG_RX;
	static const netdev_features_t netkit_features =
		netkit_features_hw_vlan |
		NETIF_F_SG |
		NETIF_F_FRAGLIST |
		NETIF_F_HW_CSUM |
		NETIF_F_RXCSUM |
		NETIF_F_SCTP_CRC |
		NETIF_F_HIGHDMA |
		NETIF_F_GSO_SOFTWARE |
		NETIF_F_GSO_ENCAP_ALL;

	ether_setup(dev);
	dev->max_mtu = ETH_MAX_MTU;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;

	/* L3 is the default mode; ARP gets re-enabled for L2 devices in
	 * netkit_new_link().
	 */
	dev->flags |= IFF_NOARP;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	dev->priv_flags |= IFF_PHONY_HEADROOM;
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->priv_flags |= IFF_DISABLE_NETPOLL;
	dev->lltx = true;

	dev->netdev_ops     = &netkit_netdev_ops;
	dev->ethtool_ops    = &netkit_ethtool_ops;
	dev->queue_mgmt_ops = &netkit_queue_mgmt_ops;

	dev->features |= netkit_features;
	dev->hw_features = netkit_features;
	dev->hw_enc_features = netkit_features;
	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
	/* VLAN acceleration is not exposed as a vlan_feature. */
	dev->vlan_features = dev->features & ~netkit_features_hw_vlan;

	dev->needs_free_netdev = true;

	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
}
484 
485 static struct net *netkit_get_link_net(const struct net_device *dev)
486 {
487 	struct netkit *nk = netkit_priv(dev);
488 	struct net_device *peer = rtnl_dereference(nk->peer);
489 
490 	return peer ? dev_net(peer) : dev_net(dev);
491 }
492 
493 static int netkit_check_policy(int policy, struct nlattr *tb,
494 			       struct netlink_ext_ack *extack)
495 {
496 	switch (policy) {
497 	case NETKIT_PASS:
498 	case NETKIT_DROP:
499 		return 0;
500 	default:
501 		NL_SET_ERR_MSG_ATTR(extack, tb,
502 				    "Provided default xmit policy not supported");
503 		return -EINVAL;
504 	}
505 }
506 
507 static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
508 			   struct netlink_ext_ack *extack)
509 {
510 	struct nlattr *attr = tb[IFLA_ADDRESS];
511 
512 	if (!attr)
513 		return 0;
514 	if (nla_len(attr) != ETH_ALEN)
515 		return -EINVAL;
516 	if (!is_valid_ether_addr(nla_data(attr)))
517 		return -EADDRNOTAVAIL;
518 	return 0;
519 }
520 
/* rtnl newlink hook: create a netkit device, and in paired mode also
 * its peer (possibly in a different netns). Attribute parsing, peer
 * creation/registration, then primary registration; on failure after
 * the peer exists, the peer is unregistered again.
 */
static int netkit_new_link(struct net_device *dev,
			   struct rtnl_newlink_params *params,
			   struct netlink_ext_ack *extack)
{
	struct net *peer_net = rtnl_newlink_peer_net(params);
	enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
	enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp, *attr;
	enum netkit_pairing pair = NETKIT_DEVICE_PAIR;
	enum netkit_action policy_prim = NETKIT_PASS;
	enum netkit_action policy_peer = NETKIT_PASS;
	bool seen_peer = false, seen_scrub = false;
	struct nlattr **data = params->data;
	enum netkit_mode mode = NETKIT_L3;
	unsigned char ifname_assign_type;
	struct nlattr **tb = params->tb;
	u16 headroom = 0, tailroom = 0;
	struct ifinfomsg *ifmp = NULL;
	struct net_device *peer = NULL;
	char ifname[IFNAMSIZ];
	struct netkit *nk;
	int err;

	tbp = tb;
	if (data) {
		if (data[IFLA_NETKIT_MODE])
			mode = nla_get_u32(data[IFLA_NETKIT_MODE]);
		if (data[IFLA_NETKIT_PEER_INFO]) {
			/* Peer-specific IFLA attrs nest inside PEER_INFO;
			 * use them (tbp) for the peer instead of tb.
			 */
			attr = data[IFLA_NETKIT_PEER_INFO];
			ifmp = nla_data(attr);
			rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack);
			tbp = peer_tb;
		}
		if (data[IFLA_NETKIT_SCRUB])
			scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]);
		if (data[IFLA_NETKIT_PEER_SCRUB])
			scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]);
		if (data[IFLA_NETKIT_POLICY]) {
			attr = data[IFLA_NETKIT_POLICY];
			policy_prim = nla_get_u32(attr);
			err = netkit_check_policy(policy_prim, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_PEER_POLICY]) {
			attr = data[IFLA_NETKIT_PEER_POLICY];
			policy_peer = nla_get_u32(attr);
			err = netkit_check_policy(policy_peer, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_HEADROOM])
			headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]);
		if (data[IFLA_NETKIT_TAILROOM])
			tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]);
		if (data[IFLA_NETKIT_PAIRING])
			pair = nla_get_u32(data[IFLA_NETKIT_PAIRING]);

		/* Remember which pair/scrub-related attrs the user set so
		 * they can be rejected for single-device mode below.
		 */
		seen_scrub = data[IFLA_NETKIT_SCRUB];
		seen_peer = data[IFLA_NETKIT_PEER_INFO] ||
			    data[IFLA_NETKIT_PEER_SCRUB] ||
			    data[IFLA_NETKIT_PEER_POLICY];
	}

	if (ifmp && tbp[IFLA_IFNAME]) {
		nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
		ifname_assign_type = NET_NAME_USER;
	} else {
		strscpy(ifname, "nk%d", IFNAMSIZ);
		ifname_assign_type = NET_NAME_ENUM;
	}
	/* Explicit MAC addresses only make sense in L2 mode. */
	if (mode != NETKIT_L2 &&
	    (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
		return -EOPNOTSUPP;
	/* Single devices take no peer/scrub attrs and keep PASS policy. */
	if (pair == NETKIT_DEVICE_SINGLE &&
	    (tb != tbp || seen_peer || seen_scrub ||
	     policy_prim != NETKIT_PASS))
		return -EOPNOTSUPP;

	if (pair == NETKIT_DEVICE_PAIR) {
		peer = rtnl_create_link(peer_net, ifname, ifname_assign_type,
					&netkit_link_ops, tbp, extack);
		if (IS_ERR(peer))
			return PTR_ERR(peer);

		netif_inherit_tso_max(peer, dev);
		if (headroom)
			peer->needed_headroom = headroom;
		if (tailroom)
			peer->needed_tailroom = tailroom;
		if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
			eth_hw_addr_random(peer);
		if (ifmp && dev->ifindex)
			peer->ifindex = ifmp->ifi_index;

		nk = netkit_priv(peer);
		nk->primary = false;
		nk->policy = policy_peer;
		nk->scrub = scrub_peer;
		nk->mode = mode;
		nk->pair = pair;
		nk->headroom = headroom;
		bpf_mprog_bundle_init(&nk->bundle);

		err = register_netdevice(peer);
		if (err < 0)
			goto err_register_peer;
		netif_carrier_off(peer);
		/* netkit_setup() set IFF_NOARP for L3; undo it for L2. */
		if (mode == NETKIT_L2)
			dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);

		err = rtnl_configure_link(peer, NULL, 0, NULL);
		if (err < 0)
			goto err_configure_peer;
	}

	if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);
	if (tb[IFLA_IFNAME])
		nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
	else
		strscpy(dev->name, "nk%d", IFNAMSIZ);
	if (headroom)
		dev->needed_headroom = headroom;
	if (tailroom)
		dev->needed_tailroom = tailroom;

	nk = netkit_priv(dev);
	nk->primary = true;
	nk->policy = policy_prim;
	nk->scrub = scrub_prim;
	nk->mode = mode;
	nk->pair = pair;
	nk->headroom = headroom;
	bpf_mprog_bundle_init(&nk->bundle);

	/* AF_XDP via queue leasing is only offered in single mode. */
	if (pair == NETKIT_DEVICE_SINGLE)
		xdp_set_features_flag(dev, NETDEV_XDP_ACT_XSK);

	err = register_netdevice(dev);
	if (err < 0)
		goto err_configure_peer;
	netif_carrier_off(dev);
	if (mode == NETKIT_L2)
		dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);

	/* Only link the two sides once both are fully registered. */
	rcu_assign_pointer(netkit_priv(dev)->peer, peer);
	if (peer)
		rcu_assign_pointer(netkit_priv(peer)->peer, dev);
	return 0;
err_configure_peer:
	if (peer)
		unregister_netdevice(peer);
	return err;
err_register_peer:
	free_netdev(peer);
	return err;
}
679 
680 static struct bpf_mprog_entry *netkit_entry_fetch(struct net_device *dev,
681 						  bool bundle_fallback)
682 {
683 	struct netkit *nk = netkit_priv(dev);
684 	struct bpf_mprog_entry *entry;
685 
686 	ASSERT_RTNL();
687 	entry = rcu_dereference_rtnl(nk->active);
688 	if (entry)
689 		return entry;
690 	if (bundle_fallback)
691 		return &nk->bundle.a;
692 	return NULL;
693 }
694 
/* Publish @entry as the active mprog entry for @dev (RTNL held).
 * Passing NULL disables the program chain on the fast path.
 */
static void netkit_entry_update(struct net_device *dev,
				struct bpf_mprog_entry *entry)
{
	struct netkit *nk = netkit_priv(dev);

	ASSERT_RTNL();
	rcu_assign_pointer(nk->active, entry);
}
703 
/* Wait for in-flight netkit_xmit() RCU readers to drop references to
 * the previously active entry before it can be reused or released.
 */
static void netkit_entry_sync(void)
{
	synchronize_rcu();
}
708 
/* Resolve the netkit device BPF attach/detach/query operations act on.
 * @ifindex must name the primary device of a pair; @which selects
 * whether the primary itself or its peer is returned. Single devices
 * do not support BPF attachment. Returns ERR_PTR on failure.
 */
static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 which)
{
	struct net_device *dev;
	struct netkit *nk;

	ASSERT_RTNL();

	switch (which) {
	case BPF_NETKIT_PRIMARY:
	case BPF_NETKIT_PEER:
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		return ERR_PTR(-ENODEV);
	/* Ops table identity check: reject non-netkit devices. */
	if (dev->netdev_ops != &netkit_netdev_ops)
		return ERR_PTR(-ENXIO);

	nk = netkit_priv(dev);
	if (!nk->primary)
		return ERR_PTR(-EACCES);
	if (nk->pair == NETKIT_DEVICE_SINGLE)
		return ERR_PTR(-EOPNOTSUPP);
	if (which == BPF_NETKIT_PEER) {
		dev = rcu_dereference_rtnl(nk->peer);
		if (!dev)
			return ERR_PTR(-ENODEV);
	}
	return dev;
}
742 
/* BPF_PROG_ATTACH entry point: attach @prog to the mprog chain of the
 * targeted netkit device, optionally replacing an existing program
 * (BPF_F_REPLACE). New entries are published via RCU before the old
 * one is committed for release.
 */
int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_mprog_entry *entry, *entry_new;
	struct bpf_prog *replace_prog = NULL;
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex,
			       attr->attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	/* bundle_fallback: attaching to an empty device is valid. */
	entry = netkit_entry_fetch(dev, true);
	if (attr->attach_flags & BPF_F_REPLACE) {
		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd,
						 prog->type);
		if (IS_ERR(replace_prog)) {
			ret = PTR_ERR(replace_prog);
			replace_prog = NULL;
			goto out;
		}
	}
	ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, replace_prog,
			       attr->attach_flags, attr->relative_fd,
			       attr->expected_revision);
	if (!ret) {
		if (entry != entry_new) {
			/* Flip to the new entry and wait out readers of
			 * the old one before committing it.
			 */
			netkit_entry_update(dev, entry_new);
			netkit_entry_sync();
		}
		bpf_mprog_commit(entry);
	}
out:
	if (replace_prog)
		bpf_prog_put(replace_prog);
	rtnl_unlock();
	return ret;
}
783 
/* BPF_PROG_DETACH entry point: remove @prog from the targeted netkit
 * device's mprog chain. When the chain becomes empty the active entry
 * is cleared so the fast path skips program execution entirely.
 */
int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_mprog_entry *entry, *entry_new;
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex,
			       attr->attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	/* No fallback: detaching from an empty device is an error. */
	entry = netkit_entry_fetch(dev, false);
	if (!entry) {
		ret = -ENOENT;
		goto out;
	}
	ret = bpf_mprog_detach(entry, &entry_new, prog, NULL, attr->attach_flags,
			       attr->relative_fd, attr->expected_revision);
	if (!ret) {
		if (!bpf_mprog_total(entry_new))
			entry_new = NULL;
		netkit_entry_update(dev, entry_new);
		netkit_entry_sync();
		bpf_mprog_commit(entry);
	}
out:
	rtnl_unlock();
	return ret;
}
815 
/* BPF_PROG_QUERY entry point: report the programs attached to the
 * targeted netkit device back to userspace via @uattr.
 */
int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
{
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns,
			       attr->query.target_ifindex,
			       attr->query.attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	/* bpf_mprog_query handles a NULL entry (nothing attached). */
	ret = bpf_mprog_query(attr, uattr, netkit_entry_fetch(dev, false));
out:
	rtnl_unlock();
	return ret;
}
834 
/* Upcast from the embedded bpf_link to its containing netkit_link. */
static struct netkit_link *netkit_link(const struct bpf_link *link)
{
	return container_of(link, struct netkit_link, link);
}
839 
/* Attach a link-backed program to its device's mprog chain. Caller
 * holds RTNL (invoked from netkit_link_attach()).
 */
static int netkit_link_prog_attach(struct bpf_link *link, u32 flags,
				   u32 id_or_fd, u64 revision)
{
	struct netkit_link *nkl = netkit_link(link);
	struct bpf_mprog_entry *entry, *entry_new;
	struct net_device *dev = nkl->dev;
	int ret;

	ASSERT_RTNL();
	/* bundle_fallback: first attachment starts from the bundle. */
	entry = netkit_entry_fetch(dev, true);
	ret = bpf_mprog_attach(entry, &entry_new, link->prog, link, NULL, flags,
			       id_or_fd, revision);
	if (!ret) {
		if (entry != entry_new) {
			netkit_entry_update(dev, entry_new);
			netkit_entry_sync();
		}
		bpf_mprog_commit(entry);
	}
	return ret;
}
861 
/* bpf_link release hook: detach the link's program from the device's
 * mprog chain. A NULL nkl->dev means the device already went away
 * (netkit_release_all() cleared it), in which case nothing is left
 * to do. On success the link is disconnected from the device.
 */
static void netkit_link_release(struct bpf_link *link)
{
	struct netkit_link *nkl = netkit_link(link);
	struct bpf_mprog_entry *entry, *entry_new;
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
	dev = nkl->dev;
	if (!dev)
		goto out;
	entry = netkit_entry_fetch(dev, false);
	if (!entry) {
		ret = -ENOENT;
		goto out;
	}
	ret = bpf_mprog_detach(entry, &entry_new, link->prog, link, 0, 0, 0);
	if (!ret) {
		if (!bpf_mprog_total(entry_new))
			entry_new = NULL;
		netkit_entry_update(dev, entry_new);
		netkit_entry_sync();
		bpf_mprog_commit(entry);
		nkl->dev = NULL;
	}
out:
	/* Detach of a live link must not fail; flag if it ever does. */
	WARN_ON_ONCE(ret);
	rtnl_unlock();
}
891 
/* bpf_link update hook: swap the link's program for @nprog in place.
 * When @oprog is given it must match the currently attached program
 * (BPF_F_REPLACE semantics at the link level). On success the link
 * takes ownership of @nprog and releases the old program.
 */
static int netkit_link_update(struct bpf_link *link, struct bpf_prog *nprog,
			      struct bpf_prog *oprog)
{
	struct netkit_link *nkl = netkit_link(link);
	struct bpf_mprog_entry *entry, *entry_new;
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
	dev = nkl->dev;
	if (!dev) {
		ret = -ENOLINK;
		goto out;
	}
	if (oprog && link->prog != oprog) {
		ret = -EPERM;
		goto out;
	}
	oprog = link->prog;
	/* Replacing a program with itself is a no-op; drop the extra
	 * reference taken by the caller.
	 */
	if (oprog == nprog) {
		bpf_prog_put(nprog);
		goto out;
	}
	entry = netkit_entry_fetch(dev, false);
	if (!entry) {
		ret = -ENOENT;
		goto out;
	}
	ret = bpf_mprog_attach(entry, &entry_new, nprog, link, oprog,
			       BPF_F_REPLACE | BPF_F_ID,
			       link->prog->aux->id, 0);
	if (!ret) {
		/* In-place replacement never allocates a new entry. */
		WARN_ON_ONCE(entry != entry_new);
		oprog = xchg(&link->prog, nprog);
		bpf_prog_put(oprog);
		bpf_mprog_commit(entry);
	}
out:
	rtnl_unlock();
	return ret;
}
933 
/* bpf_link dealloc hook: free the containing netkit_link. */
static void netkit_link_dealloc(struct bpf_link *link)
{
	kfree(netkit_link(link));
}
938 
/* bpf_link show_fdinfo hook: print ifindex (0 once the device is gone)
 * and attach type for /proc/<pid>/fdinfo consumers.
 */
static void netkit_link_fdinfo(const struct bpf_link *link, struct seq_file *seq)
{
	const struct netkit_link *nkl = netkit_link(link);
	u32 ifindex = 0;

	/* RTNL stabilizes nkl->dev against concurrent release. */
	rtnl_lock();
	if (nkl->dev)
		ifindex = nkl->dev->ifindex;
	rtnl_unlock();

	seq_printf(seq, "ifindex:\t%u\n", ifindex);
	seq_printf(seq, "attach_type:\t%u (%s)\n",
		   link->attach_type,
		   link->attach_type == BPF_NETKIT_PRIMARY ? "primary" : "peer");
}
954 
/* bpf_link fill_link_info hook: populate bpf_link_info for
 * BPF_OBJ_GET_INFO_BY_FD; ifindex is 0 once the device is gone.
 */
static int netkit_link_fill_info(const struct bpf_link *link,
				 struct bpf_link_info *info)
{
	const struct netkit_link *nkl = netkit_link(link);
	u32 ifindex = 0;

	/* RTNL stabilizes nkl->dev against concurrent release. */
	rtnl_lock();
	if (nkl->dev)
		ifindex = nkl->dev->ifindex;
	rtnl_unlock();

	info->netkit.ifindex = ifindex;
	info->netkit.attach_type = link->attach_type;
	return 0;
}
970 
/* bpf_link detach hook: same as release, but reported as success. */
static int netkit_link_detach(struct bpf_link *link)
{
	netkit_link_release(link);
	return 0;
}
976 
/* bpf_link operations for netkit links. */
static const struct bpf_link_ops netkit_link_lops = {
	.release	= netkit_link_release,
	.detach		= netkit_link_detach,
	.dealloc	= netkit_link_dealloc,
	.update_prog	= netkit_link_update,
	.show_fdinfo	= netkit_link_fdinfo,
	.fill_link_info	= netkit_link_fill_info,
};
985 
/* Initialize @nkl for @dev/@prog and prime it so userspace can obtain
 * an fd for the link once it is settled.
 */
static int netkit_link_init(struct netkit_link *nkl,
			    struct bpf_link_primer *link_primer,
			    const union bpf_attr *attr,
			    struct net_device *dev,
			    struct bpf_prog *prog)
{
	bpf_link_init(&nkl->link, BPF_LINK_TYPE_NETKIT,
		      &netkit_link_lops, prog, attr->link_create.attach_type);
	nkl->dev = dev;
	return bpf_link_prime(&nkl->link, link_primer);
}
997 
/* BPF_LINK_CREATE entry point: allocate a netkit link, attach its
 * program to the targeted device's mprog chain, and settle the link
 * fd. On attach failure the primed link is cleaned up, which also
 * releases @prog.
 */
int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct netkit_link *nkl;
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns,
			       attr->link_create.target_ifindex,
			       attr->link_create.attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	nkl = kzalloc_obj(*nkl, GFP_KERNEL_ACCOUNT);
	if (!nkl) {
		ret = -ENOMEM;
		goto out;
	}
	ret = netkit_link_init(nkl, &link_primer, attr, dev, prog);
	if (ret) {
		kfree(nkl);
		goto out;
	}
	ret = netkit_link_prog_attach(&nkl->link,
				      attr->link_create.flags,
				      attr->link_create.netkit.relative_fd,
				      attr->link_create.netkit.expected_revision);
	if (ret) {
		/* Disconnect before cleanup so the link release path
		 * does not attempt a second detach from the device.
		 */
		nkl->dev = NULL;
		bpf_link_cleanup(&link_primer);
		goto out;
	}
	ret = bpf_link_settle(&link_primer);
out:
	rtnl_unlock();
	return ret;
}
1037 
/* Drop all BPF programs/links attached to @dev on device teardown.
 * The active entry is cleared (and readers synced) first, then each
 * attachment is released: link-backed entries are merely disconnected
 * (the link fd still owns the program), directly attached programs
 * have their reference dropped here.
 */
static void netkit_release_all(struct net_device *dev)
{
	struct bpf_mprog_entry *entry;
	struct bpf_tuple tuple = {};
	struct bpf_mprog_fp *fp;
	struct bpf_mprog_cp *cp;

	entry = netkit_entry_fetch(dev, false);
	if (!entry)
		return;
	netkit_entry_update(dev, NULL);
	netkit_entry_sync();
	bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
		if (tuple.link)
			netkit_link(tuple.link)->dev = NULL;
		else
			bpf_prog_put(tuple.prog);
	}
}
1057 
/* ndo_uninit: release BPF attachments and any leased rx queues. */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);
	netkit_queue_unlease(dev);
}
1063 
/* rtnl dellink hook: unlink both sides of the pair and queue both
 * devices for unregistration. Peer pointers are cleared first so the
 * fast path stops forwarding immediately.
 */
static void netkit_del_link(struct net_device *dev, struct list_head *head)
{
	struct netkit *nk = netkit_priv(dev);
	struct net_device *peer = rtnl_dereference(nk->peer);

	RCU_INIT_POINTER(nk->peer, NULL);
	unregister_netdevice_queue(dev, head);
	if (peer) {
		nk = netkit_priv(peer);
		RCU_INIT_POINTER(nk->peer, NULL);
		/* Guard against the peer already being in an unregister
		 * list (e.g. same-namespace teardown where the peer is
		 * in the caller's dev_kill_list). list_move_tail() on an
		 * already-queued device would otherwise corrupt that
		 * list's iteration. This situation can occur via netkit
		 * notifier, hence guard against this scenario.
		 */
		if (!unregister_netdevice_queued(peer))
			unregister_netdevice_queue(peer, head);
	}
}
1085 
/* rtnl changelink hook: only the default xmit policies (primary and
 * peer) may change after creation, and only through the primary
 * device; all other netkit attributes are fixed at creation time.
 */
static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	struct netkit *nk = netkit_priv(dev);
	struct net_device *peer = rtnl_dereference(nk->peer);
	enum netkit_action policy;
	struct nlattr *attr;
	int err, i;
	/* Attributes that are immutable after device creation, with
	 * human-readable names for the extack message.
	 */
	static const struct {
		u32 attr;
		char *name;
	} fixed_params[] = {
		{ IFLA_NETKIT_MODE,       "operating mode" },
		{ IFLA_NETKIT_SCRUB,      "scrubbing" },
		{ IFLA_NETKIT_PEER_SCRUB, "peer scrubbing" },
		{ IFLA_NETKIT_PEER_INFO,  "peer info" },
		{ IFLA_NETKIT_HEADROOM,   "headroom" },
		{ IFLA_NETKIT_TAILROOM,   "tailroom" },
		{ IFLA_NETKIT_PAIRING,    "pairing" },
	};

	if (!nk->primary) {
		NL_SET_ERR_MSG(extack,
			       "netkit link settings can be changed only through the primary device");
		return -EACCES;
	}

	for (i = 0; i < ARRAY_SIZE(fixed_params); i++) {
		attr = data[fixed_params[i].attr];
		if (attr) {
			NL_SET_ERR_MSG_ATTR_FMT(extack, attr,
						"netkit link %s cannot be changed after device creation",
						fixed_params[i].name);
			return -EACCES;
		}
	}

	if (data[IFLA_NETKIT_POLICY]) {
		/* Policy changes require a paired device. */
		err = -EOPNOTSUPP;
		attr = data[IFLA_NETKIT_POLICY];
		policy = nla_get_u32(attr);
		if (nk->pair == NETKIT_DEVICE_PAIR)
			err = netkit_check_policy(policy, attr, extack);
		if (err)
			return err;
		/* Paired with READ_ONCE in netkit_xmit's fast path. */
		WRITE_ONCE(nk->policy, policy);
	}

	if (data[IFLA_NETKIT_PEER_POLICY]) {
		/* Peer policy requires the peer to still exist. */
		err = -EOPNOTSUPP;
		attr = data[IFLA_NETKIT_PEER_POLICY];
		policy = nla_get_u32(attr);
		if (peer)
			err = netkit_check_policy(policy, attr, extack);
		if (err)
			return err;
		nk = netkit_priv(peer);
		WRITE_ONCE(nk->policy, policy);
	}

	return 0;
}
1149 
/* Called from the netdev notifier on NETDEV_UNREGISTER: when @dev is
 * going away and has rx queues leased out to netkit devices, those
 * netkit devices are torn down along with it. The dev->dev.parent test
 * restricts this to devices backed by a physical parent — presumably
 * only such devices can lease queues; confirm against the lease setup
 * path elsewhere in this file.
 */
static void netkit_check_lease_unregister(struct net_device *dev)
{
	LIST_HEAD(list_kill);
	u32 q_idx;

	/* Only act on devices actually in the unregistering phase. */
	if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING ||
	    !dev->dev.parent)
		return;

	netdev_lock_ops(dev);
	for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) {
		struct net_device *tmp = dev;
		struct netdev_rx_queue *rxq;
		u32 tmp_q_idx = q_idx;

		/* Resolves the lease: on success tmp/tmp_q_idx are updated
		 * to the leasing (virtual) device and its queue index.
		 */
		rxq = __netif_get_rx_queue_lease(&tmp, &tmp_q_idx,
						 NETIF_PHYS_TO_VIRT);
		if (rxq && tmp != dev &&
		    tmp->netdev_ops == &netkit_netdev_ops) {
			/* A single phys device can have multiple queues leased
			 * to one netkit device. We can only queue that netkit
			 * device once to the list_kill. Queues of that phys
			 * device can be leased with different individual netkit
			 * devices, hence we batch via list_kill.
			 */
			if (unregister_netdevice_queued(tmp))
				continue;
			netkit_del_link(tmp, &list_kill);
		}
	}
	netdev_unlock_ops(dev);
	unregister_netdevice_many(&list_kill);
}
1183 
1184 static int netkit_notifier(struct notifier_block *this,
1185 			   unsigned long event, void *ptr)
1186 {
1187 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1188 
1189 	if (event == NETDEV_UNREGISTER)
1190 		netkit_check_lease_unregister(dev);
1191 	return NOTIFY_DONE;
1192 }
1193 
1194 static size_t netkit_get_size(const struct net_device *dev)
1195 {
1196 	return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
1197 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */
1198 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */
1199 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */
1200 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
1201 	       nla_total_size(sizeof(u8))  + /* IFLA_NETKIT_PRIMARY */
1202 	       nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */
1203 	       nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */
1204 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PAIRING */
1205 	       0;
1206 }
1207 
1208 static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
1209 {
1210 	struct netkit *nk = netkit_priv(dev);
1211 	struct net_device *peer = rtnl_dereference(nk->peer);
1212 
1213 	if (nla_put_u8(skb, IFLA_NETKIT_PRIMARY, nk->primary))
1214 		return -EMSGSIZE;
1215 	if (nla_put_u32(skb, IFLA_NETKIT_POLICY, nk->policy))
1216 		return -EMSGSIZE;
1217 	if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode))
1218 		return -EMSGSIZE;
1219 	if (nk->pair == NETKIT_DEVICE_PAIR &&
1220 	    nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
1221 		return -EMSGSIZE;
1222 	if (nla_put_u16(skb, IFLA_NETKIT_HEADROOM, dev->needed_headroom))
1223 		return -EMSGSIZE;
1224 	if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom))
1225 		return -EMSGSIZE;
1226 	if (nla_put_u32(skb, IFLA_NETKIT_PAIRING, nk->pair))
1227 		return -EMSGSIZE;
1228 
1229 	if (peer) {
1230 		nk = netkit_priv(peer);
1231 		if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy))
1232 			return -EMSGSIZE;
1233 		if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub))
1234 			return -EMSGSIZE;
1235 	}
1236 
1237 	return 0;
1238 }
1239 
/* Netlink attribute policy for IFLA_NETKIT_*. Only type/range validation
 * happens here; semantic checks (e.g. policy values) are done in
 * netkit_check_policy() and the validate/newlink callbacks.
 */
static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
	[IFLA_NETKIT_PEER_INFO]		= { .len = sizeof(struct ifinfomsg) },
	[IFLA_NETKIT_MODE]		= NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
	[IFLA_NETKIT_POLICY]		= { .type = NLA_U32 },
	[IFLA_NETKIT_PEER_POLICY]	= { .type = NLA_U32 },
	[IFLA_NETKIT_HEADROOM]		= { .type = NLA_U16 },
	[IFLA_NETKIT_TAILROOM]		= { .type = NLA_U16 },
	[IFLA_NETKIT_SCRUB]		= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PEER_SCRUB]	= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PAIRING]		= NLA_POLICY_MAX(NLA_U32, NETKIT_DEVICE_SINGLE),
	[IFLA_NETKIT_PRIMARY]		= { .type = NLA_REJECT,
					    .reject_message = "Primary attribute is read-only" },
};
1253 
/* rtnetlink link ops for "ip link add ... type netkit". */
static struct rtnl_link_ops netkit_link_ops = {
	.kind		= NETKIT_DRV_NAME,
	.priv_size	= sizeof(struct netkit),
	.alloc		= netkit_alloc,
	.setup		= netkit_setup,
	.newlink	= netkit_new_link,
	.dellink	= netkit_del_link,
	.changelink	= netkit_change_link,
	.get_link_net	= netkit_get_link_net,
	.get_size	= netkit_get_size,
	.fill_info	= netkit_fill_info,
	.policy		= netkit_policy,
	.validate	= netkit_validate,
	.peer_type	= IFLA_NETKIT_PEER_INFO,
	.maxtype	= IFLA_NETKIT_MAX,
};
1270 
/* Netdev notifier used to tear down netkit devices whose leased
 * queues belong to an unregistering physical device.
 */
static struct notifier_block netkit_netdev_notifier = {
	.notifier_call	= netkit_notifier,
};
1274 
1275 static __init int netkit_mod_init(void)
1276 {
1277 	int ret;
1278 
1279 	BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT ||
1280 		     (int)NETKIT_PASS != (int)TCX_PASS ||
1281 		     (int)NETKIT_DROP != (int)TCX_DROP ||
1282 		     (int)NETKIT_REDIRECT != (int)TCX_REDIRECT);
1283 
1284 	ret = rtnl_link_register(&netkit_link_ops);
1285 	if (ret)
1286 		return ret;
1287 	ret = register_netdevice_notifier(&netkit_netdev_notifier);
1288 	if (ret)
1289 		rtnl_link_unregister(&netkit_link_ops);
1290 	return ret;
1291 }
1292 
/* Module exit: unregister in reverse order of netkit_mod_init(). */
static __exit void netkit_mod_exit(void)
{
	unregister_netdevice_notifier(&netkit_netdev_notifier);
	rtnl_link_unregister(&netkit_link_ops);
}
1298 
/* Module entry/exit hooks and modinfo metadata. */
module_init(netkit_mod_init);
module_exit(netkit_mod_exit);

MODULE_DESCRIPTION("BPF-programmable network device");
MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME);
1307