xref: /linux/drivers/net/netkit.c (revision 05e352444b2430de4b183b4a988085381e5fd6ad)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2023 Isovalent */
3 
4 #include <linux/netdevice.h>
5 #include <linux/ethtool.h>
6 #include <linux/etherdevice.h>
7 #include <linux/filter.h>
8 #include <linux/netfilter_netdev.h>
9 #include <linux/bpf_mprog.h>
10 #include <linux/indirect_call_wrapper.h>
11 
12 #include <net/netdev_lock.h>
13 #include <net/netdev_queues.h>
14 #include <net/netdev_rx_queue.h>
15 #include <net/xdp_sock_drv.h>
16 #include <net/netkit.h>
17 #include <net/dst.h>
18 #include <net/tcx.h>
19 
20 #define NETKIT_DRV_NAME	"netkit"
21 
22 #define NETKIT_NUM_RX_QUEUES_MAX  1024
23 #define NETKIT_NUM_TX_QUEUES_MAX  1
24 
25 #define NETKIT_NUM_RX_QUEUES_REAL 1
26 #define NETKIT_NUM_TX_QUEUES_REAL 1
27 
/* Per-device private state. The fastpath group holds everything read on
 * every packet in netkit_xmit(); the slowpath group holds configuration
 * only touched on control-path operations (under RTNL).
 */
struct netkit {
	__cacheline_group_begin(netkit_fastpath);
	struct net_device __rcu *peer;        /* other end of the pair, NULL in single mode / when unlinked */
	struct bpf_mprog_entry __rcu *active; /* currently active BPF program chain, NULL when none attached */
	enum netkit_action policy;            /* default xmit verdict when no BPF program decides */
	enum netkit_scrub scrub;              /* skb meta scrubbing behavior on netns crossing */
	struct bpf_mprog_bundle	bundle;       /* backing storage for the two mprog entries */
	__cacheline_group_end(netkit_fastpath);

	__cacheline_group_begin(netkit_slowpath);
	enum netkit_mode mode;      /* NETKIT_L2 or NETKIT_L3 */
	enum netkit_pairing pair;   /* NETKIT_DEVICE_PAIR or NETKIT_DEVICE_SINGLE */
	bool primary;               /* true for the management side of a pair */
	u32 headroom;               /* headroom requested for this side of the pair */
	__cacheline_group_end(netkit_slowpath);
};
44 
/* BPF link object binding a bpf_link to the netkit device it is attached
 * to. dev is cleared under RTNL once the link is released/detached.
 */
struct netkit_link {
	struct bpf_link link;
	struct net_device *dev; /* NULL after release; guards double-detach */
};
49 
50 static struct rtnl_link_ops netkit_link_ops;
51 
/* Run the attached BPF program chain on @skb. @ret carries the device's
 * default policy and is returned unchanged if the chain is empty. The
 * programs run in order until one returns a verdict other than
 * NETKIT_NEXT, which terminates the chain.
 */
static __always_inline int
netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
	   enum netkit_action ret)
{
	const struct bpf_mprog_fp *fp;
	const struct bpf_prog *prog;

	bpf_mprog_foreach_prog(entry, fp, prog) {
		/* A previous program may have invalidated the data
		 * pointers; recompute before each run.
		 */
		bpf_compute_data_pointers(skb);
		ret = bpf_prog_run(prog, skb);
		if (ret != NETKIT_NEXT)
			break;
	}
	return ret;
}
67 
68 static void netkit_xnet(struct sk_buff *skb)
69 {
70 	skb->priority = 0;
71 	skb->mark = 0;
72 }
73 
/* Prepare @skb for handover to the peer device. @xnet indicates the
 * packet crosses a network namespace boundary; @xnet_scrub additionally
 * requests clearing of mark/priority in that case.
 */
static void netkit_prep_forward(struct sk_buff *skb,
				bool xnet, bool xnet_scrub)
{
	skb_scrub_packet(skb, false);
	nf_skip_egress(skb, true);
	skb_reset_mac_header(skb);
	if (!xnet)
		return;
	/* Timestamps are meaningless in the target namespace. */
	skb_clear_tstamp(skb);
	if (xnet_scrub)
		netkit_xnet(skb);
}
86 
/* Accessor for the netkit private area of @dev. */
static struct netkit *netkit_priv(const struct net_device *dev)
{
	return netdev_priv(dev);
}
91 
/* ndo_start_xmit: deliver the skb directly into the peer device's RX
 * path, running the attached BPF chain (if any) in between. The default
 * verdict comes from the device policy; BPF may override it with PASS,
 * DROP or REDIRECT. Runs under RCU; device uses lltx (no txq lock).
 */
static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
	struct netkit *nk = netkit_priv(dev);
	enum netkit_action ret = READ_ONCE(nk->policy);
	netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
	const struct bpf_mprog_entry *entry;
	struct net_device *peer;
	int len = skb->len;

	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
	rcu_read_lock();
	peer = rcu_dereference(nk->peer);
	/* Drop early when the peer is gone/down or the skb cannot be
	 * safely handed over (unpullable eth header, zerocopy frags).
	 */
	if (unlikely(!peer || !(peer->flags & IFF_UP) ||
		     !pskb_may_pull(skb, ETH_HLEN) ||
		     skb_orphan_frags(skb, GFP_ATOMIC)))
		goto drop;
	netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)),
			    nk->scrub);
	eth_skb_pkt_type(skb, peer);
	skb->dev = peer;
	entry = rcu_dereference(nk->active);
	if (entry)
		ret = netkit_run(entry, skb, ret);
	switch (ret) {
	case NETKIT_NEXT:
	case NETKIT_PASS:
		eth_skb_pull_mac(skb);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		/* Stats are only accounted on successful delivery. */
		if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
			dev_sw_netstats_tx_add(dev, 1, len);
			dev_sw_netstats_rx_add(peer, len);
		} else {
			goto drop_stats;
		}
		break;
	case NETKIT_REDIRECT:
		/* BPF picked another target device; skb ownership is
		 * transferred by skb_do_redirect().
		 */
		dev_sw_netstats_tx_add(dev, 1, len);
		skb_do_redirect(skb);
		break;
	case NETKIT_DROP:
	default:
drop:
		kfree_skb(skb);
drop_stats:
		dev_core_stats_tx_dropped_inc(dev);
		ret_dev = NET_XMIT_DROP;
		break;
	}
	rcu_read_unlock();
	bpf_net_ctx_clear(bpf_net_ctx);
	return ret_dev;
}
145 
/* ndo_open: single-mode devices always get carrier; paired devices only
 * get carrier on both ends once both ends are up.
 */
static int netkit_open(struct net_device *dev)
{
	struct netkit *nk = netkit_priv(dev);
	struct net_device *peer = rtnl_dereference(nk->peer);

	if (nk->pair == NETKIT_DEVICE_SINGLE) {
		netif_carrier_on(dev);
		return 0;
	}
	if (!peer)
		return -ENOTCONN;
	if (peer->flags & IFF_UP) {
		netif_carrier_on(dev);
		netif_carrier_on(peer);
	}
	return 0;
}
163 
164 static int netkit_close(struct net_device *dev)
165 {
166 	struct netkit *nk = netkit_priv(dev);
167 	struct net_device *peer = rtnl_dereference(nk->peer);
168 
169 	netif_carrier_off(dev);
170 	if (peer)
171 		netif_carrier_off(peer);
172 	return 0;
173 }
174 
175 static int netkit_get_iflink(const struct net_device *dev)
176 {
177 	struct netkit *nk = netkit_priv(dev);
178 	struct net_device *peer;
179 	int iflink = 0;
180 
181 	rcu_read_lock();
182 	peer = rcu_dereference(nk->peer);
183 	if (peer)
184 		iflink = READ_ONCE(peer->ifindex);
185 	rcu_read_unlock();
186 	return iflink;
187 }
188 
/* ndo_set_rx_mode: intentionally a no-op, no RX filtering is done. */
static void netkit_set_multicast(struct net_device *dev)
{
	/* Nothing to do, we receive whatever gets pushed to us! */
}
193 
194 static int netkit_set_macaddr(struct net_device *dev, void *sa)
195 {
196 	struct netkit *nk = netkit_priv(dev);
197 
198 	if (nk->mode != NETKIT_L2)
199 		return -EOPNOTSUPP;
200 
201 	return eth_mac_addr(dev, sa);
202 }
203 
/* ndo_set_rx_headroom: record the requested headroom for this side and
 * propagate the maximum of both sides' requests to both devices, so a
 * packet forwarded to the peer always has enough room. A negative
 * request resets to the default NET_SKB_PAD.
 */
static void netkit_set_headroom(struct net_device *dev, int headroom)
{
	struct netkit *nk = netkit_priv(dev), *nk2;
	struct net_device *peer;

	if (headroom < 0)
		headroom = NET_SKB_PAD;

	rcu_read_lock();
	peer = rcu_dereference(nk->peer);
	if (!peer) {
		nk->headroom = headroom;
		dev->needed_headroom = headroom;
	} else {
		nk2 = netkit_priv(peer);
		nk->headroom = headroom;
		/* Both ends advertise the larger of the two requests. */
		headroom = max(nk->headroom, nk2->headroom);

		peer->needed_headroom = headroom;
		dev->needed_headroom = headroom;
	}
	rcu_read_unlock();
}
227 
/* ndo_get_peer_dev: RCU-protected peer lookup; indirect-call wrapped so
 * hot callers can devirtualize it.
 */
INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
{
	return rcu_dereference(netkit_priv(dev)->peer);
}
232 
/* ndo_get_stats64: per-CPU tstats plus the core tx_dropped counter. */
static void netkit_get_stats(struct net_device *dev,
			     struct rtnl_link_stats64 *stats)
{
	dev_fetch_sw_netstats(stats, dev->tstats);
	stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
}
239 
240 static bool netkit_xsk_supported_at_phys(const struct net_device *dev)
241 {
242 	if (!dev->netdev_ops->ndo_bpf ||
243 	    !dev->netdev_ops->ndo_xdp_xmit ||
244 	    !dev->netdev_ops->ndo_xsk_wakeup)
245 		return false;
246 	return true;
247 }
248 
/* ndo_bpf: handle XSK pool setup/teardown on a leased queue by
 * translating the virtual netkit queue id to the underlying physical
 * device's queue and forwarding the request there. Only supported in
 * single device mode on queues that carry a lease. Note that @phys is
 * always initialized before use: the non-XSK_POOL cases return early.
 */
static int netkit_xsk(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct netkit *nk = netkit_priv(dev);
	struct netdev_bpf xdp_lower;
	struct netdev_rx_queue *rxq;
	struct net_device *phys;
	bool create = false;
	int ret = -EBUSY;

	switch (xdp->command) {
	case XDP_SETUP_XSK_POOL:
		if (nk->pair == NETKIT_DEVICE_PAIR)
			return -EOPNOTSUPP;
		if (xdp->xsk.queue_id >= dev->real_num_rx_queues)
			return -EINVAL;

		rxq = __netif_get_rx_queue(dev, xdp->xsk.queue_id);
		if (!rxq->lease)
			return -EOPNOTSUPP;

		phys = rxq->lease->dev;
		if (!netkit_xsk_supported_at_phys(phys))
			return -EOPNOTSUPP;

		/* A non-NULL pool means setup, NULL means teardown. */
		create = xdp->xsk.pool;
		memcpy(&xdp_lower, xdp, sizeof(xdp_lower));
		xdp_lower.xsk.queue_id = get_netdev_rx_queue_index(rxq->lease);
		break;
	case XDP_SETUP_PROG:
		return -EOPNOTSUPP;
	default:
		return -EINVAL;
	}

	netdev_lock(phys);
	if (create &&
	    (phys->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) {
		ret = -EOPNOTSUPP;
		goto out;
	}
	/* Teardown is always forwarded; setup only when no memory
	 * provider channels are in the way (-EBUSY otherwise).
	 */
	if (!create || !dev_get_min_mp_channel_count(phys))
		ret = phys->netdev_ops->ndo_bpf(phys, &xdp_lower);
out:
	netdev_unlock(phys);
	return ret;
}
295 
/* ndo_xsk_wakeup: forward the wakeup to the physical device backing the
 * leased queue, translating the queue index on the way down.
 */
static int netkit_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct netdev_rx_queue *rxq, *rxq_lease;
	struct net_device *phys;

	if (queue_id >= dev->real_num_rx_queues)
		return -EINVAL;

	rxq = __netif_get_rx_queue(dev, queue_id);
	rxq_lease = READ_ONCE(rxq->lease);
	if (unlikely(!rxq_lease))
		return -EOPNOTSUPP;

	/* netkit_xsk already validated full xsk support, hence it's
	 * fine to call into ndo_xsk_wakeup right away given this
	 * was a prerequisite to get here in the first place. The
	 * phys xsk support cannot change without tearing down the
	 * device (which clears the lease first).
	 */
	phys = rxq_lease->dev;
	return phys->netdev_ops->ndo_xsk_wakeup(phys,
			get_netdev_rx_queue_index(rxq_lease), flags);
}
319 
/* ndo_init: give netkit its own lockdep classes to avoid false
 * deadlock reports with stacked devices.
 */
static int netkit_init(struct net_device *dev)
{
	netdev_lockdep_set_classes(dev);
	return 0;
}
325 
326 static void netkit_uninit(struct net_device *dev);
327 
/* Netdev callbacks for netkit devices. */
static const struct net_device_ops netkit_netdev_ops = {
	.ndo_init		= netkit_init,
	.ndo_open		= netkit_open,
	.ndo_stop		= netkit_close,
	.ndo_start_xmit		= netkit_xmit,
	.ndo_set_rx_mode	= netkit_set_multicast,
	.ndo_set_rx_headroom	= netkit_set_headroom,
	.ndo_set_mac_address	= netkit_set_macaddr,
	.ndo_get_iflink		= netkit_get_iflink,
	.ndo_get_peer_dev	= netkit_peer_dev,
	.ndo_get_stats64	= netkit_get_stats,
	.ndo_uninit		= netkit_uninit,
	.ndo_bpf		= netkit_xsk,
	.ndo_xsk_wakeup		= netkit_xsk_wakeup,
	.ndo_features_check	= passthru_features_check,
};
344 
/* ethtool get_drvinfo: report the driver name only. */
static void netkit_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *info)
{
	strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver));
}
350 
/* Minimal ethtool support: driver identification only. */
static const struct ethtool_ops netkit_ethtool_ops = {
	.get_drvinfo		= netkit_get_drvinfo,
};
354 
/* ndo_queue_create: grow the real RX queue count by one for a new
 * leased queue. Returns the index of the newly usable queue (the old
 * real count) on success, or a negative errno.
 */
static int netkit_queue_create(struct net_device *dev,
			       struct netlink_ext_ack *extack)
{
	struct netkit *nk = netkit_priv(dev);
	u32 rxq_count_old, rxq_count_new;
	int err;

	rxq_count_old = dev->real_num_rx_queues;
	rxq_count_new = rxq_count_old + 1;

	/* In paired mode, only the non-primary (peer) device can
	 * create leased queues since the primary is the management
	 * side. In single device mode, leasing is always allowed.
	 */
	if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary) {
		NL_SET_ERR_MSG(extack,
			       "netkit can only lease against the peer device");
		return -EOPNOTSUPP;
	}

	err = netif_set_real_num_rx_queues(dev, rxq_count_new);
	if (err) {
		/* Distinguish hitting the allocated maximum from other
		 * failures for a clearer extack message.
		 */
		if (rxq_count_new > dev->num_rx_queues)
			NL_SET_ERR_MSG(extack,
				       "netkit maximum queue limit reached");
		else
			NL_SET_ERR_MSG_FMT(extack,
					   "netkit cannot create more queues err=%d", err);
		return err;
	}

	return rxq_count_old;
}
388 
/* Queue management: only dynamic creation of leased RX queues. */
static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = {
	.ndo_queue_create	= netkit_queue_create,
};
392 
393 static struct net_device *netkit_alloc(struct nlattr *tb[],
394 				       const char *ifname,
395 				       unsigned char name_assign_type,
396 				       unsigned int num_tx_queues,
397 				       unsigned int num_rx_queues)
398 {
399 	const struct rtnl_link_ops *ops = &netkit_link_ops;
400 	struct net_device *dev;
401 
402 	if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX ||
403 	    num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX)
404 		return ERR_PTR(-EOPNOTSUPP);
405 
406 	dev = alloc_netdev_mqs(ops->priv_size, ifname,
407 			       name_assign_type, ops->setup,
408 			       num_tx_queues, num_rx_queues);
409 	if (dev) {
410 		dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL;
411 		dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL;
412 	}
413 	return dev;
414 }
415 
/* Tear down all queue leases held by @dev (queues beyond index 0).
 * Lock order: the virtual (netkit) device first, then the physical
 * lessor device for each leased queue.
 */
static void netkit_queue_unlease(struct net_device *dev)
{
	struct netdev_rx_queue *rxq, *rxq_lease;
	struct net_device *dev_lease;
	int i;

	/* Only queue 0 active means nothing was ever leased. */
	if (dev->real_num_rx_queues == 1)
		return;

	netdev_lock(dev);
	for (i = 1; i < dev->real_num_rx_queues; i++) {
		rxq = __netif_get_rx_queue(dev, i);
		rxq_lease = rxq->lease;
		dev_lease = rxq_lease->dev;

		netdev_lock(dev_lease);
		netdev_rx_queue_unlease(rxq, rxq_lease);
		netdev_unlock(dev_lease);
	}
	netdev_unlock(dev);
}
437 
/* rtnl_link_ops->setup: initialize a freshly allocated netkit netdev
 * with ethernet defaults, software-device flags (no ARP, no qdisc, no
 * netpoll, lockless tx) and the full software feature set.
 */
static void netkit_setup(struct net_device *dev)
{
	static const netdev_features_t netkit_features_hw_vlan =
		NETIF_F_HW_VLAN_CTAG_TX |
		NETIF_F_HW_VLAN_CTAG_RX |
		NETIF_F_HW_VLAN_STAG_TX |
		NETIF_F_HW_VLAN_STAG_RX;
	static const netdev_features_t netkit_features =
		netkit_features_hw_vlan |
		NETIF_F_SG |
		NETIF_F_FRAGLIST |
		NETIF_F_HW_CSUM |
		NETIF_F_RXCSUM |
		NETIF_F_SCTP_CRC |
		NETIF_F_HIGHDMA |
		NETIF_F_GSO_SOFTWARE |
		NETIF_F_GSO_ENCAP_ALL;

	ether_setup(dev);
	dev->max_mtu = ETH_MAX_MTU;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;

	dev->flags |= IFF_NOARP;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	dev->priv_flags |= IFF_PHONY_HEADROOM;
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->priv_flags |= IFF_DISABLE_NETPOLL;
	dev->lltx = true;

	dev->netdev_ops     = &netkit_netdev_ops;
	dev->ethtool_ops    = &netkit_ethtool_ops;
	dev->queue_mgmt_ops = &netkit_queue_mgmt_ops;

	dev->features |= netkit_features;
	dev->hw_features = netkit_features;
	dev->hw_enc_features = netkit_features;
	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
	/* VLAN offloads don't apply to the vlan_features themselves. */
	dev->vlan_features = dev->features & ~netkit_features_hw_vlan;

	dev->needs_free_netdev = true;

	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
}
482 
483 static struct net *netkit_get_link_net(const struct net_device *dev)
484 {
485 	struct netkit *nk = netkit_priv(dev);
486 	struct net_device *peer = rtnl_dereference(nk->peer);
487 
488 	return peer ? dev_net(peer) : dev_net(dev);
489 }
490 
491 static int netkit_check_policy(int policy, struct nlattr *tb,
492 			       struct netlink_ext_ack *extack)
493 {
494 	switch (policy) {
495 	case NETKIT_PASS:
496 	case NETKIT_DROP:
497 		return 0;
498 	default:
499 		NL_SET_ERR_MSG_ATTR(extack, tb,
500 				    "Provided default xmit policy not supported");
501 		return -EINVAL;
502 	}
503 }
504 
505 static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
506 			   struct netlink_ext_ack *extack)
507 {
508 	struct nlattr *attr = tb[IFLA_ADDRESS];
509 
510 	if (!attr)
511 		return 0;
512 	if (nla_len(attr) != ETH_ALEN)
513 		return -EINVAL;
514 	if (!is_valid_ether_addr(nla_data(attr)))
515 		return -EADDRNOTAVAIL;
516 	return 0;
517 }
518 
/* rtnl_link_ops->newlink: create a netkit device, and in paired mode
 * also its peer (possibly in another netns). Parses all IFLA_NETKIT_*
 * attributes, enforces mode/pairing constraints, registers the peer
 * first (so primary-side failure can unwind it), then the primary, and
 * finally cross-links both ends via RCU pointers.
 */
static int netkit_new_link(struct net_device *dev,
			   struct rtnl_newlink_params *params,
			   struct netlink_ext_ack *extack)
{
	struct net *peer_net = rtnl_newlink_peer_net(params);
	enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
	enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp, *attr;
	enum netkit_pairing pair = NETKIT_DEVICE_PAIR;
	enum netkit_action policy_prim = NETKIT_PASS;
	enum netkit_action policy_peer = NETKIT_PASS;
	bool seen_peer = false, seen_scrub = false;
	struct nlattr **data = params->data;
	enum netkit_mode mode = NETKIT_L3;
	unsigned char ifname_assign_type;
	struct nlattr **tb = params->tb;
	u16 headroom = 0, tailroom = 0;
	struct ifinfomsg *ifmp = NULL;
	struct net_device *peer = NULL;
	char ifname[IFNAMSIZ];
	struct netkit *nk;
	int err;

	/* Parse optional attributes; tbp points at the peer's attr set
	 * when IFLA_NETKIT_PEER_INFO was supplied, else at our own.
	 */
	tbp = tb;
	if (data) {
		if (data[IFLA_NETKIT_MODE])
			mode = nla_get_u32(data[IFLA_NETKIT_MODE]);
		if (data[IFLA_NETKIT_PEER_INFO]) {
			attr = data[IFLA_NETKIT_PEER_INFO];
			ifmp = nla_data(attr);
			rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack);
			tbp = peer_tb;
		}
		if (data[IFLA_NETKIT_SCRUB])
			scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]);
		if (data[IFLA_NETKIT_PEER_SCRUB])
			scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]);
		if (data[IFLA_NETKIT_POLICY]) {
			attr = data[IFLA_NETKIT_POLICY];
			policy_prim = nla_get_u32(attr);
			err = netkit_check_policy(policy_prim, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_PEER_POLICY]) {
			attr = data[IFLA_NETKIT_PEER_POLICY];
			policy_peer = nla_get_u32(attr);
			err = netkit_check_policy(policy_peer, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_HEADROOM])
			headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]);
		if (data[IFLA_NETKIT_TAILROOM])
			tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]);
		if (data[IFLA_NETKIT_PAIRING])
			pair = nla_get_u32(data[IFLA_NETKIT_PAIRING]);

		/* Remember which attrs were explicitly given, for the
		 * single-mode compatibility checks below.
		 */
		seen_scrub = data[IFLA_NETKIT_SCRUB];
		seen_peer = data[IFLA_NETKIT_PEER_INFO] ||
			    data[IFLA_NETKIT_PEER_SCRUB] ||
			    data[IFLA_NETKIT_PEER_POLICY];
	}

	if (ifmp && tbp[IFLA_IFNAME]) {
		nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
		ifname_assign_type = NET_NAME_USER;
	} else {
		strscpy(ifname, "nk%d", IFNAMSIZ);
		ifname_assign_type = NET_NAME_ENUM;
	}
	/* MAC addresses only make sense for L2 mode devices. */
	if (mode != NETKIT_L2 &&
	    (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
		return -EOPNOTSUPP;
	/* Single mode has no peer and no configurable primary policy
	 * or scrubbing; reject any peer/scrub/policy attrs in that case.
	 */
	if (pair == NETKIT_DEVICE_SINGLE &&
	    (tb != tbp || seen_peer || seen_scrub ||
	     policy_prim != NETKIT_PASS))
		return -EOPNOTSUPP;

	if (pair == NETKIT_DEVICE_PAIR) {
		peer = rtnl_create_link(peer_net, ifname, ifname_assign_type,
					&netkit_link_ops, tbp, extack);
		if (IS_ERR(peer))
			return PTR_ERR(peer);

		netif_inherit_tso_max(peer, dev);
		if (headroom)
			peer->needed_headroom = headroom;
		if (tailroom)
			peer->needed_tailroom = tailroom;
		if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
			eth_hw_addr_random(peer);
		if (ifmp && dev->ifindex)
			peer->ifindex = ifmp->ifi_index;

		nk = netkit_priv(peer);
		nk->primary = false;
		nk->policy = policy_peer;
		nk->scrub = scrub_peer;
		nk->mode = mode;
		nk->pair = pair;
		nk->headroom = headroom;
		bpf_mprog_bundle_init(&nk->bundle);

		err = register_netdevice(peer);
		if (err < 0)
			goto err_register_peer;
		netif_carrier_off(peer);
		if (mode == NETKIT_L2)
			dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);

		err = rtnl_configure_link(peer, NULL, 0, NULL);
		if (err < 0)
			goto err_configure_peer;
	}

	if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);
	if (tb[IFLA_IFNAME])
		nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
	else
		strscpy(dev->name, "nk%d", IFNAMSIZ);
	if (headroom)
		dev->needed_headroom = headroom;
	if (tailroom)
		dev->needed_tailroom = tailroom;

	nk = netkit_priv(dev);
	nk->primary = true;
	nk->policy = policy_prim;
	nk->scrub = scrub_prim;
	nk->mode = mode;
	nk->pair = pair;
	nk->headroom = headroom;
	bpf_mprog_bundle_init(&nk->bundle);

	/* AF_XDP via queue leasing is only available in single mode. */
	if (pair == NETKIT_DEVICE_SINGLE)
		xdp_set_features_flag(dev, NETDEV_XDP_ACT_XSK);

	err = register_netdevice(dev);
	if (err < 0)
		goto err_configure_peer;
	netif_carrier_off(dev);
	if (mode == NETKIT_L2)
		dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);

	/* Publish the cross-links last, once both ends are registered. */
	rcu_assign_pointer(netkit_priv(dev)->peer, peer);
	if (peer)
		rcu_assign_pointer(netkit_priv(peer)->peer, dev);
	return 0;
err_configure_peer:
	/* Peer was registered (or we failed on the primary): unwind it
	 * through unregister; dev itself is torn down by the caller.
	 */
	if (peer)
		unregister_netdevice(peer);
	return err;
err_register_peer:
	free_netdev(peer);
	return err;
}
677 
/* Fetch the active mprog entry under RTNL. With @bundle_fallback the
 * bundle's first (empty) entry is returned instead of NULL so attach
 * operations have something to build upon.
 */
static struct bpf_mprog_entry *netkit_entry_fetch(struct net_device *dev,
						  bool bundle_fallback)
{
	struct netkit *nk = netkit_priv(dev);
	struct bpf_mprog_entry *entry;

	ASSERT_RTNL();
	entry = rcu_dereference_rtnl(nk->active);
	if (entry)
		return entry;
	if (bundle_fallback)
		return &nk->bundle.a;
	return NULL;
}
692 
/* Publish @entry as the active mprog entry (RTNL held); readers on the
 * xmit fastpath observe it via rcu_dereference().
 */
static void netkit_entry_update(struct net_device *dev,
				struct bpf_mprog_entry *entry)
{
	struct netkit *nk = netkit_priv(dev);

	ASSERT_RTNL();
	rcu_assign_pointer(nk->active, entry);
}
701 
/* Wait for in-flight xmit-side readers of the old entry to drain. */
static void netkit_entry_sync(void)
{
	synchronize_rcu();
}
706 
/* Resolve the netkit device for a BPF attach/detach/query request.
 * @which selects primary vs peer attachment point. Management always
 * goes through the primary device of a pair; single-mode devices do
 * not support BPF attachment at all.
 */
static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 which)
{
	struct net_device *dev;
	struct netkit *nk;

	ASSERT_RTNL();

	switch (which) {
	case BPF_NETKIT_PRIMARY:
	case BPF_NETKIT_PEER:
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		return ERR_PTR(-ENODEV);
	/* Reject non-netkit devices early (ops pointer identifies us). */
	if (dev->netdev_ops != &netkit_netdev_ops)
		return ERR_PTR(-ENXIO);

	nk = netkit_priv(dev);
	if (!nk->primary)
		return ERR_PTR(-EACCES);
	if (nk->pair == NETKIT_DEVICE_SINGLE)
		return ERR_PTR(-EOPNOTSUPP);
	if (which == BPF_NETKIT_PEER) {
		dev = rcu_dereference_rtnl(nk->peer);
		if (!dev)
			return ERR_PTR(-ENODEV);
	}
	return dev;
}
740 
/* BPF_PROG_ATTACH entry point: attach @prog to the netkit device's
 * mprog chain, optionally replacing an existing program (BPF_F_REPLACE).
 * On success the new entry is published and committed after an RCU
 * grace period.
 */
int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_mprog_entry *entry, *entry_new;
	struct bpf_prog *replace_prog = NULL;
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex,
			       attr->attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	/* Fall back to the empty bundle entry so the first attach works. */
	entry = netkit_entry_fetch(dev, true);
	if (attr->attach_flags & BPF_F_REPLACE) {
		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd,
						 prog->type);
		if (IS_ERR(replace_prog)) {
			ret = PTR_ERR(replace_prog);
			replace_prog = NULL;
			goto out;
		}
	}
	ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, replace_prog,
			       attr->attach_flags, attr->relative_fd,
			       attr->expected_revision);
	if (!ret) {
		if (entry != entry_new) {
			netkit_entry_update(dev, entry_new);
			netkit_entry_sync();
		}
		bpf_mprog_commit(entry);
	}
out:
	if (replace_prog)
		bpf_prog_put(replace_prog);
	rtnl_unlock();
	return ret;
}
781 
/* BPF_PROG_DETACH entry point: remove @prog from the device's mprog
 * chain. When the chain becomes empty the active pointer is cleared so
 * the xmit fastpath skips the BPF run entirely.
 */
int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_mprog_entry *entry, *entry_new;
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns, attr->target_ifindex,
			       attr->attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	entry = netkit_entry_fetch(dev, false);
	if (!entry) {
		ret = -ENOENT;
		goto out;
	}
	ret = bpf_mprog_detach(entry, &entry_new, prog, NULL, attr->attach_flags,
			       attr->relative_fd, attr->expected_revision);
	if (!ret) {
		if (!bpf_mprog_total(entry_new))
			entry_new = NULL;
		netkit_entry_update(dev, entry_new);
		netkit_entry_sync();
		bpf_mprog_commit(entry);
	}
out:
	rtnl_unlock();
	return ret;
}
813 
/* BPF_PROG_QUERY entry point: report the attached programs/links for
 * the given netkit attachment point.
 */
int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
{
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns,
			       attr->query.target_ifindex,
			       attr->query.attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	/* NULL entry (nothing attached) is handled by bpf_mprog_query. */
	ret = bpf_mprog_query(attr, uattr, netkit_entry_fetch(dev, false));
out:
	rtnl_unlock();
	return ret;
}
832 
/* Upcast a generic bpf_link to its containing netkit_link. */
static struct netkit_link *netkit_link(const struct bpf_link *link)
{
	return container_of(link, struct netkit_link, link);
}
837 
/* Attach a link-backed program to the device's mprog chain. Caller
 * holds RTNL (called from netkit_link_attach).
 */
static int netkit_link_prog_attach(struct bpf_link *link, u32 flags,
				   u32 id_or_fd, u64 revision)
{
	struct netkit_link *nkl = netkit_link(link);
	struct bpf_mprog_entry *entry, *entry_new;
	struct net_device *dev = nkl->dev;
	int ret;

	ASSERT_RTNL();
	entry = netkit_entry_fetch(dev, true);
	ret = bpf_mprog_attach(entry, &entry_new, link->prog, link, NULL, flags,
			       id_or_fd, revision);
	if (!ret) {
		if (entry != entry_new) {
			netkit_entry_update(dev, entry_new);
			netkit_entry_sync();
		}
		bpf_mprog_commit(entry);
	}
	return ret;
}
859 
/* bpf_link release: detach the link's program from the device. nkl->dev
 * may already be NULL when the device was torn down first (see
 * netkit_release_all), in which case there is nothing to do.
 */
static void netkit_link_release(struct bpf_link *link)
{
	struct netkit_link *nkl = netkit_link(link);
	struct bpf_mprog_entry *entry, *entry_new;
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
	dev = nkl->dev;
	if (!dev)
		goto out;
	entry = netkit_entry_fetch(dev, false);
	if (!entry) {
		ret = -ENOENT;
		goto out;
	}
	ret = bpf_mprog_detach(entry, &entry_new, link->prog, link, 0, 0, 0);
	if (!ret) {
		if (!bpf_mprog_total(entry_new))
			entry_new = NULL;
		netkit_entry_update(dev, entry_new);
		netkit_entry_sync();
		bpf_mprog_commit(entry);
		/* Mark the link as detached from the device. */
		nkl->dev = NULL;
	}
out:
	/* Release must not fail for a still-linked program. */
	WARN_ON_ONCE(ret);
	rtnl_unlock();
}
889 
/* bpf_link update: atomically replace the link's program with @nprog.
 * @oprog, when given (BPF_F_REPLACE semantics), must match the link's
 * current program. Replacing a program with itself is a no-op.
 */
static int netkit_link_update(struct bpf_link *link, struct bpf_prog *nprog,
			      struct bpf_prog *oprog)
{
	struct netkit_link *nkl = netkit_link(link);
	struct bpf_mprog_entry *entry, *entry_new;
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
	dev = nkl->dev;
	if (!dev) {
		ret = -ENOLINK;
		goto out;
	}
	if (oprog && link->prog != oprog) {
		ret = -EPERM;
		goto out;
	}
	oprog = link->prog;
	if (oprog == nprog) {
		/* Same program: drop the extra reference and succeed. */
		bpf_prog_put(nprog);
		goto out;
	}
	entry = netkit_entry_fetch(dev, false);
	if (!entry) {
		ret = -ENOENT;
		goto out;
	}
	ret = bpf_mprog_attach(entry, &entry_new, nprog, link, oprog,
			       BPF_F_REPLACE | BPF_F_ID,
			       link->prog->aux->id, 0);
	if (!ret) {
		/* In-place replacement never creates a new entry. */
		WARN_ON_ONCE(entry != entry_new);
		oprog = xchg(&link->prog, nprog);
		bpf_prog_put(oprog);
		bpf_mprog_commit(entry);
	}
out:
	rtnl_unlock();
	return ret;
}
931 
/* bpf_link dealloc: free the containing netkit_link allocation. */
static void netkit_link_dealloc(struct bpf_link *link)
{
	kfree(netkit_link(link));
}
936 
/* bpf_link show_fdinfo: print target ifindex (0 once detached) and the
 * attach type. RTNL guards against concurrent nkl->dev clearing.
 */
static void netkit_link_fdinfo(const struct bpf_link *link, struct seq_file *seq)
{
	const struct netkit_link *nkl = netkit_link(link);
	u32 ifindex = 0;

	rtnl_lock();
	if (nkl->dev)
		ifindex = nkl->dev->ifindex;
	rtnl_unlock();

	seq_printf(seq, "ifindex:\t%u\n", ifindex);
	seq_printf(seq, "attach_type:\t%u (%s)\n",
		   link->attach_type,
		   link->attach_type == BPF_NETKIT_PRIMARY ? "primary" : "peer");
}
952 
/* bpf_link fill_link_info: same data as fdinfo, reported through the
 * bpf_link_info uapi struct.
 */
static int netkit_link_fill_info(const struct bpf_link *link,
				 struct bpf_link_info *info)
{
	const struct netkit_link *nkl = netkit_link(link);
	u32 ifindex = 0;

	rtnl_lock();
	if (nkl->dev)
		ifindex = nkl->dev->ifindex;
	rtnl_unlock();

	info->netkit.ifindex = ifindex;
	info->netkit.attach_type = link->attach_type;
	return 0;
}
968 
/* bpf_link detach: identical to release; always succeeds. */
static int netkit_link_detach(struct bpf_link *link)
{
	netkit_link_release(link);
	return 0;
}
974 
/* bpf_link callbacks for netkit links. */
static const struct bpf_link_ops netkit_link_lops = {
	.release	= netkit_link_release,
	.detach		= netkit_link_detach,
	.dealloc	= netkit_link_dealloc,
	.update_prog	= netkit_link_update,
	.show_fdinfo	= netkit_link_fdinfo,
	.fill_link_info	= netkit_link_fill_info,
};
983 
/* Initialize and prime a netkit link object for @dev/@prog; priming
 * reserves the link fd/id without exposing it to userspace yet.
 */
static int netkit_link_init(struct netkit_link *nkl,
			    struct bpf_link_primer *link_primer,
			    const union bpf_attr *attr,
			    struct net_device *dev,
			    struct bpf_prog *prog)
{
	bpf_link_init(&nkl->link, BPF_LINK_TYPE_NETKIT,
		      &netkit_link_lops, prog, attr->link_create.attach_type);
	nkl->dev = dev;
	return bpf_link_prime(&nkl->link, link_primer);
}
995 
/* BPF_LINK_CREATE entry point: allocate a netkit link, attach its
 * program to the device's mprog chain, and settle the link fd. On
 * attach failure the primed link is cleaned up (which also drops the
 * prog reference via dealloc).
 */
int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct netkit_link *nkl;
	struct net_device *dev;
	int ret;

	rtnl_lock();
	dev = netkit_dev_fetch(current->nsproxy->net_ns,
			       attr->link_create.target_ifindex,
			       attr->link_create.attach_type);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	nkl = kzalloc_obj(*nkl, GFP_KERNEL_ACCOUNT);
	if (!nkl) {
		ret = -ENOMEM;
		goto out;
	}
	ret = netkit_link_init(nkl, &link_primer, attr, dev, prog);
	if (ret) {
		kfree(nkl);
		goto out;
	}
	ret = netkit_link_prog_attach(&nkl->link,
				      attr->link_create.flags,
				      attr->link_create.netkit.relative_fd,
				      attr->link_create.netkit.expected_revision);
	if (ret) {
		/* Prevent release from touching the device again. */
		nkl->dev = NULL;
		bpf_link_cleanup(&link_primer);
		goto out;
	}
	ret = bpf_link_settle(&link_primer);
out:
	rtnl_unlock();
	return ret;
}
1035 
/* Device teardown: unpublish the active entry, wait out fastpath
 * readers, then drop every attached program/link. Link-backed entries
 * only get their dev pointer cleared (the link itself still holds the
 * prog reference); plain prog attachments drop their reference here.
 */
static void netkit_release_all(struct net_device *dev)
{
	struct bpf_mprog_entry *entry;
	struct bpf_tuple tuple = {};
	struct bpf_mprog_fp *fp;
	struct bpf_mprog_cp *cp;

	entry = netkit_entry_fetch(dev, false);
	if (!entry)
		return;
	netkit_entry_update(dev, NULL);
	netkit_entry_sync();
	bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
		if (tuple.link)
			netkit_link(tuple.link)->dev = NULL;
		else
			bpf_prog_put(tuple.prog);
	}
}
1055 
/* ndo_uninit: drop all BPF attachments and return any leased queues. */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);
	netkit_queue_unlease(dev);
}
1061 
/* rtnl_link_ops->dellink: sever the RCU cross-links and queue both
 * ends of the pair for unregistration.
 */
static void netkit_del_link(struct net_device *dev, struct list_head *head)
{
	struct netkit *nk = netkit_priv(dev);
	struct net_device *peer = rtnl_dereference(nk->peer);

	RCU_INIT_POINTER(nk->peer, NULL);
	unregister_netdevice_queue(dev, head);
	if (peer) {
		nk = netkit_priv(peer);
		RCU_INIT_POINTER(nk->peer, NULL);
		/* Guard against the peer already being in an unregister
		 * list (e.g. same-namespace teardown where the peer is
		 * in the caller's dev_kill_list). list_move_tail() on an
		 * already-queued device would otherwise corrupt that
		 * list's iteration. This situation can occur via netkit
		 * notifier, hence guard against this scenario.
		 */
		if (!unregister_netdevice_queued(peer))
			unregister_netdevice_queue(peer, head);
	}
}
1083 
/* rtnl_link_ops->changelink: only the default xmit policies may change
 * after creation, and only via the primary device; everything else
 * (mode, scrubbing, pairing, head-/tailroom, peer info) is fixed.
 */
static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	struct netkit *nk = netkit_priv(dev);
	struct net_device *peer = rtnl_dereference(nk->peer);
	enum netkit_action policy;
	struct nlattr *attr;
	int err, i;
	/* Attributes which are immutable after device creation, with
	 * human-readable names for extack messages.
	 */
	static const struct {
		u32 attr;
		char *name;
	} fixed_params[] = {
		{ IFLA_NETKIT_MODE,       "operating mode" },
		{ IFLA_NETKIT_SCRUB,      "scrubbing" },
		{ IFLA_NETKIT_PEER_SCRUB, "peer scrubbing" },
		{ IFLA_NETKIT_PEER_INFO,  "peer info" },
		{ IFLA_NETKIT_HEADROOM,   "headroom" },
		{ IFLA_NETKIT_TAILROOM,   "tailroom" },
		{ IFLA_NETKIT_PAIRING,    "pairing" },
	};

	if (!nk->primary) {
		NL_SET_ERR_MSG(extack,
			       "netkit link settings can be changed only through the primary device");
		return -EACCES;
	}

	for (i = 0; i < ARRAY_SIZE(fixed_params); i++) {
		attr = data[fixed_params[i].attr];
		if (attr) {
			NL_SET_ERR_MSG_ATTR_FMT(extack, attr,
						"netkit link %s cannot be changed after device creation",
						fixed_params[i].name);
			return -EACCES;
		}
	}

	if (data[IFLA_NETKIT_POLICY]) {
		/* -EOPNOTSUPP stands unless this is a paired device. */
		err = -EOPNOTSUPP;
		attr = data[IFLA_NETKIT_POLICY];
		policy = nla_get_u32(attr);
		if (nk->pair == NETKIT_DEVICE_PAIR)
			err = netkit_check_policy(policy, attr, extack);
		if (err)
			return err;
		/* WRITE_ONCE pairs with READ_ONCE in netkit_xmit(). */
		WRITE_ONCE(nk->policy, policy);
	}

	if (data[IFLA_NETKIT_PEER_POLICY]) {
		/* -EOPNOTSUPP stands when the peer is already gone. */
		err = -EOPNOTSUPP;
		attr = data[IFLA_NETKIT_PEER_POLICY];
		policy = nla_get_u32(attr);
		if (peer)
			err = netkit_check_policy(policy, attr, extack);
		if (err)
			return err;
		nk = netkit_priv(peer);
		WRITE_ONCE(nk->policy, policy);
	}

	return 0;
}
1147 
/* Called from the netdev notifier on NETDEV_UNREGISTER: when a device
 * backing netkit queue leases goes away, also tear down the netkit
 * devices that still lease one of its rx queues, batching them on a
 * local kill list.
 */
static void netkit_check_lease_unregister(struct net_device *dev)
{
	LIST_HEAD(list_kill);
	u32 q_idx;

	/* Only devices that are mid-unregistration and have a parent
	 * device are of interest here. NOTE(review): dev.parent as a
	 * "backed by real hardware" proxy is inferred — confirm against
	 * the queue-lease setup path.
	 */
	if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING ||
	    !dev->dev.parent)
		return;

	netdev_lock_ops(dev);
	for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) {
		struct net_device *tmp = dev;
		struct netdev_rx_queue *rxq;
		u32 tmp_q_idx = q_idx;

		/* On a leased queue, tmp/tmp_q_idx are redirected to the
		 * device holding the lease; tmp != dev plus the ops check
		 * below identify a netkit lessee.
		 */
		rxq = __netif_get_rx_queue_lease(&tmp, &tmp_q_idx,
						 NETIF_PHYS_TO_VIRT);
		if (rxq && tmp != dev &&
		    tmp->netdev_ops == &netkit_netdev_ops) {
			/* A single phys device can have multiple queues leased
			 * to one netkit device. We can only queue that netkit
			 * device once to the list_kill. Queues of that phys
			 * device can be leased with different individual netkit
			 * devices, hence we batch via list_kill.
			 */
			if (unregister_netdevice_queued(tmp))
				continue;
			netkit_del_link(tmp, &list_kill);
		}
	}
	netdev_unlock_ops(dev);
	unregister_netdevice_many(&list_kill);
}
1181 
1182 static int netkit_notifier(struct notifier_block *this,
1183 			   unsigned long event, void *ptr)
1184 {
1185 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1186 
1187 	if (event == NETDEV_UNREGISTER)
1188 		netkit_check_lease_unregister(dev);
1189 	return NOTIFY_DONE;
1190 }
1191 
1192 static size_t netkit_get_size(const struct net_device *dev)
1193 {
1194 	return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
1195 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */
1196 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */
1197 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */
1198 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
1199 	       nla_total_size(sizeof(u8))  + /* IFLA_NETKIT_PRIMARY */
1200 	       nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */
1201 	       nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */
1202 	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PAIRING */
1203 	       0;
1204 }
1205 
1206 static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
1207 {
1208 	struct netkit *nk = netkit_priv(dev);
1209 	struct net_device *peer = rtnl_dereference(nk->peer);
1210 
1211 	if (nla_put_u8(skb, IFLA_NETKIT_PRIMARY, nk->primary))
1212 		return -EMSGSIZE;
1213 	if (nla_put_u32(skb, IFLA_NETKIT_POLICY, nk->policy))
1214 		return -EMSGSIZE;
1215 	if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode))
1216 		return -EMSGSIZE;
1217 	if (nk->pair == NETKIT_DEVICE_PAIR &&
1218 	    nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
1219 		return -EMSGSIZE;
1220 	if (nla_put_u16(skb, IFLA_NETKIT_HEADROOM, dev->needed_headroom))
1221 		return -EMSGSIZE;
1222 	if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom))
1223 		return -EMSGSIZE;
1224 	if (nla_put_u32(skb, IFLA_NETKIT_PAIRING, nk->pair))
1225 		return -EMSGSIZE;
1226 
1227 	if (peer) {
1228 		nk = netkit_priv(peer);
1229 		if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy))
1230 			return -EMSGSIZE;
1231 		if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub))
1232 			return -EMSGSIZE;
1233 	}
1234 
1235 	return 0;
1236 }
1237 
/* Netlink attribute policy. IFLA_NETKIT_PRIMARY is reported by
 * netkit_fill_info() but rejected on input: the primary designation is
 * fixed at link creation time.
 */
static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
	[IFLA_NETKIT_PEER_INFO]		= { .len = sizeof(struct ifinfomsg) },
	[IFLA_NETKIT_MODE]		= NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
	[IFLA_NETKIT_POLICY]		= { .type = NLA_U32 },
	[IFLA_NETKIT_PEER_POLICY]	= { .type = NLA_U32 },
	[IFLA_NETKIT_HEADROOM]		= { .type = NLA_U16 },
	[IFLA_NETKIT_TAILROOM]		= { .type = NLA_U16 },
	[IFLA_NETKIT_SCRUB]		= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PEER_SCRUB]	= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PAIRING]		= NLA_POLICY_MAX(NLA_U32, NETKIT_DEVICE_SINGLE),
	[IFLA_NETKIT_PRIMARY]		= { .type = NLA_REJECT,
					    .reject_message = "Primary attribute is read-only" },
};
1251 
/* rtnl_link_ops wiring for "ip link add ... type netkit". */
static struct rtnl_link_ops netkit_link_ops = {
	.kind		= NETKIT_DRV_NAME,
	.priv_size	= sizeof(struct netkit),
	.alloc		= netkit_alloc,
	.setup		= netkit_setup,
	.newlink	= netkit_new_link,
	.dellink	= netkit_del_link,
	.changelink	= netkit_change_link,
	.get_link_net	= netkit_get_link_net,
	.get_size	= netkit_get_size,
	.fill_info	= netkit_fill_info,
	.policy		= netkit_policy,
	.validate	= netkit_validate,
	.peer_type	= IFLA_NETKIT_PEER_INFO,
	.maxtype	= IFLA_NETKIT_MAX,
};
1268 
/* Notifier block hooked into the netdev notifier chain; see
 * netkit_notifier() for the handled events.
 */
static struct notifier_block netkit_netdev_notifier = {
	.notifier_call	= netkit_notifier,
};
1272 
1273 static __init int netkit_mod_init(void)
1274 {
1275 	int ret;
1276 
1277 	BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT ||
1278 		     (int)NETKIT_PASS != (int)TCX_PASS ||
1279 		     (int)NETKIT_DROP != (int)TCX_DROP ||
1280 		     (int)NETKIT_REDIRECT != (int)TCX_REDIRECT);
1281 
1282 	ret = rtnl_link_register(&netkit_link_ops);
1283 	if (ret)
1284 		return ret;
1285 	ret = register_netdevice_notifier(&netkit_netdev_notifier);
1286 	if (ret)
1287 		rtnl_link_unregister(&netkit_link_ops);
1288 	return ret;
1289 }
1290 
/* Module exit: tear down in reverse order of netkit_mod_init(). */
static __exit void netkit_mod_exit(void)
{
	unregister_netdevice_notifier(&netkit_netdev_notifier);
	rtnl_link_unregister(&netkit_link_ops);
}
1296 
module_init(netkit_mod_init);
module_exit(netkit_mod_exit);

MODULE_DESCRIPTION("BPF-programmable network device");
MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
MODULE_LICENSE("GPL");
/* Allow module auto-load when a "netkit" rtnl link type is requested. */
MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME);
1305