xref: /linux/drivers/net/netdevsim/netdev.c (revision c715f13bb30f9f4d1bd8888667ef32e43b6fedc1)
1 /*
2  * Copyright (C) 2017 Netronome Systems, Inc.
3  *
4  * This software is licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree.
7  *
8  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
9  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
10  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
11  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
12  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
13  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
14  */
15 
16 #include <linux/debugfs.h>
17 #include <linux/etherdevice.h>
18 #include <linux/ethtool_netlink.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/netdevice.h>
22 #include <linux/slab.h>
23 #include <net/netdev_queues.h>
24 #include <net/netdev_rx_queue.h>
25 #include <net/page_pool/helpers.h>
26 #include <net/netlink.h>
27 #include <net/net_shaper.h>
28 #include <net/netdev_lock.h>
29 #include <net/pkt_cls.h>
30 #include <net/rtnetlink.h>
31 #include <net/udp_tunnel.h>
32 #include <net/busy_poll.h>
33 
34 #include "netdevsim.h"
35 
36 MODULE_IMPORT_NS("NETDEV_INTERNAL");
37 
/* Per-queue ring depth: bounds the RX skb backlog in nsim_napi_rx() and is
 * used as the page pool size in nsim_create_page_pool().
 */
#define NSIM_RING_SIZE		256
39 
40 static void nsim_start_peer_tx_queue(struct net_device *dev, struct nsim_rq *rq)
41 {
42 	struct netdevsim *ns = netdev_priv(dev);
43 	struct net_device *peer_dev;
44 	struct netdevsim *peer_ns;
45 	struct netdev_queue *txq;
46 	u16 idx;
47 
48 	idx = rq->napi.index;
49 	rcu_read_lock();
50 	peer_ns = rcu_dereference(ns->peer);
51 	if (!peer_ns)
52 		goto out;
53 
54 	/* TX device */
55 	peer_dev = peer_ns->netdev;
56 	if (dev->real_num_tx_queues != peer_dev->num_rx_queues)
57 		goto out;
58 
59 	txq = netdev_get_tx_queue(peer_dev, idx);
60 	if (!netif_tx_queue_stopped(txq))
61 		goto out;
62 
63 	netif_tx_wake_queue(txq);
64 out:
65 	rcu_read_unlock();
66 }
67 
68 static void nsim_stop_tx_queue(struct net_device *tx_dev,
69 			       struct net_device *rx_dev,
70 			       struct nsim_rq *rq,
71 			       u16 idx)
72 {
73 	/* If different queues size, do not stop, since it is not
74 	 * easy to find which TX queue is mapped here
75 	 */
76 	if (rx_dev->real_num_tx_queues != tx_dev->num_rx_queues)
77 		return;
78 
79 	/* rq is the queue on the receive side */
80 	netif_subqueue_try_stop(tx_dev, idx,
81 				NSIM_RING_SIZE - skb_queue_len(&rq->skb_queue),
82 				NSIM_RING_SIZE / 2);
83 }
84 
85 static int nsim_napi_rx(struct net_device *tx_dev, struct net_device *rx_dev,
86 			struct nsim_rq *rq, struct sk_buff *skb)
87 {
88 	if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
89 		dev_kfree_skb_any(skb);
90 		return NET_RX_DROP;
91 	}
92 
93 	skb_queue_tail(&rq->skb_queue, skb);
94 
95 	/* Stop the peer TX queue avoiding dropping packets later */
96 	if (skb_queue_len(&rq->skb_queue) >= NSIM_RING_SIZE)
97 		nsim_stop_tx_queue(tx_dev, rx_dev, rq,
98 				   skb_get_queue_mapping(skb));
99 
100 	return NET_RX_SUCCESS;
101 }
102 
/* Forward @skb from @tx_dev to @rx_dev and queue it on @rq.
 *
 * If __dev_forward_skb() fails, its error is returned and the reference on
 * @psp_ext (when present) is dropped here. Otherwise the PSP extension is
 * handed to nsim_psp_handle_ext() and the result of nsim_napi_rx() is
 * returned.
 */
static int nsim_forward_skb(struct net_device *tx_dev,
			    struct net_device *rx_dev,
			    struct sk_buff *skb,
			    struct nsim_rq *rq,
			    struct skb_ext *psp_ext)
{
	int err = __dev_forward_skb(rx_dev, skb);

	if (!err) {
		nsim_psp_handle_ext(skb, psp_ext);
		return nsim_napi_rx(tx_dev, rx_dev, rq, skb);
	}

	if (psp_ext)
		__skb_ext_put(psp_ext);

	return err;
}
122 
123 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
124 {
125 	struct netdevsim *ns = netdev_priv(dev);
126 	struct skb_ext *psp_ext = NULL;
127 	struct net_device *peer_dev;
128 	unsigned int len = skb->len;
129 	struct netdevsim *peer_ns;
130 	struct netdev_config *cfg;
131 	struct nsim_rq *rq;
132 	int rxq;
133 	int dr;
134 
135 	rcu_read_lock();
136 	if (!nsim_ipsec_tx(ns, skb))
137 		goto out_drop_any;
138 
139 	/* Check if loopback mode is enabled */
140 	if (dev->features & NETIF_F_LOOPBACK) {
141 		peer_ns = ns;
142 		peer_dev = dev;
143 	} else {
144 		peer_ns = rcu_dereference(ns->peer);
145 		if (!peer_ns)
146 			goto out_drop_any;
147 		peer_dev = peer_ns->netdev;
148 	}
149 
150 	dr = nsim_do_psp(skb, ns, peer_ns, &psp_ext);
151 	if (dr)
152 		goto out_drop_free;
153 
154 	rxq = skb_get_queue_mapping(skb);
155 	if (rxq >= peer_dev->num_rx_queues)
156 		rxq = rxq % peer_dev->num_rx_queues;
157 	rq = peer_ns->rq[rxq];
158 
159 	cfg = peer_dev->cfg;
160 	if (skb_is_nonlinear(skb) &&
161 	    (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
162 	     (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
163 	      cfg->hds_thresh > len)))
164 		skb_linearize(skb);
165 
166 	skb_tx_timestamp(skb);
167 	if (unlikely(nsim_forward_skb(dev, peer_dev,
168 				      skb, rq, psp_ext) == NET_RX_DROP))
169 		goto out_drop_cnt;
170 
171 	if (!hrtimer_active(&rq->napi_timer))
172 		hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL);
173 
174 	rcu_read_unlock();
175 	dev_dstats_tx_add(dev, len);
176 	return NETDEV_TX_OK;
177 
178 out_drop_any:
179 	dr = SKB_DROP_REASON_NOT_SPECIFIED;
180 out_drop_free:
181 	kfree_skb_reason(skb, dr);
182 out_drop_cnt:
183 	rcu_read_unlock();
184 	dev_dstats_tx_dropped(dev);
185 	return NETDEV_TX_OK;
186 }
187 
/* ndo_set_rx_mode: intentionally empty, there is nothing to program. */
static void nsim_set_rx_mode(struct net_device *dev)
{
}
191 
192 static int nsim_change_mtu(struct net_device *dev, int new_mtu)
193 {
194 	struct netdevsim *ns = netdev_priv(dev);
195 
196 	if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags &&
197 	    new_mtu > NSIM_XDP_MAX_MTU)
198 		return -EBUSY;
199 
200 	WRITE_ONCE(dev->mtu, new_mtu);
201 
202 	return 0;
203 }
204 
205 static int
206 nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
207 {
208 	return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv);
209 }
210 
211 static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
212 {
213 	struct netdevsim *ns = netdev_priv(dev);
214 	struct nsim_dev *nsim_dev = ns->nsim_dev;
215 
216 	/* Only refuse multicast addresses, zero address can mean unset/any. */
217 	if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac))
218 		return -EINVAL;
219 	memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
220 
221 	return 0;
222 }
223 
224 static int nsim_set_vf_vlan(struct net_device *dev, int vf,
225 			    u16 vlan, u8 qos, __be16 vlan_proto)
226 {
227 	struct netdevsim *ns = netdev_priv(dev);
228 	struct nsim_dev *nsim_dev = ns->nsim_dev;
229 
230 	if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7)
231 		return -EINVAL;
232 
233 	nsim_dev->vfconfigs[vf].vlan = vlan;
234 	nsim_dev->vfconfigs[vf].qos = qos;
235 	nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto;
236 
237 	return 0;
238 }
239 
240 static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
241 {
242 	struct netdevsim *ns = netdev_priv(dev);
243 	struct nsim_dev *nsim_dev = ns->nsim_dev;
244 
245 	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
246 		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
247 		return -EOPNOTSUPP;
248 	}
249 
250 	if (vf >= nsim_dev_get_vfs(nsim_dev))
251 		return -EINVAL;
252 
253 	nsim_dev->vfconfigs[vf].min_tx_rate = min;
254 	nsim_dev->vfconfigs[vf].max_tx_rate = max;
255 
256 	return 0;
257 }
258 
259 static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
260 {
261 	struct netdevsim *ns = netdev_priv(dev);
262 	struct nsim_dev *nsim_dev = ns->nsim_dev;
263 
264 	if (vf >= nsim_dev_get_vfs(nsim_dev))
265 		return -EINVAL;
266 	nsim_dev->vfconfigs[vf].spoofchk_enabled = val;
267 
268 	return 0;
269 }
270 
271 static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
272 {
273 	struct netdevsim *ns = netdev_priv(dev);
274 	struct nsim_dev *nsim_dev = ns->nsim_dev;
275 
276 	if (vf >= nsim_dev_get_vfs(nsim_dev))
277 		return -EINVAL;
278 	nsim_dev->vfconfigs[vf].rss_query_enabled = val;
279 
280 	return 0;
281 }
282 
283 static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
284 {
285 	struct netdevsim *ns = netdev_priv(dev);
286 	struct nsim_dev *nsim_dev = ns->nsim_dev;
287 
288 	if (vf >= nsim_dev_get_vfs(nsim_dev))
289 		return -EINVAL;
290 	nsim_dev->vfconfigs[vf].trusted = val;
291 
292 	return 0;
293 }
294 
295 static int
296 nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
297 {
298 	struct netdevsim *ns = netdev_priv(dev);
299 	struct nsim_dev *nsim_dev = ns->nsim_dev;
300 
301 	if (vf >= nsim_dev_get_vfs(nsim_dev))
302 		return -EINVAL;
303 
304 	ivi->vf = vf;
305 	ivi->linkstate = nsim_dev->vfconfigs[vf].link_state;
306 	ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate;
307 	ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate;
308 	ivi->vlan = nsim_dev->vfconfigs[vf].vlan;
309 	ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto;
310 	ivi->qos = nsim_dev->vfconfigs[vf].qos;
311 	memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
312 	ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled;
313 	ivi->trusted = nsim_dev->vfconfigs[vf].trusted;
314 	ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled;
315 
316 	return 0;
317 }
318 
319 static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
320 {
321 	struct netdevsim *ns = netdev_priv(dev);
322 	struct nsim_dev *nsim_dev = ns->nsim_dev;
323 
324 	if (vf >= nsim_dev_get_vfs(nsim_dev))
325 		return -EINVAL;
326 
327 	switch (state) {
328 	case IFLA_VF_LINK_STATE_AUTO:
329 	case IFLA_VF_LINK_STATE_ENABLE:
330 	case IFLA_VF_LINK_STATE_DISABLE:
331 		break;
332 	default:
333 		return -EINVAL;
334 	}
335 
336 	nsim_dev->vfconfigs[vf].link_state = state;
337 
338 	return 0;
339 }
340 
341 static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats)
342 {
343 	stats->window_drops = 0;
344 	stats->tx_overruns = 0;
345 }
346 
347 static int nsim_setup_tc_taprio(struct net_device *dev,
348 				struct tc_taprio_qopt_offload *offload)
349 {
350 	int err = 0;
351 
352 	switch (offload->cmd) {
353 	case TAPRIO_CMD_REPLACE:
354 	case TAPRIO_CMD_DESTROY:
355 		break;
356 	case TAPRIO_CMD_STATS:
357 		nsim_taprio_stats(&offload->stats);
358 		break;
359 	default:
360 		err = -EOPNOTSUPP;
361 	}
362 
363 	return err;
364 }
365 
366 static LIST_HEAD(nsim_block_cb_list);
367 
368 static int
369 nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data)
370 {
371 	struct netdevsim *ns = netdev_priv(dev);
372 
373 	switch (type) {
374 	case TC_SETUP_QDISC_TAPRIO:
375 		return nsim_setup_tc_taprio(dev, type_data);
376 	case TC_SETUP_BLOCK:
377 		return flow_block_cb_setup_simple(type_data,
378 						  &nsim_block_cb_list,
379 						  nsim_setup_tc_block_cb,
380 						  ns, ns, true);
381 	default:
382 		return -EOPNOTSUPP;
383 	}
384 }
385 
386 static int
387 nsim_set_features(struct net_device *dev, netdev_features_t features)
388 {
389 	struct netdevsim *ns = netdev_priv(dev);
390 
391 	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
392 		return nsim_bpf_disable_tc(ns);
393 
394 	return 0;
395 }
396 
397 static int nsim_get_iflink(const struct net_device *dev)
398 {
399 	struct netdevsim *nsim, *peer;
400 	int iflink;
401 
402 	nsim = netdev_priv(dev);
403 
404 	rcu_read_lock();
405 	peer = rcu_dereference(nsim->peer);
406 	iflink = peer ? READ_ONCE(peer->netdev->ifindex) :
407 			READ_ONCE(dev->ifindex);
408 	rcu_read_unlock();
409 
410 	return iflink;
411 }
412 
413 static int nsim_rcv(struct nsim_rq *rq, int budget)
414 {
415 	struct net_device *dev = rq->napi.dev;
416 	struct bpf_prog *xdp_prog;
417 	struct netdevsim *ns;
418 	struct sk_buff *skb;
419 	unsigned int skblen;
420 	int i, ret;
421 
422 	ns = netdev_priv(dev);
423 	xdp_prog = READ_ONCE(ns->xdp.prog);
424 
425 	for (i = 0; i < budget; i++) {
426 		if (skb_queue_empty(&rq->skb_queue))
427 			break;
428 
429 		skb = skb_dequeue(&rq->skb_queue);
430 
431 		if (xdp_prog) {
432 			/* skb might be freed directly by XDP, save the len */
433 			skblen = skb->len;
434 
435 			if (skb->ip_summed == CHECKSUM_PARTIAL)
436 				skb_checksum_help(skb);
437 			ret = do_xdp_generic(xdp_prog, &skb);
438 			if (ret != XDP_PASS) {
439 				dev_dstats_rx_add(dev, skblen);
440 				continue;
441 			}
442 		}
443 
444 		/* skb might be discard at netif_receive_skb, save the len */
445 		dev_dstats_rx_add(dev, skb->len);
446 		napi_gro_receive(&rq->napi, skb);
447 	}
448 
449 	nsim_start_peer_tx_queue(dev, rq);
450 	return i;
451 }
452 
453 static int nsim_poll(struct napi_struct *napi, int budget)
454 {
455 	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
456 	int done;
457 
458 	done = nsim_rcv(rq, budget);
459 	if (done < budget)
460 		napi_complete_done(napi, done);
461 
462 	return done;
463 }
464 
465 static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
466 {
467 	struct page_pool_params params = {
468 		.order = 0,
469 		.pool_size = NSIM_RING_SIZE,
470 		.nid = NUMA_NO_NODE,
471 		.dev = &napi->dev->dev,
472 		.napi = napi,
473 		.dma_dir = DMA_BIDIRECTIONAL,
474 		.netdev = napi->dev,
475 	};
476 	struct page_pool *pool;
477 
478 	pool = page_pool_create(&params);
479 	if (IS_ERR(pool))
480 		return PTR_ERR(pool);
481 
482 	*p = pool;
483 	return 0;
484 }
485 
486 static int nsim_init_napi(struct netdevsim *ns)
487 {
488 	struct net_device *dev = ns->netdev;
489 	struct nsim_rq *rq;
490 	int err, i;
491 
492 	for (i = 0; i < dev->num_rx_queues; i++) {
493 		rq = ns->rq[i];
494 
495 		netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i);
496 	}
497 
498 	for (i = 0; i < dev->num_rx_queues; i++) {
499 		rq = ns->rq[i];
500 
501 		err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
502 		if (err)
503 			goto err_pp_destroy;
504 	}
505 
506 	return 0;
507 
508 err_pp_destroy:
509 	while (i--) {
510 		page_pool_destroy(ns->rq[i]->page_pool);
511 		ns->rq[i]->page_pool = NULL;
512 	}
513 
514 	for (i = 0; i < dev->num_rx_queues; i++)
515 		__netif_napi_del_locked(&ns->rq[i]->napi);
516 
517 	return err;
518 }
519 
520 static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer)
521 {
522 	struct nsim_rq *rq;
523 
524 	rq = container_of(timer, struct nsim_rq, napi_timer);
525 	napi_schedule(&rq->napi);
526 
527 	return HRTIMER_NORESTART;
528 }
529 
530 static void nsim_rq_timer_init(struct nsim_rq *rq)
531 {
532 	hrtimer_setup(&rq->napi_timer, nsim_napi_schedule, CLOCK_MONOTONIC,
533 		      HRTIMER_MODE_REL);
534 }
535 
536 static void nsim_enable_napi(struct netdevsim *ns)
537 {
538 	struct net_device *dev = ns->netdev;
539 	int i;
540 
541 	for (i = 0; i < dev->num_rx_queues; i++) {
542 		struct nsim_rq *rq = ns->rq[i];
543 
544 		netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
545 		napi_enable_locked(&rq->napi);
546 	}
547 }
548 
549 static int nsim_open(struct net_device *dev)
550 {
551 	struct netdevsim *ns = netdev_priv(dev);
552 	struct netdevsim *peer;
553 	int err;
554 
555 	netdev_assert_locked(dev);
556 
557 	err = nsim_init_napi(ns);
558 	if (err)
559 		return err;
560 
561 	nsim_enable_napi(ns);
562 
563 	peer = rtnl_dereference(ns->peer);
564 	if (peer && netif_running(peer->netdev)) {
565 		netif_carrier_on(dev);
566 		netif_carrier_on(peer->netdev);
567 	}
568 
569 	return 0;
570 }
571 
572 static void nsim_del_napi(struct netdevsim *ns)
573 {
574 	struct net_device *dev = ns->netdev;
575 	int i;
576 
577 	for (i = 0; i < dev->num_rx_queues; i++) {
578 		struct nsim_rq *rq = ns->rq[i];
579 
580 		napi_disable_locked(&rq->napi);
581 		__netif_napi_del_locked(&rq->napi);
582 	}
583 	synchronize_net();
584 
585 	for (i = 0; i < dev->num_rx_queues; i++) {
586 		page_pool_destroy(ns->rq[i]->page_pool);
587 		ns->rq[i]->page_pool = NULL;
588 	}
589 }
590 
591 static int nsim_stop(struct net_device *dev)
592 {
593 	struct netdevsim *ns = netdev_priv(dev);
594 	struct netdevsim *peer;
595 
596 	netdev_assert_locked(dev);
597 
598 	netif_carrier_off(dev);
599 	peer = rtnl_dereference(ns->peer);
600 	if (peer)
601 		netif_carrier_off(peer->netdev);
602 
603 	nsim_del_napi(ns);
604 
605 	return 0;
606 }
607 
/* net_shaper_ops.set stub: accepts any shaper and reports success. */
static int nsim_shaper_set(struct net_shaper_binding *binding,
			   const struct net_shaper *shaper,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
614 
/* net_shaper_ops.delete stub: nothing to remove, always reports success. */
static int nsim_shaper_del(struct net_shaper_binding *binding,
			   const struct net_shaper_handle *handle,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
621 
/* net_shaper_ops.group stub: accepts any grouping and reports success. */
static int nsim_shaper_group(struct net_shaper_binding *binding,
			     int leaves_count,
			     const struct net_shaper *leaves,
			     const struct net_shaper *root,
			     struct netlink_ext_ack *extack)
{
	return 0;
}
630 
631 static void nsim_shaper_cap(struct net_shaper_binding *binding,
632 			    enum net_shaper_scope scope,
633 			    unsigned long *flags)
634 {
635 	*flags = ULONG_MAX;
636 }
637 
/* Shaper callbacks: set/delete/group are no-op stubs that return success,
 * capabilities reports all flag bits set.
 */
static const struct net_shaper_ops nsim_shaper_ops = {
	.set			= nsim_shaper_set,
	.delete			= nsim_shaper_del,
	.group			= nsim_shaper_group,
	.capabilities		= nsim_shaper_cap,
};
644 
/* Device operations installed by nsim_init_netdevsim(), i.e. for PF ports.
 * netdev_is_nsim() identifies netdevsim devices by comparing against this
 * table.
 */
static const struct net_device_ops nsim_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_set_vf_mac		= nsim_set_vf_mac,
	.ndo_set_vf_vlan	= nsim_set_vf_vlan,
	.ndo_set_vf_rate	= nsim_set_vf_rate,
	.ndo_set_vf_spoofchk	= nsim_set_vf_spoofchk,
	.ndo_set_vf_trust	= nsim_set_vf_trust,
	.ndo_get_vf_config	= nsim_get_vf_config,
	.ndo_set_vf_link_state	= nsim_set_vf_link_state,
	.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
	.ndo_get_iflink		= nsim_get_iflink,
	.ndo_bpf		= nsim_bpf,
	.ndo_open		= nsim_open,
	.ndo_stop		= nsim_stop,
	.net_shaper_ops		= &nsim_shaper_ops,
};
667 
/* Reduced device operations installed by nsim_init_netdevsim_vf(): no VF
 * management, open/stop, BPF, iflink or shaper callbacks.
 */
static const struct net_device_ops nsim_vf_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
};
677 
678 /* We don't have true per-queue stats, yet, so do some random fakery here.
679  * Only report stuff for queue 0.
680  */
681 static void nsim_get_queue_stats_rx(struct net_device *dev, int idx,
682 				    struct netdev_queue_stats_rx *stats)
683 {
684 	struct rtnl_link_stats64 rtstats = {};
685 
686 	if (!idx)
687 		dev_get_stats(dev, &rtstats);
688 
689 	stats->packets = rtstats.rx_packets - !!rtstats.rx_packets;
690 	stats->bytes = rtstats.rx_bytes;
691 }
692 
693 static void nsim_get_queue_stats_tx(struct net_device *dev, int idx,
694 				    struct netdev_queue_stats_tx *stats)
695 {
696 	struct rtnl_link_stats64 rtstats = {};
697 
698 	if (!idx)
699 		dev_get_stats(dev, &rtstats);
700 
701 	stats->packets = rtstats.tx_packets - !!rtstats.tx_packets;
702 	stats->bytes = rtstats.tx_bytes;
703 }
704 
705 static void nsim_get_base_stats(struct net_device *dev,
706 				struct netdev_queue_stats_rx *rx,
707 				struct netdev_queue_stats_tx *tx)
708 {
709 	struct rtnl_link_stats64 rtstats = {};
710 
711 	dev_get_stats(dev, &rtstats);
712 
713 	rx->packets = !!rtstats.rx_packets;
714 	rx->bytes = 0;
715 	tx->packets = !!rtstats.tx_packets;
716 	tx->bytes = 0;
717 }
718 
/* Queue statistics callbacks, installed by nsim_init_netdevsim(). */
static const struct netdev_stat_ops nsim_stat_ops = {
	.get_queue_stats_tx	= nsim_get_queue_stats_tx,
	.get_queue_stats_rx	= nsim_get_queue_stats_rx,
	.get_base_stats		= nsim_get_base_stats,
};
724 
725 static struct nsim_rq *nsim_queue_alloc(void)
726 {
727 	struct nsim_rq *rq;
728 
729 	rq = kzalloc_obj(*rq, GFP_KERNEL_ACCOUNT);
730 	if (!rq)
731 		return NULL;
732 
733 	skb_queue_head_init(&rq->skb_queue);
734 	nsim_rq_timer_init(rq);
735 	return rq;
736 }
737 
738 static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq)
739 {
740 	hrtimer_cancel(&rq->napi_timer);
741 
742 	if (rq->skb_queue.qlen) {
743 		local_bh_disable();
744 		dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen);
745 		local_bh_enable();
746 	}
747 
748 	skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
749 	kfree(rq);
750 }
751 
/* Queue reset mode is controlled by ns->rq_reset_mode.
 * - normal (0) - new NAPI new pool (old NAPI enabled when new added)
 * - mode 1 - allocate new pool (NAPI is only disabled / enabled)
 * - mode 2 - new NAPI new pool (old NAPI removed before new added)
 * - mode 3 - new NAPI new pool (old NAPI disabled when new added)
 *
 * Per-queue memory exchanged with the queue management callbacks:
 * @rq carries a whole queue in every mode except mode 1, @pp carries just
 * a page pool in mode 1.
 */
struct nsim_queue_mem {
	struct nsim_rq *rq;
	struct page_pool *pp;
};
762 
763 static int
764 nsim_queue_mem_alloc(struct net_device *dev,
765 		     struct netdev_queue_config *qcfg,
766 		     void *per_queue_mem, int idx)
767 {
768 	struct nsim_queue_mem *qmem = per_queue_mem;
769 	struct netdevsim *ns = netdev_priv(dev);
770 	int err;
771 
772 	if (ns->rq_reset_mode > 3)
773 		return -EINVAL;
774 
775 	if (ns->rq_reset_mode == 1) {
776 		if (!netif_running(ns->netdev))
777 			return -ENETDOWN;
778 		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
779 	}
780 
781 	qmem->rq = nsim_queue_alloc();
782 	if (!qmem->rq)
783 		return -ENOMEM;
784 
785 	err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
786 	if (err)
787 		goto err_free;
788 
789 	if (!ns->rq_reset_mode)
790 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
791 					     idx);
792 
793 	return 0;
794 
795 err_free:
796 	nsim_queue_free(dev, qmem->rq);
797 	return err;
798 }
799 
800 static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
801 {
802 	struct nsim_queue_mem *qmem = per_queue_mem;
803 	struct netdevsim *ns = netdev_priv(dev);
804 
805 	page_pool_destroy(qmem->pp);
806 	if (qmem->rq) {
807 		if (!ns->rq_reset_mode)
808 			netif_napi_del_locked(&qmem->rq->napi);
809 		page_pool_destroy(qmem->rq->page_pool);
810 		nsim_queue_free(dev, qmem->rq);
811 	}
812 }
813 
814 static int
815 nsim_queue_start(struct net_device *dev, struct netdev_queue_config *qcfg,
816 		 void *per_queue_mem, int idx)
817 {
818 	struct nsim_queue_mem *qmem = per_queue_mem;
819 	struct netdevsim *ns = netdev_priv(dev);
820 
821 	netdev_assert_locked(dev);
822 
823 	if (ns->rq_reset_mode == 1) {
824 		ns->rq[idx]->page_pool = qmem->pp;
825 		napi_enable_locked(&ns->rq[idx]->napi);
826 		return 0;
827 	}
828 
829 	/* netif_napi_add()/_del() should normally be called from alloc/free,
830 	 * here we want to test various call orders.
831 	 */
832 	if (ns->rq_reset_mode == 2) {
833 		netif_napi_del_locked(&ns->rq[idx]->napi);
834 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
835 					     idx);
836 	} else if (ns->rq_reset_mode == 3) {
837 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
838 					     idx);
839 		netif_napi_del_locked(&ns->rq[idx]->napi);
840 	}
841 
842 	ns->rq[idx] = qmem->rq;
843 	napi_enable_locked(&ns->rq[idx]->napi);
844 
845 	return 0;
846 }
847 
848 static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
849 {
850 	struct nsim_queue_mem *qmem = per_queue_mem;
851 	struct netdevsim *ns = netdev_priv(dev);
852 
853 	netdev_assert_locked(dev);
854 
855 	napi_disable_locked(&ns->rq[idx]->napi);
856 
857 	if (ns->rq_reset_mode == 1) {
858 		qmem->pp = ns->rq[idx]->page_pool;
859 		page_pool_disable_direct_recycling(qmem->pp);
860 	} else {
861 		qmem->rq = ns->rq[idx];
862 	}
863 
864 	return 0;
865 }
866 
/* Queue management callbacks, installed by nsim_init_netdevsim() and
 * exercised through the "queue_reset" debugfs file (nsim_qreset_write()).
 */
static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
	.ndo_queue_mem_size	= sizeof(struct nsim_queue_mem),
	.ndo_queue_mem_alloc	= nsim_queue_mem_alloc,
	.ndo_queue_mem_free	= nsim_queue_mem_free,
	.ndo_queue_start	= nsim_queue_start,
	.ndo_queue_stop		= nsim_queue_stop,
};
874 
875 static ssize_t
876 nsim_qreset_write(struct file *file, const char __user *data,
877 		  size_t count, loff_t *ppos)
878 {
879 	struct netdevsim *ns = file->private_data;
880 	unsigned int queue, mode;
881 	char buf[32];
882 	ssize_t ret;
883 
884 	if (count >= sizeof(buf))
885 		return -EINVAL;
886 	if (copy_from_user(buf, data, count))
887 		return -EFAULT;
888 	buf[count] = '\0';
889 
890 	ret = sscanf(buf, "%u %u", &queue, &mode);
891 	if (ret != 2)
892 		return -EINVAL;
893 
894 	netdev_lock(ns->netdev);
895 	if (queue >= ns->netdev->real_num_rx_queues) {
896 		ret = -EINVAL;
897 		goto exit_unlock;
898 	}
899 
900 	ns->rq_reset_mode = mode;
901 	ret = netdev_rx_queue_restart(ns->netdev, queue);
902 	ns->rq_reset_mode = 0;
903 	if (ret)
904 		goto exit_unlock;
905 
906 	ret = count;
907 exit_unlock:
908 	netdev_unlock(ns->netdev);
909 	return ret;
910 }
911 
/* File operations of the write-only "queue_reset" debugfs file. */
static const struct file_operations nsim_qreset_fops = {
	.open = simple_open,
	.write = nsim_qreset_write,
	.owner = THIS_MODULE,
};
917 
918 static ssize_t
919 nsim_pp_hold_read(struct file *file, char __user *data,
920 		  size_t count, loff_t *ppos)
921 {
922 	struct netdevsim *ns = file->private_data;
923 	char buf[3] = "n\n";
924 
925 	if (ns->page)
926 		buf[0] = 'y';
927 
928 	return simple_read_from_buffer(data, count, ppos, buf, 2);
929 }
930 
931 static ssize_t
932 nsim_pp_hold_write(struct file *file, const char __user *data,
933 		   size_t count, loff_t *ppos)
934 {
935 	struct netdevsim *ns = file->private_data;
936 	ssize_t ret;
937 	bool val;
938 
939 	ret = kstrtobool_from_user(data, count, &val);
940 	if (ret)
941 		return ret;
942 
943 	rtnl_lock();
944 	ret = count;
945 	if (val == !!ns->page)
946 		goto exit;
947 
948 	if (!netif_running(ns->netdev) && val) {
949 		ret = -ENETDOWN;
950 	} else if (val) {
951 		ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
952 		if (!ns->page)
953 			ret = -ENOMEM;
954 	} else {
955 		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
956 					ns->page, false);
957 		ns->page = NULL;
958 	}
959 
960 exit:
961 	rtnl_unlock();
962 	return ret;
963 }
964 
/* File operations of the read/write "pp_hold" debugfs file. */
static const struct file_operations nsim_pp_hold_fops = {
	.open = simple_open,
	.read = nsim_pp_hold_read,
	.write = nsim_pp_hold_write,
	.llseek = generic_file_llseek,
	.owner = THIS_MODULE,
};
972 
973 static void nsim_setup(struct net_device *dev)
974 {
975 	ether_setup(dev);
976 	eth_hw_addr_random(dev);
977 
978 	dev->flags &= ~IFF_MULTICAST;
979 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
980 	dev->features |= NETIF_F_HIGHDMA |
981 			 NETIF_F_SG |
982 			 NETIF_F_FRAGLIST |
983 			 NETIF_F_HW_CSUM |
984 			 NETIF_F_LRO |
985 			 NETIF_F_TSO;
986 	dev->hw_features |= NETIF_F_HW_TC |
987 			    NETIF_F_SG |
988 			    NETIF_F_FRAGLIST |
989 			    NETIF_F_HW_CSUM |
990 			    NETIF_F_LRO |
991 			    NETIF_F_TSO |
992 			    NETIF_F_LOOPBACK;
993 	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
994 	dev->max_mtu = ETH_MAX_MTU;
995 	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_HW_OFFLOAD;
996 }
997 
998 static int nsim_queue_init(struct netdevsim *ns)
999 {
1000 	struct net_device *dev = ns->netdev;
1001 	int i;
1002 
1003 	ns->rq = kzalloc_objs(*ns->rq, dev->num_rx_queues, GFP_KERNEL_ACCOUNT);
1004 	if (!ns->rq)
1005 		return -ENOMEM;
1006 
1007 	for (i = 0; i < dev->num_rx_queues; i++) {
1008 		ns->rq[i] = nsim_queue_alloc();
1009 		if (!ns->rq[i])
1010 			goto err_free_prev;
1011 	}
1012 
1013 	return 0;
1014 
1015 err_free_prev:
1016 	while (i--)
1017 		kfree(ns->rq[i]);
1018 	kfree(ns->rq);
1019 	return -ENOMEM;
1020 }
1021 
1022 static void nsim_queue_uninit(struct netdevsim *ns)
1023 {
1024 	struct net_device *dev = ns->netdev;
1025 	int i;
1026 
1027 	for (i = 0; i < dev->num_rx_queues; i++)
1028 		nsim_queue_free(dev, ns->rq[i]);
1029 
1030 	kfree(ns->rq);
1031 	ns->rq = NULL;
1032 }
1033 
/* Full initialization path, used by nsim_create() for PF ports.
 *
 * Creates the mock PHC, installs the netdev/stat/queue-management ops,
 * creates the UDP tunnel info, then under rtnl_lock allocates the RX
 * queues, initializes BPF, MACsec and IPsec and registers the netdev.
 * PSP is initialized after the lock is dropped.
 *
 * Returns 0 on success or a negative error, in which case everything set
 * up so far has been undone.
 */
static int nsim_init_netdevsim(struct netdevsim *ns)
{
	struct netdevsim *peer;
	struct mock_phc *phc;
	int err;

	phc = mock_phc_create(&ns->nsim_bus_dev->dev);
	if (IS_ERR(phc))
		return PTR_ERR(phc);

	ns->phc = phc;
	ns->netdev->netdev_ops = &nsim_netdev_ops;
	ns->netdev->stat_ops = &nsim_stat_ops;
	ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
	netdev_lockdep_set_classes(ns->netdev);

	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
	if (err)
		goto err_phc_destroy;

	rtnl_lock();
	err = nsim_queue_init(ns);
	if (err)
		goto err_utn_destroy;

	err = nsim_bpf_init(ns);
	if (err)
		goto err_rq_destroy;

	nsim_macsec_init(ns);
	nsim_ipsec_init(ns);

	err = register_netdevice(ns->netdev);
	if (err)
		goto err_ipsec_teardown;
	rtnl_unlock();

	err = nsim_psp_init(ns);
	if (err)
		goto err_unregister_netdev;

	/* The debug notifier is optional: if registration fails the callback
	 * is cleared, which is what nsim_destroy() checks before
	 * unregistering it.
	 */
	if (IS_ENABLED(CONFIG_DEBUG_NET)) {
		ns->nb.notifier_call = netdev_debug_event;
		if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
							&ns->nn))
			ns->nb.notifier_call = NULL;
	}

	return 0;

err_unregister_netdev:
	/* rtnl was already released above; retake it so that the labels
	 * below run with the lock held, as on the earlier failure paths.
	 */
	rtnl_lock();
	/* Unlink both directions of the peer relationship before the
	 * device is unregistered.
	 */
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(ns->netdev);
err_ipsec_teardown:
	nsim_ipsec_teardown(ns);
	nsim_macsec_teardown(ns);
	nsim_bpf_uninit(ns);
err_rq_destroy:
	nsim_queue_uninit(ns);
err_utn_destroy:
	rtnl_unlock();
	nsim_udp_tunnels_info_destroy(ns->netdev);
err_phc_destroy:
	mock_phc_destroy(ns->phc);
	return err;
}
1104 
1105 static int nsim_init_netdevsim_vf(struct netdevsim *ns)
1106 {
1107 	int err;
1108 
1109 	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
1110 	rtnl_lock();
1111 	err = register_netdevice(ns->netdev);
1112 	rtnl_unlock();
1113 	return err;
1114 }
1115 
1116 static void nsim_exit_netdevsim(struct netdevsim *ns)
1117 {
1118 	nsim_udp_tunnels_info_destroy(ns->netdev);
1119 	mock_phc_destroy(ns->phc);
1120 }
1121 
1122 struct netdevsim *nsim_create(struct nsim_dev *nsim_dev,
1123 			      struct nsim_dev_port *nsim_dev_port,
1124 			      u8 perm_addr[ETH_ALEN])
1125 {
1126 	struct net_device *dev;
1127 	struct netdevsim *ns;
1128 	int err;
1129 
1130 	dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
1131 			      nsim_dev->nsim_bus_dev->num_queues);
1132 	if (!dev)
1133 		return ERR_PTR(-ENOMEM);
1134 
1135 	if (perm_addr)
1136 		memcpy(dev->perm_addr, perm_addr, ETH_ALEN);
1137 
1138 	dev_net_set(dev, nsim_dev_net(nsim_dev));
1139 	ns = netdev_priv(dev);
1140 	ns->netdev = dev;
1141 	ns->nsim_dev = nsim_dev;
1142 	ns->nsim_dev_port = nsim_dev_port;
1143 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
1144 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
1145 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
1146 	nsim_ethtool_init(ns);
1147 	if (nsim_dev_port_is_pf(nsim_dev_port))
1148 		err = nsim_init_netdevsim(ns);
1149 	else
1150 		err = nsim_init_netdevsim_vf(ns);
1151 	if (err)
1152 		goto err_free_netdev;
1153 
1154 	ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
1155 					 ns, &nsim_pp_hold_fops);
1156 	ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
1157 					 nsim_dev_port->ddir, ns,
1158 					 &nsim_qreset_fops);
1159 	return ns;
1160 
1161 err_free_netdev:
1162 	free_netdev(dev);
1163 	return ERR_PTR(err);
1164 }
1165 
/* Tear down a netdevsim instance created by nsim_create() and free its
 * netdev.
 *
 * The debugfs files, the optional debug notifier and PSP state go first.
 * Then, under rtnl_lock, the peer link is severed in both directions and
 * the netdev is unregistered; PF ports additionally release MACsec, IPsec,
 * BPF and RX queue state, followed by the UDP tunnel info and PHC once the
 * lock is dropped.
 */
void nsim_destroy(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	struct netdevsim *peer;

	debugfs_remove(ns->qr_dfs);
	debugfs_remove(ns->pp_dfs);

	/* notifier_call is only left set when registration succeeded */
	if (ns->nb.notifier_call)
		unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
						      &ns->nn);

	nsim_psp_uninit(ns);

	rtnl_lock();
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(dev);
	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
		nsim_macsec_teardown(ns);
		nsim_ipsec_teardown(ns);
		nsim_bpf_uninit(ns);
		nsim_queue_uninit(ns);
	}
	rtnl_unlock();
	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
		nsim_exit_netdevsim(ns);

	/* Put this intentionally late to exercise the orphaning path */
	if (ns->page) {
		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
					ns->page, false);
		ns->page = NULL;
	}

	free_netdev(dev);
}
1205 
1206 bool netdev_is_nsim(struct net_device *dev)
1207 {
1208 	return dev->netdev_ops == &nsim_netdev_ops;
1209 }
1210 
1211 static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
1212 			 struct netlink_ext_ack *extack)
1213 {
1214 	NL_SET_ERR_MSG_MOD(extack,
1215 			   "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
1216 	return -EOPNOTSUPP;
1217 }
1218 
/* rtnetlink link ops for DRV_NAME; the only callback is a validate hook
 * that always returns -EOPNOTSUPP.
 */
static struct rtnl_link_ops nsim_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.validate	= nsim_validate,
};
1223 
1224 static int __init nsim_module_init(void)
1225 {
1226 	int err;
1227 
1228 	err = nsim_dev_init();
1229 	if (err)
1230 		return err;
1231 
1232 	err = nsim_bus_init();
1233 	if (err)
1234 		goto err_dev_exit;
1235 
1236 	err = rtnl_link_register(&nsim_link_ops);
1237 	if (err)
1238 		goto err_bus_exit;
1239 
1240 	return 0;
1241 
1242 err_bus_exit:
1243 	nsim_bus_exit();
1244 err_dev_exit:
1245 	nsim_dev_exit();
1246 	return err;
1247 }
1248 
1249 static void __exit nsim_module_exit(void)
1250 {
1251 	rtnl_link_unregister(&nsim_link_ops);
1252 	nsim_bus_exit();
1253 	nsim_dev_exit();
1254 }
1255 
/* Module entry/exit points and metadata */
module_init(nsim_module_init);
module_exit(nsim_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simulated networking device for testing");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1261