xref: /linux/net/core/netdev-genl.c (revision 65d657d806848add1e1f0632562d7f47d5d5c188)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 #include <linux/netdevice.h>
4 #include <linux/notifier.h>
5 #include <linux/rtnetlink.h>
6 #include <net/busy_poll.h>
7 #include <net/net_namespace.h>
8 #include <net/netdev_queues.h>
9 #include <net/netdev_rx_queue.h>
10 #include <net/sock.h>
11 #include <net/xdp.h>
12 #include <net/xdp_sock.h>
13 #include <net/page_pool/memory_provider.h>
14 
15 #include "dev.h"
16 #include "devmem.h"
17 #include "netdev-genl-gen.h"
18 
/*
 * Dump continuation state. It is overlaid on netlink_callback::ctx (see
 * netdev_dump_ctx()) so the dumpit handlers in this file can resume where
 * the previous invocation stopped.
 */
struct netdev_nl_dump_ctx {
	unsigned long	ifindex;	/* cursor for the netdev iteration */
	unsigned int	rxq_idx;	/* next Rx queue index to dump */
	unsigned int	txq_idx;	/* next Tx queue index to dump */
	unsigned int	napi_id;	/* ID of the last NAPI dumped, 0 if none */
};
25 
26 static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
27 {
28 	NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx);
29 
30 	return (struct netdev_nl_dump_ctx *)cb->ctx;
31 }
32 
33 static int
34 netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
35 		   const struct genl_info *info)
36 {
37 	u64 xsk_features = 0;
38 	u64 xdp_rx_meta = 0;
39 	void *hdr;
40 
41 	netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */
42 
43 	hdr = genlmsg_iput(rsp, info);
44 	if (!hdr)
45 		return -EMSGSIZE;
46 
47 #define XDP_METADATA_KFUNC(_, flag, __, xmo) \
48 	if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \
49 		xdp_rx_meta |= flag;
50 XDP_METADATA_KFUNC_xxx
51 #undef XDP_METADATA_KFUNC
52 
53 	if (netdev->xsk_tx_metadata_ops) {
54 		if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
55 			xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
56 		if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
57 			xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
58 		if (netdev->xsk_tx_metadata_ops->tmo_request_launch_time)
59 			xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO;
60 	}
61 
62 	if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
63 	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
64 			      netdev->xdp_features, NETDEV_A_DEV_PAD) ||
65 	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
66 			      xdp_rx_meta, NETDEV_A_DEV_PAD) ||
67 	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
68 			      xsk_features, NETDEV_A_DEV_PAD))
69 		goto err_cancel_msg;
70 
71 	if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
72 		if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
73 				netdev->xdp_zc_max_segs))
74 			goto err_cancel_msg;
75 	}
76 
77 	genlmsg_end(rsp, hdr);
78 
79 	return 0;
80 
81 err_cancel_msg:
82 	genlmsg_cancel(rsp, hdr);
83 	return -EMSGSIZE;
84 }
85 
86 static void
87 netdev_genl_dev_notify(struct net_device *netdev, int cmd)
88 {
89 	struct genl_info info;
90 	struct sk_buff *ntf;
91 
92 	if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
93 				NETDEV_NLGRP_MGMT))
94 		return;
95 
96 	genl_info_init_ntf(&info, &netdev_nl_family, cmd);
97 
98 	ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
99 	if (!ntf)
100 		return;
101 
102 	if (netdev_nl_dev_fill(netdev, ntf, &info)) {
103 		nlmsg_free(ntf);
104 		return;
105 	}
106 
107 	genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf,
108 				0, NETDEV_NLGRP_MGMT, GFP_KERNEL);
109 }
110 
111 int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
112 {
113 	struct net_device *netdev;
114 	struct sk_buff *rsp;
115 	u32 ifindex;
116 	int err;
117 
118 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX))
119 		return -EINVAL;
120 
121 	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
122 
123 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
124 	if (!rsp)
125 		return -ENOMEM;
126 
127 	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
128 	if (!netdev) {
129 		err = -ENODEV;
130 		goto err_free_msg;
131 	}
132 
133 	err = netdev_nl_dev_fill(netdev, rsp, info);
134 	netdev_unlock(netdev);
135 
136 	if (err)
137 		goto err_free_msg;
138 
139 	return genlmsg_reply(rsp, info);
140 
141 err_free_msg:
142 	nlmsg_free(rsp);
143 	return err;
144 }
145 
146 int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
147 {
148 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
149 	struct net *net = sock_net(skb->sk);
150 	int err;
151 
152 	for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
153 		err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
154 		if (err < 0)
155 			return err;
156 	}
157 
158 	return 0;
159 }
160 
161 static int
162 netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
163 			const struct genl_info *info)
164 {
165 	unsigned long irq_suspend_timeout;
166 	unsigned long gro_flush_timeout;
167 	u32 napi_defer_hard_irqs;
168 	void *hdr;
169 	pid_t pid;
170 
171 	if (!napi->dev->up)
172 		return 0;
173 
174 	hdr = genlmsg_iput(rsp, info);
175 	if (!hdr)
176 		return -EMSGSIZE;
177 
178 	if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
179 		goto nla_put_failure;
180 
181 	if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
182 		goto nla_put_failure;
183 
184 	if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
185 		goto nla_put_failure;
186 
187 	if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED,
188 			 napi_get_threaded(napi)))
189 		goto nla_put_failure;
190 
191 	if (napi->thread) {
192 		pid = task_pid_nr(napi->thread);
193 		if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
194 			goto nla_put_failure;
195 	}
196 
197 	napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi);
198 	if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS,
199 			napi_defer_hard_irqs))
200 		goto nla_put_failure;
201 
202 	irq_suspend_timeout = napi_get_irq_suspend_timeout(napi);
203 	if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
204 			 irq_suspend_timeout))
205 		goto nla_put_failure;
206 
207 	gro_flush_timeout = napi_get_gro_flush_timeout(napi);
208 	if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
209 			 gro_flush_timeout))
210 		goto nla_put_failure;
211 
212 	genlmsg_end(rsp, hdr);
213 
214 	return 0;
215 
216 nla_put_failure:
217 	genlmsg_cancel(rsp, hdr);
218 	return -EMSGSIZE;
219 }
220 
221 int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
222 {
223 	struct napi_struct *napi;
224 	struct sk_buff *rsp;
225 	u32 napi_id;
226 	int err;
227 
228 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
229 		return -EINVAL;
230 
231 	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
232 
233 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
234 	if (!rsp)
235 		return -ENOMEM;
236 
237 	napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
238 	if (napi) {
239 		err = netdev_nl_napi_fill_one(rsp, napi, info);
240 		netdev_unlock(napi->dev);
241 	} else {
242 		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
243 		err = -ENOENT;
244 	}
245 
246 	if (err) {
247 		goto err_free_msg;
248 	} else if (!rsp->len) {
249 		err = -ENOENT;
250 		goto err_free_msg;
251 	}
252 
253 	return genlmsg_reply(rsp, info);
254 
255 err_free_msg:
256 	nlmsg_free(rsp);
257 	return err;
258 }
259 
260 static int
261 netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
262 			const struct genl_info *info,
263 			struct netdev_nl_dump_ctx *ctx)
264 {
265 	struct napi_struct *napi;
266 	unsigned int prev_id;
267 	int err = 0;
268 
269 	if (!netdev->up)
270 		return err;
271 
272 	prev_id = UINT_MAX;
273 	list_for_each_entry(napi, &netdev->napi_list, dev_list) {
274 		if (!napi_id_valid(napi->napi_id))
275 			continue;
276 
277 		/* Dump continuation below depends on the list being sorted */
278 		WARN_ON_ONCE(napi->napi_id >= prev_id);
279 		prev_id = napi->napi_id;
280 
281 		if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
282 			continue;
283 
284 		err = netdev_nl_napi_fill_one(rsp, napi, info);
285 		if (err)
286 			return err;
287 		ctx->napi_id = napi->napi_id;
288 	}
289 	return err;
290 }
291 
292 int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
293 {
294 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
295 	const struct genl_info *info = genl_info_dump(cb);
296 	struct net *net = sock_net(skb->sk);
297 	struct net_device *netdev;
298 	u32 ifindex = 0;
299 	int err = 0;
300 
301 	if (info->attrs[NETDEV_A_NAPI_IFINDEX])
302 		ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
303 
304 	if (ifindex) {
305 		netdev = netdev_get_by_index_lock(net, ifindex);
306 		if (netdev) {
307 			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
308 			netdev_unlock(netdev);
309 		} else {
310 			err = -ENODEV;
311 		}
312 	} else {
313 		for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
314 			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
315 			if (err < 0)
316 				break;
317 			ctx->napi_id = 0;
318 		}
319 	}
320 
321 	return err;
322 }
323 
324 static int
325 netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
326 {
327 	u64 irq_suspend_timeout = 0;
328 	u64 gro_flush_timeout = 0;
329 	u8 threaded = 0;
330 	u32 defer = 0;
331 
332 	if (info->attrs[NETDEV_A_NAPI_THREADED]) {
333 		int ret;
334 
335 		threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]);
336 		ret = napi_set_threaded(napi, threaded);
337 		if (ret)
338 			return ret;
339 	}
340 
341 	if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
342 		defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
343 		napi_set_defer_hard_irqs(napi, defer);
344 	}
345 
346 	if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) {
347 		irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]);
348 		napi_set_irq_suspend_timeout(napi, irq_suspend_timeout);
349 	}
350 
351 	if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
352 		gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
353 		napi_set_gro_flush_timeout(napi, gro_flush_timeout);
354 	}
355 
356 	return 0;
357 }
358 
359 int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
360 {
361 	struct napi_struct *napi;
362 	unsigned int napi_id;
363 	int err;
364 
365 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
366 		return -EINVAL;
367 
368 	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
369 
370 	napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
371 	if (napi) {
372 		err = netdev_nl_napi_set_config(napi, info);
373 		netdev_unlock(napi->dev);
374 	} else {
375 		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
376 		err = -ENOENT;
377 	}
378 
379 	return err;
380 }
381 
382 static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi)
383 {
384 	if (napi && napi_id_valid(napi->napi_id))
385 		return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id);
386 	return 0;
387 }
388 
389 static int
390 netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev,
391 			   u32 q_idx, u32 q_type)
392 {
393 	struct net_device *orig_netdev = netdev;
394 	struct nlattr *nest_lease, *nest_queue;
395 	struct netdev_rx_queue *rxq;
396 	struct net *net, *peer_net;
397 
398 	rxq = __netif_get_rx_queue_lease(&netdev, &q_idx,
399 					 NETIF_PHYS_TO_VIRT);
400 	if (!rxq || orig_netdev == netdev)
401 		return 0;
402 
403 	nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE);
404 	if (!nest_lease)
405 		goto nla_put_failure;
406 
407 	nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE);
408 	if (!nest_queue)
409 		goto nla_put_failure;
410 	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx))
411 		goto nla_put_failure;
412 	if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type))
413 		goto nla_put_failure;
414 	nla_nest_end(rsp, nest_queue);
415 
416 	if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX,
417 			READ_ONCE(netdev->ifindex)))
418 		goto nla_put_failure;
419 
420 	rcu_read_lock();
421 	peer_net = dev_net_rcu(netdev);
422 	net = dev_net_rcu(orig_netdev);
423 	if (!net_eq(net, peer_net)) {
424 		s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC);
425 
426 		if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id))
427 			goto nla_put_failure_unlock;
428 	}
429 	rcu_read_unlock();
430 	nla_nest_end(rsp, nest_lease);
431 	return 0;
432 
433 nla_put_failure_unlock:
434 	rcu_read_unlock();
435 nla_put_failure:
436 	return -ENOMEM;
437 }
438 
439 static int
440 netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
441 			 u32 q_idx, u32 q_type, const struct genl_info *info)
442 {
443 	struct pp_memory_provider_params *params;
444 	struct net_device *orig_netdev = netdev;
445 	struct netdev_rx_queue *rxq, *rxq_lease;
446 	struct netdev_queue *txq;
447 	void *hdr;
448 
449 	hdr = genlmsg_iput(rsp, info);
450 	if (!hdr)
451 		return -EMSGSIZE;
452 
453 	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) ||
454 	    nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) ||
455 	    nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex))
456 		goto nla_put_failure;
457 
458 	switch (q_type) {
459 	case NETDEV_QUEUE_TYPE_RX:
460 		rxq = __netif_get_rx_queue(netdev, q_idx);
461 		if (nla_put_napi_id(rsp, rxq->napi))
462 			goto nla_put_failure;
463 		if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type))
464 			goto nla_put_failure;
465 
466 		rxq_lease = netif_get_rx_queue_lease_locked(&netdev, &q_idx);
467 		if (rxq_lease)
468 			rxq = rxq_lease;
469 		params = &rxq->mp_params;
470 		if (params->mp_ops &&
471 		    params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
472 			goto nla_put_failure_lease;
473 #ifdef CONFIG_XDP_SOCKETS
474 		if (rxq->pool)
475 			if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
476 				goto nla_put_failure_lease;
477 #endif
478 		netif_put_rx_queue_lease_locked(orig_netdev, netdev);
479 		break;
480 	case NETDEV_QUEUE_TYPE_TX:
481 		txq = netdev_get_tx_queue(netdev, q_idx);
482 		if (nla_put_napi_id(rsp, txq->napi))
483 			goto nla_put_failure;
484 #ifdef CONFIG_XDP_SOCKETS
485 		if (txq->pool)
486 			if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
487 				goto nla_put_failure;
488 #endif
489 		break;
490 	}
491 
492 	genlmsg_end(rsp, hdr);
493 
494 	return 0;
495 
496 nla_put_failure_lease:
497 	netif_put_rx_queue_lease_locked(orig_netdev, netdev);
498 nla_put_failure:
499 	genlmsg_cancel(rsp, hdr);
500 	return -EMSGSIZE;
501 }
502 
503 static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id,
504 				    u32 q_type)
505 {
506 	switch (q_type) {
507 	case NETDEV_QUEUE_TYPE_RX:
508 		if (q_id >= netdev->real_num_rx_queues)
509 			return -EINVAL;
510 		return 0;
511 	case NETDEV_QUEUE_TYPE_TX:
512 		if (q_id >= netdev->real_num_tx_queues)
513 			return -EINVAL;
514 	}
515 	return 0;
516 }
517 
518 static int
519 netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
520 		     u32 q_type, const struct genl_info *info)
521 {
522 	int err;
523 
524 	if (!netdev->up)
525 		return -ENOENT;
526 
527 	err = netdev_nl_queue_validate(netdev, q_idx, q_type);
528 	if (err)
529 		return err;
530 
531 	return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info);
532 }
533 
534 int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
535 {
536 	u32 q_id, q_type, ifindex;
537 	struct net_device *netdev;
538 	struct sk_buff *rsp;
539 	int err;
540 
541 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) ||
542 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
543 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX))
544 		return -EINVAL;
545 
546 	q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]);
547 	q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]);
548 	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
549 
550 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
551 	if (!rsp)
552 		return -ENOMEM;
553 
554 	netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info),
555 						     ifindex);
556 	if (netdev) {
557 		err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
558 		netdev_unlock_ops_compat(netdev);
559 	} else {
560 		err = -ENODEV;
561 	}
562 
563 	if (err)
564 		goto err_free_msg;
565 
566 	return genlmsg_reply(rsp, info);
567 
568 err_free_msg:
569 	nlmsg_free(rsp);
570 	return err;
571 }
572 
573 static int
574 netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
575 			 const struct genl_info *info,
576 			 struct netdev_nl_dump_ctx *ctx)
577 {
578 	int err = 0;
579 
580 	if (!netdev->up)
581 		return err;
582 
583 	for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
584 		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx,
585 					       NETDEV_QUEUE_TYPE_RX, info);
586 		if (err)
587 			return err;
588 	}
589 	for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) {
590 		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx,
591 					       NETDEV_QUEUE_TYPE_TX, info);
592 		if (err)
593 			return err;
594 	}
595 
596 	return err;
597 }
598 
599 int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
600 {
601 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
602 	const struct genl_info *info = genl_info_dump(cb);
603 	struct net *net = sock_net(skb->sk);
604 	struct net_device *netdev;
605 	u32 ifindex = 0;
606 	int err = 0;
607 
608 	if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
609 		ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
610 
611 	if (ifindex) {
612 		netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
613 		if (netdev) {
614 			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
615 			netdev_unlock_ops_compat(netdev);
616 		} else {
617 			err = -ENODEV;
618 		}
619 	} else {
620 		for_each_netdev_lock_ops_compat_scoped(net, netdev,
621 						       ctx->ifindex) {
622 			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
623 			if (err < 0)
624 				break;
625 			ctx->rxq_idx = 0;
626 			ctx->txq_idx = 0;
627 		}
628 	}
629 
630 	return err;
631 }
632 
633 #define NETDEV_STAT_NOT_SET		(~0ULL)
634 
635 static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size)
636 {
637 	const u64 *add = _add;
638 	u64 *sum = _sum;
639 
640 	while (size) {
641 		if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET)
642 			*sum += *add;
643 		sum++;
644 		add++;
645 		size -= 8;
646 	}
647 }
648 
649 static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value)
650 {
651 	if (value == NETDEV_STAT_NOT_SET)
652 		return 0;
653 	return nla_put_uint(rsp, attr_id, value);
654 }
655 
656 static int
657 netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
658 {
659 	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
660 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
661 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
662 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
663 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
664 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) ||
665 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
666 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
667 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
668 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) ||
669 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) ||
670 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) ||
671 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) ||
672 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits))
673 		return -EMSGSIZE;
674 	return 0;
675 }
676 
677 static int
678 netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
679 {
680 	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
681 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) ||
682 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) ||
683 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) ||
684 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) ||
685 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) ||
686 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) ||
687 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
688 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
689 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
690 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) ||
691 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) ||
692 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake))
693 		return -EMSGSIZE;
694 	return 0;
695 }
696 
697 static int
698 netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp,
699 		      u32 q_type, int i, const struct genl_info *info)
700 {
701 	const struct netdev_stat_ops *ops = netdev->stat_ops;
702 	struct netdev_queue_stats_rx rx;
703 	struct netdev_queue_stats_tx tx;
704 	void *hdr;
705 
706 	hdr = genlmsg_iput(rsp, info);
707 	if (!hdr)
708 		return -EMSGSIZE;
709 	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) ||
710 	    nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) ||
711 	    nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i))
712 		goto nla_put_failure;
713 
714 	switch (q_type) {
715 	case NETDEV_QUEUE_TYPE_RX:
716 		memset(&rx, 0xff, sizeof(rx));
717 		ops->get_queue_stats_rx(netdev, i, &rx);
718 		if (!memchr_inv(&rx, 0xff, sizeof(rx)))
719 			goto nla_cancel;
720 		if (netdev_nl_stats_write_rx(rsp, &rx))
721 			goto nla_put_failure;
722 		break;
723 	case NETDEV_QUEUE_TYPE_TX:
724 		memset(&tx, 0xff, sizeof(tx));
725 		ops->get_queue_stats_tx(netdev, i, &tx);
726 		if (!memchr_inv(&tx, 0xff, sizeof(tx)))
727 			goto nla_cancel;
728 		if (netdev_nl_stats_write_tx(rsp, &tx))
729 			goto nla_put_failure;
730 		break;
731 	}
732 
733 	genlmsg_end(rsp, hdr);
734 	return 0;
735 
736 nla_cancel:
737 	genlmsg_cancel(rsp, hdr);
738 	return 0;
739 nla_put_failure:
740 	genlmsg_cancel(rsp, hdr);
741 	return -EMSGSIZE;
742 }
743 
744 static int
745 netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
746 			 const struct genl_info *info,
747 			 struct netdev_nl_dump_ctx *ctx)
748 {
749 	const struct netdev_stat_ops *ops = netdev->stat_ops;
750 	int i, err;
751 
752 	if (!(netdev->flags & IFF_UP))
753 		return 0;
754 
755 	i = ctx->rxq_idx;
756 	while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) {
757 		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX,
758 					    i, info);
759 		if (err)
760 			return err;
761 		ctx->rxq_idx = ++i;
762 	}
763 	i = ctx->txq_idx;
764 	while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) {
765 		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX,
766 					    i, info);
767 		if (err)
768 			return err;
769 		ctx->txq_idx = ++i;
770 	}
771 
772 	ctx->rxq_idx = 0;
773 	ctx->txq_idx = 0;
774 	return 0;
775 }
776 
777 /**
778  * netdev_stat_queue_sum() - add up queue stats from range of queues
779  * @netdev:	net_device
780  * @rx_start:	index of the first Rx queue to query
781  * @rx_end:	index after the last Rx queue (first *not* to query)
782  * @rx_sum:	output Rx stats, should be already initialized
783  * @tx_start:	index of the first Tx queue to query
784  * @tx_end:	index after the last Tx queue (first *not* to query)
785  * @tx_sum:	output Tx stats, should be already initialized
786  *
787  * Add stats from [start, end) range of queue IDs to *x_sum structs.
788  * The sum structs must be already initialized. Usually this
789  * helper is invoked from the .get_base_stats callbacks of drivers
790  * to account for stats of disabled queues. In that case the ranges
791  * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues).
792  */
793 void netdev_stat_queue_sum(struct net_device *netdev,
794 			   int rx_start, int rx_end,
795 			   struct netdev_queue_stats_rx *rx_sum,
796 			   int tx_start, int tx_end,
797 			   struct netdev_queue_stats_tx *tx_sum)
798 {
799 	const struct netdev_stat_ops *ops;
800 	struct netdev_queue_stats_rx rx;
801 	struct netdev_queue_stats_tx tx;
802 	int i;
803 
804 	ops = netdev->stat_ops;
805 
806 	for (i = rx_start; i < rx_end; i++) {
807 		memset(&rx, 0xff, sizeof(rx));
808 		if (ops->get_queue_stats_rx)
809 			ops->get_queue_stats_rx(netdev, i, &rx);
810 		netdev_nl_stats_add(rx_sum, &rx, sizeof(rx));
811 	}
812 	for (i = tx_start; i < tx_end; i++) {
813 		memset(&tx, 0xff, sizeof(tx));
814 		if (ops->get_queue_stats_tx)
815 			ops->get_queue_stats_tx(netdev, i, &tx);
816 		netdev_nl_stats_add(tx_sum, &tx, sizeof(tx));
817 	}
818 }
819 EXPORT_SYMBOL(netdev_stat_queue_sum);
820 
821 static int
822 netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
823 			  const struct genl_info *info)
824 {
825 	struct netdev_queue_stats_rx rx_sum;
826 	struct netdev_queue_stats_tx tx_sum;
827 	void *hdr;
828 
829 	/* Netdev can't guarantee any complete counters */
830 	if (!netdev->stat_ops->get_base_stats)
831 		return 0;
832 
833 	memset(&rx_sum, 0xff, sizeof(rx_sum));
834 	memset(&tx_sum, 0xff, sizeof(tx_sum));
835 
836 	netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum);
837 
838 	/* The op was there, but nothing reported, don't bother */
839 	if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) &&
840 	    !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum)))
841 		return 0;
842 
843 	hdr = genlmsg_iput(rsp, info);
844 	if (!hdr)
845 		return -EMSGSIZE;
846 	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
847 		goto nla_put_failure;
848 
849 	netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum,
850 			      0, netdev->real_num_tx_queues, &tx_sum);
851 
852 	if (netdev_nl_stats_write_rx(rsp, &rx_sum) ||
853 	    netdev_nl_stats_write_tx(rsp, &tx_sum))
854 		goto nla_put_failure;
855 
856 	genlmsg_end(rsp, hdr);
857 	return 0;
858 
859 nla_put_failure:
860 	genlmsg_cancel(rsp, hdr);
861 	return -EMSGSIZE;
862 }
863 
864 static int
865 netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope,
866 			      struct sk_buff *skb, const struct genl_info *info,
867 			      struct netdev_nl_dump_ctx *ctx)
868 {
869 	if (!netdev->stat_ops)
870 		return 0;
871 
872 	switch (scope) {
873 	case 0:
874 		return netdev_nl_stats_by_netdev(netdev, skb, info);
875 	case NETDEV_QSTATS_SCOPE_QUEUE:
876 		return netdev_nl_stats_by_queue(netdev, skb, info, ctx);
877 	}
878 
879 	return -EINVAL;	/* Should not happen, per netlink policy */
880 }
881 
882 int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
883 				struct netlink_callback *cb)
884 {
885 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
886 	const struct genl_info *info = genl_info_dump(cb);
887 	struct net *net = sock_net(skb->sk);
888 	struct net_device *netdev;
889 	unsigned int ifindex;
890 	unsigned int scope;
891 	int err = 0;
892 
893 	scope = 0;
894 	if (info->attrs[NETDEV_A_QSTATS_SCOPE])
895 		scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);
896 
897 	ifindex = 0;
898 	if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
899 		ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);
900 
901 	if (ifindex) {
902 		netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
903 		if (!netdev) {
904 			NL_SET_BAD_ATTR(info->extack,
905 					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
906 			return -ENODEV;
907 		}
908 		if (netdev->stat_ops) {
909 			err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
910 							    info, ctx);
911 		} else {
912 			NL_SET_BAD_ATTR(info->extack,
913 					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
914 			err = -EOPNOTSUPP;
915 		}
916 		netdev_unlock_ops_compat(netdev);
917 		return err;
918 	}
919 
920 	for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) {
921 		err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
922 						    info, ctx);
923 		if (err < 0)
924 			break;
925 	}
926 
927 	return err;
928 }
929 
930 static int netdev_nl_read_rxq_bitmap(struct genl_info *info,
931 				     u32 rxq_bitmap_len,
932 				     unsigned long *rxq_bitmap)
933 {
934 	const int maxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
935 	struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
936 	struct nlattr *attr;
937 	int rem, err = 0;
938 	u32 rxq_idx;
939 
940 	nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
941 			       genlmsg_data(info->genlhdr),
942 			       genlmsg_len(info->genlhdr), rem) {
943 		err = nla_parse_nested(tb, maxtype, attr,
944 				       netdev_queue_id_nl_policy, info->extack);
945 		if (err < 0)
946 			return err;
947 
948 		if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
949 		    NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE))
950 			return -EINVAL;
951 
952 		if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
953 			NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
954 			return -EINVAL;
955 		}
956 
957 		rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
958 		if (rxq_idx >= rxq_bitmap_len) {
959 			NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_ID]);
960 			return -EINVAL;
961 		}
962 
963 		bitmap_set(rxq_bitmap, rxq_idx, 1);
964 	}
965 
966 	return 0;
967 }
968 
969 static struct device *
970 netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap,
971 		      struct netlink_ext_ack *extack)
972 {
973 	struct device *dma_dev = NULL;
974 	u32 rxq_idx, prev_rxq_idx;
975 
976 	for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
977 		struct device *rxq_dma_dev;
978 
979 		rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx,
980 						       NETDEV_QUEUE_TYPE_RX);
981 		if (dma_dev && rxq_dma_dev != dma_dev) {
982 			NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)",
983 					   rxq_idx, prev_rxq_idx);
984 			return ERR_PTR(-EOPNOTSUPP);
985 		}
986 
987 		dma_dev = rxq_dma_dev;
988 		prev_rxq_idx = rxq_idx;
989 	}
990 
991 	return dma_dev;
992 }
993 
994 int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
995 {
996 	struct net_devmem_dmabuf_binding *binding;
997 	u32 ifindex, dmabuf_fd, rxq_idx;
998 	struct netdev_nl_sock *priv;
999 	struct net_device *netdev;
1000 	unsigned long *rxq_bitmap;
1001 	struct device *dma_dev;
1002 	struct sk_buff *rsp;
1003 	int err = 0;
1004 	void *hdr;
1005 
1006 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
1007 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) ||
1008 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES))
1009 		return -EINVAL;
1010 
1011 	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
1012 	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
1013 
1014 	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
1015 	if (IS_ERR(priv))
1016 		return PTR_ERR(priv);
1017 
1018 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
1019 	if (!rsp)
1020 		return -ENOMEM;
1021 
1022 	hdr = genlmsg_iput(rsp, info);
1023 	if (!hdr) {
1024 		err = -EMSGSIZE;
1025 		goto err_genlmsg_free;
1026 	}
1027 
1028 	mutex_lock(&priv->lock);
1029 
1030 	err = 0;
1031 	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
1032 	if (!netdev) {
1033 		err = -ENODEV;
1034 		goto err_unlock_sock;
1035 	}
1036 	if (!netif_device_present(netdev))
1037 		err = -ENODEV;
1038 	else if (!netdev_need_ops_lock(netdev))
1039 		err = -EOPNOTSUPP;
1040 	if (err) {
1041 		NL_SET_BAD_ATTR(info->extack,
1042 				info->attrs[NETDEV_A_DEV_IFINDEX]);
1043 		goto err_unlock;
1044 	}
1045 
1046 	rxq_bitmap = bitmap_zalloc(netdev->real_num_rx_queues, GFP_KERNEL);
1047 	if (!rxq_bitmap) {
1048 		err = -ENOMEM;
1049 		goto err_unlock;
1050 	}
1051 
1052 	err = netdev_nl_read_rxq_bitmap(info, netdev->real_num_rx_queues,
1053 					rxq_bitmap);
1054 	if (err)
1055 		goto err_rxq_bitmap;
1056 
1057 	dma_dev = netdev_nl_get_dma_dev(netdev, rxq_bitmap, info->extack);
1058 	if (IS_ERR(dma_dev)) {
1059 		err = PTR_ERR(dma_dev);
1060 		goto err_rxq_bitmap;
1061 	}
1062 
1063 	binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_FROM_DEVICE,
1064 					 dmabuf_fd, priv, info->extack);
1065 	if (IS_ERR(binding)) {
1066 		err = PTR_ERR(binding);
1067 		goto err_rxq_bitmap;
1068 	}
1069 
1070 	for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
1071 		err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
1072 						      info->extack);
1073 		if (err)
1074 			goto err_unbind;
1075 	}
1076 
1077 	nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
1078 	genlmsg_end(rsp, hdr);
1079 
1080 	err = genlmsg_reply(rsp, info);
1081 	if (err)
1082 		goto err_unbind;
1083 
1084 	bitmap_free(rxq_bitmap);
1085 
1086 	netdev_unlock(netdev);
1087 
1088 	mutex_unlock(&priv->lock);
1089 
1090 	return 0;
1091 
1092 err_unbind:
1093 	net_devmem_unbind_dmabuf(binding);
1094 err_rxq_bitmap:
1095 	bitmap_free(rxq_bitmap);
1096 err_unlock:
1097 	netdev_unlock(netdev);
1098 err_unlock_sock:
1099 	mutex_unlock(&priv->lock);
1100 err_genlmsg_free:
1101 	nlmsg_free(rsp);
1102 	return err;
1103 }
1104 
1105 int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
1106 {
1107 	struct net_devmem_dmabuf_binding *binding;
1108 	struct netdev_nl_sock *priv;
1109 	struct net_device *netdev;
1110 	struct device *dma_dev;
1111 	u32 ifindex, dmabuf_fd;
1112 	struct sk_buff *rsp;
1113 	int err = 0;
1114 	void *hdr;
1115 
1116 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
1117 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD))
1118 		return -EINVAL;
1119 
1120 	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
1121 	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
1122 
1123 	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
1124 	if (IS_ERR(priv))
1125 		return PTR_ERR(priv);
1126 
1127 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
1128 	if (!rsp)
1129 		return -ENOMEM;
1130 
1131 	hdr = genlmsg_iput(rsp, info);
1132 	if (!hdr) {
1133 		err = -EMSGSIZE;
1134 		goto err_genlmsg_free;
1135 	}
1136 
1137 	mutex_lock(&priv->lock);
1138 
1139 	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
1140 	if (!netdev) {
1141 		err = -ENODEV;
1142 		goto err_unlock_sock;
1143 	}
1144 
1145 	if (!netif_device_present(netdev)) {
1146 		err = -ENODEV;
1147 		goto err_unlock_netdev;
1148 	}
1149 
1150 	if (!netdev->netmem_tx) {
1151 		err = -EOPNOTSUPP;
1152 		NL_SET_ERR_MSG(info->extack,
1153 			       "Driver does not support netmem TX");
1154 		goto err_unlock_netdev;
1155 	}
1156 
1157 	dma_dev = netdev_queue_get_dma_dev(netdev, 0, NETDEV_QUEUE_TYPE_TX);
1158 	binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE,
1159 					 dmabuf_fd, priv, info->extack);
1160 	if (IS_ERR(binding)) {
1161 		err = PTR_ERR(binding);
1162 		goto err_unlock_netdev;
1163 	}
1164 
1165 	nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
1166 	genlmsg_end(rsp, hdr);
1167 
1168 	netdev_unlock(netdev);
1169 	mutex_unlock(&priv->lock);
1170 
1171 	return genlmsg_reply(rsp, info);
1172 
1173 err_unlock_netdev:
1174 	netdev_unlock(netdev);
1175 err_unlock_sock:
1176 	mutex_unlock(&priv->lock);
1177 err_genlmsg_free:
1178 	nlmsg_free(rsp);
1179 	return err;
1180 }
1181 
1182 int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info)
1183 {
1184 	const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
1185 	const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1;
1186 	int err, ifindex, ifindex_lease, queue_id, queue_id_lease;
1187 	struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
1188 	struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)];
1189 	struct netdev_rx_queue *rxq, *rxq_lease;
1190 	struct net_device *dev, *dev_lease;
1191 	netdevice_tracker dev_tracker;
1192 	s32 netns_lease = -1;
1193 	struct nlattr *nest;
1194 	struct sk_buff *rsp;
1195 	struct net *net;
1196 	void *hdr;
1197 
1198 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) ||
1199 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
1200 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE))
1201 		return -EINVAL;
1202 	if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) !=
1203 	    NETDEV_QUEUE_TYPE_RX) {
1204 		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]);
1205 		return -EINVAL;
1206 	}
1207 
1208 	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
1209 
1210 	nest = info->attrs[NETDEV_A_QUEUE_LEASE];
1211 	err = nla_parse_nested(ltb, lmaxtype, nest,
1212 			       netdev_lease_nl_policy, info->extack);
1213 	if (err < 0)
1214 		return err;
1215 	if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) ||
1216 	    NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE))
1217 		return -EINVAL;
1218 	if (ltb[NETDEV_A_LEASE_NETNS_ID]) {
1219 		if (!capable(CAP_NET_ADMIN))
1220 			return -EPERM;
1221 		netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]);
1222 	}
1223 
1224 	ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]);
1225 
1226 	nest = ltb[NETDEV_A_LEASE_QUEUE];
1227 	err = nla_parse_nested(qtb, qmaxtype, nest,
1228 			       netdev_queue_id_nl_policy, info->extack);
1229 	if (err < 0)
1230 		return err;
1231 	if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) ||
1232 	    NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE))
1233 		return -EINVAL;
1234 	if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
1235 		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]);
1236 		return -EINVAL;
1237 	}
1238 
1239 	queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]);
1240 
1241 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
1242 	if (!rsp)
1243 		return -ENOMEM;
1244 
1245 	hdr = genlmsg_iput(rsp, info);
1246 	if (!hdr) {
1247 		err = -EMSGSIZE;
1248 		goto err_genlmsg_free;
1249 	}
1250 
1251 	/* Locking order is always from the virtual to the physical device
1252 	 * since this is also the same order when applications open the
1253 	 * memory provider later on.
1254 	 */
1255 	dev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
1256 	if (!dev) {
1257 		err = -ENODEV;
1258 		goto err_genlmsg_free;
1259 	}
1260 	if (!netdev_can_create_queue(dev, info->extack)) {
1261 		err = -EINVAL;
1262 		goto err_unlock_dev;
1263 	}
1264 
1265 	net = genl_info_net(info);
1266 	if (netns_lease >= 0) {
1267 		net = get_net_ns_by_id(net, netns_lease);
1268 		if (!net) {
1269 			err = -ENONET;
1270 			goto err_unlock_dev;
1271 		}
1272 	}
1273 
1274 	dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker,
1275 					GFP_KERNEL);
1276 	if (!dev_lease) {
1277 		err = -ENODEV;
1278 		goto err_put_netns;
1279 	}
1280 	if (!netdev_can_lease_queue(dev_lease, info->extack)) {
1281 		netdev_put(dev_lease, &dev_tracker);
1282 		err = -EINVAL;
1283 		goto err_put_netns;
1284 	}
1285 
1286 	dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker);
1287 	if (!dev_lease) {
1288 		err = -ENODEV;
1289 		goto err_put_netns;
1290 	}
1291 	if (queue_id_lease >= dev_lease->real_num_rx_queues) {
1292 		err = -ERANGE;
1293 		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]);
1294 		goto err_unlock_dev_lease;
1295 	}
1296 	if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX,
1297 			      info->extack)) {
1298 		err = -EBUSY;
1299 		goto err_unlock_dev_lease;
1300 	}
1301 
1302 	rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease);
1303 	rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1);
1304 
1305 	/* Leasing queues from different physical devices is currently
1306 	 * not supported. Capabilities such as XDP features and DMA
1307 	 * device may differ between physical devices, and computing
1308 	 * a correct intersection for the virtual device is not yet
1309 	 * implemented.
1310 	 */
1311 	if (rxq->lease && rxq->lease->dev != dev_lease) {
1312 		err = -EOPNOTSUPP;
1313 		NL_SET_ERR_MSG(info->extack,
1314 			       "Leasing queues from different devices not supported");
1315 		goto err_unlock_dev_lease;
1316 	}
1317 
1318 	queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack);
1319 	if (queue_id < 0) {
1320 		err = queue_id;
1321 		goto err_unlock_dev_lease;
1322 	}
1323 	rxq = __netif_get_rx_queue(dev, queue_id);
1324 
1325 	netdev_rx_queue_lease(rxq, rxq_lease);
1326 
1327 	nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id);
1328 	genlmsg_end(rsp, hdr);
1329 
1330 	netdev_unlock(dev_lease);
1331 	netdev_unlock(dev);
1332 	if (netns_lease >= 0)
1333 		put_net(net);
1334 
1335 	return genlmsg_reply(rsp, info);
1336 
1337 err_unlock_dev_lease:
1338 	netdev_unlock(dev_lease);
1339 err_put_netns:
1340 	if (netns_lease >= 0)
1341 		put_net(net);
1342 err_unlock_dev:
1343 	netdev_unlock(dev);
1344 err_genlmsg_free:
1345 	nlmsg_free(rsp);
1346 	return err;
1347 }
1348 
1349 void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
1350 {
1351 	INIT_LIST_HEAD(&priv->bindings);
1352 	mutex_init(&priv->lock);
1353 }
1354 
1355 void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv)
1356 {
1357 	struct net_devmem_dmabuf_binding *binding;
1358 	struct net_devmem_dmabuf_binding *temp;
1359 	netdevice_tracker dev_tracker;
1360 	struct net_device *dev;
1361 
1362 	mutex_lock(&priv->lock);
1363 	list_for_each_entry_safe(binding, temp, &priv->bindings, list) {
1364 		mutex_lock(&binding->lock);
1365 		dev = binding->dev;
1366 		if (!dev) {
1367 			mutex_unlock(&binding->lock);
1368 			net_devmem_unbind_dmabuf(binding);
1369 			continue;
1370 		}
1371 		netdev_hold(dev, &dev_tracker, GFP_KERNEL);
1372 		mutex_unlock(&binding->lock);
1373 
1374 		netdev_lock(dev);
1375 		net_devmem_unbind_dmabuf(binding);
1376 		netdev_unlock(dev);
1377 		netdev_put(dev, &dev_tracker);
1378 	}
1379 	mutex_unlock(&priv->lock);
1380 }
1381 
1382 static int netdev_genl_netdevice_event(struct notifier_block *nb,
1383 				       unsigned long event, void *ptr)
1384 {
1385 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
1386 
1387 	switch (event) {
1388 	case NETDEV_REGISTER:
1389 		netdev_lock_ops_to_full(netdev);
1390 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF);
1391 		netdev_unlock_full_to_ops(netdev);
1392 		break;
1393 	case NETDEV_UNREGISTER:
1394 		netdev_lock(netdev);
1395 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF);
1396 		netdev_unlock(netdev);
1397 		break;
1398 	case NETDEV_XDP_FEAT_CHANGE:
1399 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF);
1400 		break;
1401 	}
1402 
1403 	return NOTIFY_OK;
1404 }
1405 
1406 static struct notifier_block netdev_genl_nb = {
1407 	.notifier_call	= netdev_genl_netdevice_event,
1408 };
1409 
1410 static int __init netdev_genl_init(void)
1411 {
1412 	int err;
1413 
1414 	err = register_netdevice_notifier(&netdev_genl_nb);
1415 	if (err)
1416 		return err;
1417 
1418 	err = genl_register_family(&netdev_nl_family);
1419 	if (err)
1420 		goto err_unreg_ntf;
1421 
1422 	return 0;
1423 
1424 err_unreg_ntf:
1425 	unregister_netdevice_notifier(&netdev_genl_nb);
1426 	return err;
1427 }
1428 
1429 subsys_initcall(netdev_genl_init);
1430