xref: /linux/net/core/netdev-genl.c (revision 6be87fbb27763c2999e1c69bbec1f3a63cf05422)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 #include <linux/netdevice.h>
4 #include <linux/notifier.h>
5 #include <linux/rtnetlink.h>
6 #include <net/busy_poll.h>
7 #include <net/net_namespace.h>
8 #include <net/netdev_queues.h>
9 #include <net/netdev_rx_queue.h>
10 #include <net/sock.h>
11 #include <net/xdp.h>
12 #include <net/xdp_sock.h>
13 #include <net/page_pool/memory_provider.h>
14 
15 #include "dev.h"
16 #include "devmem.h"
17 #include "netdev-genl-gen.h"
18 
/* Dump continuation state, stored in netlink_callback->ctx.
 * Each field records how far the previous dump pass got, so the next
 * pass can resume where the skb filled up.
 */
struct netdev_nl_dump_ctx {
	unsigned long	ifindex;	/* last device walked */
	unsigned int	rxq_idx;	/* next Rx queue to dump */
	unsigned int	txq_idx;	/* next Tx queue to dump */
	unsigned int	napi_id;	/* last NAPI ID dumped */
};
25 
26 static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
27 {
28 	NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx);
29 
30 	return (struct netdev_nl_dump_ctx *)cb->ctx;
31 }
32 
/* Fill one NETDEV_CMD_DEV_* message for @netdev into @rsp.
 * Reports ifindex, XDP feature flags, implemented XDP Rx metadata
 * kfuncs and AF_XDP Tx metadata capabilities.
 * Returns 0 or -EMSGSIZE when the message does not fit.
 */
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
		   const struct genl_info *info)
{
	u64 xsk_features = 0;
	u64 xdp_rx_meta = 0;
	void *hdr;

	netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;

	/* Expand to one feature-bit check per XDP Rx metadata kfunc:
	 * set the flag iff the driver implements the corresponding op.
	 */
#define XDP_METADATA_KFUNC(_, flag, __, xmo) \
	if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \
		xdp_rx_meta |= flag;
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC

	/* AF_XDP Tx metadata capabilities, one flag per implemented op. */
	if (netdev->xsk_tx_metadata_ops) {
		if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
			xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
		if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
			xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
		if (netdev->xsk_tx_metadata_ops->tmo_request_launch_time)
			xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO;
	}

	if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
			      netdev->xdp_features, NETDEV_A_DEV_PAD) ||
	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
			      xdp_rx_meta, NETDEV_A_DEV_PAD) ||
	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
			      xsk_features, NETDEV_A_DEV_PAD))
		goto err_cancel_msg;

	/* Max zero-copy segments is only meaningful with XSK ZC support. */
	if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
		if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
				netdev->xdp_zc_max_segs))
			goto err_cancel_msg;
	}

	genlmsg_end(rsp, hdr);

	return 0;

err_cancel_msg:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}
85 
86 static void
87 netdev_genl_dev_notify(struct net_device *netdev, int cmd)
88 {
89 	struct genl_info info;
90 	struct sk_buff *ntf;
91 
92 	if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
93 				NETDEV_NLGRP_MGMT))
94 		return;
95 
96 	genl_info_init_ntf(&info, &netdev_nl_family, cmd);
97 
98 	ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
99 	if (!ntf)
100 		return;
101 
102 	if (netdev_nl_dev_fill(netdev, ntf, &info)) {
103 		nlmsg_free(ntf);
104 		return;
105 	}
106 
107 	genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf,
108 				0, NETDEV_NLGRP_MGMT, GFP_KERNEL);
109 }
110 
111 int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
112 {
113 	struct net_device *netdev;
114 	struct sk_buff *rsp;
115 	u32 ifindex;
116 	int err;
117 
118 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX))
119 		return -EINVAL;
120 
121 	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
122 
123 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
124 	if (!rsp)
125 		return -ENOMEM;
126 
127 	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
128 	if (!netdev) {
129 		err = -ENODEV;
130 		goto err_free_msg;
131 	}
132 
133 	err = netdev_nl_dev_fill(netdev, rsp, info);
134 	netdev_unlock(netdev);
135 
136 	if (err)
137 		goto err_free_msg;
138 
139 	return genlmsg_reply(rsp, info);
140 
141 err_free_msg:
142 	nlmsg_free(rsp);
143 	return err;
144 }
145 
/* Dump one message per netdevice in the caller's netns. Continuation
 * across passes is handled by ctx->ifindex, which the scoped iterator
 * updates as it walks and locks each device.
 */
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
	struct net *net = sock_net(skb->sk);
	int err;

	for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
		err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
		if (err < 0)
			return err;
	}

	return 0;
}
160 
161 static int
162 netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
163 			const struct genl_info *info)
164 {
165 	unsigned long irq_suspend_timeout;
166 	unsigned long gro_flush_timeout;
167 	u32 napi_defer_hard_irqs;
168 	void *hdr;
169 	pid_t pid;
170 
171 	if (!napi->dev->up)
172 		return 0;
173 
174 	hdr = genlmsg_iput(rsp, info);
175 	if (!hdr)
176 		return -EMSGSIZE;
177 
178 	if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
179 		goto nla_put_failure;
180 
181 	if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
182 		goto nla_put_failure;
183 
184 	if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
185 		goto nla_put_failure;
186 
187 	if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED,
188 			 napi_get_threaded(napi)))
189 		goto nla_put_failure;
190 
191 	if (napi->thread) {
192 		pid = task_pid_nr(napi->thread);
193 		if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
194 			goto nla_put_failure;
195 	}
196 
197 	napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi);
198 	if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS,
199 			napi_defer_hard_irqs))
200 		goto nla_put_failure;
201 
202 	irq_suspend_timeout = napi_get_irq_suspend_timeout(napi);
203 	if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
204 			 irq_suspend_timeout))
205 		goto nla_put_failure;
206 
207 	gro_flush_timeout = napi_get_gro_flush_timeout(napi);
208 	if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
209 			 gro_flush_timeout))
210 		goto nla_put_failure;
211 
212 	genlmsg_end(rsp, hdr);
213 
214 	return 0;
215 
216 nla_put_failure:
217 	genlmsg_cancel(rsp, hdr);
218 	return -EMSGSIZE;
219 }
220 
221 int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
222 {
223 	struct napi_struct *napi;
224 	struct sk_buff *rsp;
225 	u32 napi_id;
226 	int err;
227 
228 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
229 		return -EINVAL;
230 
231 	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
232 
233 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
234 	if (!rsp)
235 		return -ENOMEM;
236 
237 	napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
238 	if (napi) {
239 		err = netdev_nl_napi_fill_one(rsp, napi, info);
240 		netdev_unlock(napi->dev);
241 	} else {
242 		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
243 		err = -ENOENT;
244 	}
245 
246 	if (err) {
247 		goto err_free_msg;
248 	} else if (!rsp->len) {
249 		err = -ENOENT;
250 		goto err_free_msg;
251 	}
252 
253 	return genlmsg_reply(rsp, info);
254 
255 err_free_msg:
256 	nlmsg_free(rsp);
257 	return err;
258 }
259 
/* Dump all NAPI instances of one device into @rsp.
 * ctx->napi_id is the continuation point: IDs >= it were already sent
 * in a previous pass and are skipped. Relies on netdev->napi_list
 * being sorted by strictly descending napi_id.
 */
static int
netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
			const struct genl_info *info,
			struct netdev_nl_dump_ctx *ctx)
{
	struct napi_struct *napi;
	unsigned int prev_id;
	int err = 0;

	/* Down devices expose no NAPI state. */
	if (!netdev->up)
		return err;

	prev_id = UINT_MAX;
	list_for_each_entry(napi, &netdev->napi_list, dev_list) {
		/* Skip instances which never got a valid ID assigned. */
		if (!napi_id_valid(napi->napi_id))
			continue;

		/* Dump continuation below depends on the list being sorted */
		WARN_ON_ONCE(napi->napi_id >= prev_id);
		prev_id = napi->napi_id;

		/* Already emitted by an earlier pass of this dump. */
		if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
			continue;

		err = netdev_nl_napi_fill_one(rsp, napi, info);
		if (err)
			return err;
		/* Record progress only once the entry fit in the skb. */
		ctx->napi_id = napi->napi_id;
	}
	return err;
}
291 
292 int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
293 {
294 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
295 	const struct genl_info *info = genl_info_dump(cb);
296 	struct net *net = sock_net(skb->sk);
297 	struct net_device *netdev;
298 	u32 ifindex = 0;
299 	int err = 0;
300 
301 	if (info->attrs[NETDEV_A_NAPI_IFINDEX])
302 		ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
303 
304 	if (ifindex) {
305 		netdev = netdev_get_by_index_lock(net, ifindex);
306 		if (netdev) {
307 			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
308 			netdev_unlock(netdev);
309 		} else {
310 			err = -ENODEV;
311 		}
312 	} else {
313 		for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
314 			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
315 			if (err < 0)
316 				break;
317 			ctx->napi_id = 0;
318 		}
319 	}
320 
321 	return err;
322 }
323 
324 static int
325 netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
326 {
327 	u64 irq_suspend_timeout = 0;
328 	u64 gro_flush_timeout = 0;
329 	u8 threaded = 0;
330 	u32 defer = 0;
331 
332 	if (info->attrs[NETDEV_A_NAPI_THREADED]) {
333 		int ret;
334 
335 		threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]);
336 		ret = napi_set_threaded(napi, threaded);
337 		if (ret)
338 			return ret;
339 	}
340 
341 	if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
342 		defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
343 		napi_set_defer_hard_irqs(napi, defer);
344 	}
345 
346 	if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) {
347 		irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]);
348 		napi_set_irq_suspend_timeout(napi, irq_suspend_timeout);
349 	}
350 
351 	if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
352 		gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
353 		napi_set_gro_flush_timeout(napi, gro_flush_timeout);
354 	}
355 
356 	return 0;
357 }
358 
359 int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
360 {
361 	struct napi_struct *napi;
362 	unsigned int napi_id;
363 	int err;
364 
365 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
366 		return -EINVAL;
367 
368 	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
369 
370 	napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
371 	if (napi) {
372 		err = netdev_nl_napi_set_config(napi, info);
373 		netdev_unlock(napi->dev);
374 	} else {
375 		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
376 		err = -ENOENT;
377 	}
378 
379 	return err;
380 }
381 
382 static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi)
383 {
384 	if (napi && napi_id_valid(napi->napi_id))
385 		return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id);
386 	return 0;
387 }
388 
/* Emit one queue object into @rsp: ID, type and owner ifindex, plus
 * type-specific attributes. For Rx queues that also covers the NAPI
 * ID, an optional LEASE nest when the queue is backed by another
 * device's queue, memory-provider info, and an XSK marker when an
 * AF_XDP pool is attached.
 */
static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
			 u32 q_idx, u32 q_type, const struct genl_info *info)
{
	struct pp_memory_provider_params *params;
	struct net_device *orig_netdev = netdev;
	struct nlattr *nest_lease, *nest_queue;
	struct netdev_rx_queue *rxq;
	struct netdev_queue *txq;
	u32 lease_q_idx = q_idx;
	void *hdr;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;

	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) ||
	    nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) ||
	    nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex))
		goto nla_put_failure;

	switch (q_type) {
	case NETDEV_QUEUE_TYPE_RX:
		rxq = __netif_get_rx_queue(netdev, q_idx);
		if (nla_put_napi_id(rsp, rxq->napi))
			goto nla_put_failure;

		/* On success this rewrites @netdev and @lease_q_idx to the
		 * lease owner's device and queue; @netdev is restored from
		 * orig_netdev once the nest is closed.
		 */
		if (netif_rx_queue_lease_get_owner(&netdev, &lease_q_idx)) {
			struct net *net, *peer_net;

			nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE);
			if (!nest_lease)
				goto nla_put_failure;
			nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE);
			if (!nest_queue)
				goto nla_put_failure;
			if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, lease_q_idx))
				goto nla_put_failure;
			if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type))
				goto nla_put_failure;
			nla_nest_end(rsp, nest_queue);
			if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX,
					READ_ONCE(netdev->ifindex)))
				goto nla_put_failure;
			/* Report a netns ID only when the lease owner lives
			 * in a different namespace than the queried device.
			 */
			rcu_read_lock();
			peer_net = dev_net_rcu(netdev);
			net = dev_net_rcu(orig_netdev);
			if (!net_eq(net, peer_net)) {
				s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC);

				if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id))
					goto nla_put_failure_unlock;
			}
			rcu_read_unlock();
			nla_nest_end(rsp, nest_lease);
			netdev = orig_netdev;
		}

		params = &rxq->mp_params;
		if (params->mp_ops &&
		    params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
			goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
		/* Empty nest merely flags that an XSK buffer pool is bound. */
		if (rxq->pool)
			if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
				goto nla_put_failure;
#endif

		break;
	case NETDEV_QUEUE_TYPE_TX:
		txq = netdev_get_tx_queue(netdev, q_idx);
		if (nla_put_napi_id(rsp, txq->napi))
			goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
		if (txq->pool)
			if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
				goto nla_put_failure;
#endif
		break;
	}

	genlmsg_end(rsp, hdr);

	return 0;

nla_put_failure_unlock:
	rcu_read_unlock();
nla_put_failure:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}
480 
481 static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id,
482 				    u32 q_type)
483 {
484 	switch (q_type) {
485 	case NETDEV_QUEUE_TYPE_RX:
486 		if (q_id >= netdev->real_num_rx_queues)
487 			return -EINVAL;
488 		return 0;
489 	case NETDEV_QUEUE_TYPE_TX:
490 		if (q_id >= netdev->real_num_tx_queues)
491 			return -EINVAL;
492 	}
493 	return 0;
494 }
495 
496 static int
497 netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
498 		     u32 q_type, const struct genl_info *info)
499 {
500 	int err;
501 
502 	if (!netdev->up)
503 		return -ENOENT;
504 
505 	err = netdev_nl_queue_validate(netdev, q_idx, q_type);
506 	if (err)
507 		return err;
508 
509 	return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info);
510 }
511 
512 int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
513 {
514 	u32 q_id, q_type, ifindex;
515 	struct net_device *netdev;
516 	struct sk_buff *rsp;
517 	int err;
518 
519 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) ||
520 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
521 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX))
522 		return -EINVAL;
523 
524 	q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]);
525 	q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]);
526 	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
527 
528 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
529 	if (!rsp)
530 		return -ENOMEM;
531 
532 	netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info),
533 						     ifindex);
534 	if (netdev) {
535 		err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
536 		netdev_unlock_ops_compat(netdev);
537 	} else {
538 		err = -ENODEV;
539 	}
540 
541 	if (err)
542 		goto err_free_msg;
543 
544 	return genlmsg_reply(rsp, info);
545 
546 err_free_msg:
547 	nlmsg_free(rsp);
548 	return err;
549 }
550 
/* Dump all Rx then all Tx queues of @netdev. ctx->rxq_idx/txq_idx are
 * continuation cursors: on -EMSGSIZE the core retries with a fresh skb
 * and we resume exactly at the queue that did not fit.
 */
static int
netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
			 const struct genl_info *info,
			 struct netdev_nl_dump_ctx *ctx)
{
	int err = 0;

	/* Down devices have no instantiated queues to report. */
	if (!netdev->up)
		return err;

	for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx,
					       NETDEV_QUEUE_TYPE_RX, info);
		if (err)
			return err;
	}
	for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) {
		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx,
					       NETDEV_QUEUE_TYPE_TX, info);
		if (err)
			return err;
	}

	return err;
}
576 
577 int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
578 {
579 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
580 	const struct genl_info *info = genl_info_dump(cb);
581 	struct net *net = sock_net(skb->sk);
582 	struct net_device *netdev;
583 	u32 ifindex = 0;
584 	int err = 0;
585 
586 	if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
587 		ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
588 
589 	if (ifindex) {
590 		netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
591 		if (netdev) {
592 			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
593 			netdev_unlock_ops_compat(netdev);
594 		} else {
595 			err = -ENODEV;
596 		}
597 	} else {
598 		for_each_netdev_lock_ops_compat_scoped(net, netdev,
599 						       ctx->ifindex) {
600 			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
601 			if (err < 0)
602 				break;
603 			ctx->rxq_idx = 0;
604 			ctx->txq_idx = 0;
605 		}
606 	}
607 
608 	return err;
609 }
610 
611 #define NETDEV_STAT_NOT_SET		(~0ULL)
612 
613 static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size)
614 {
615 	const u64 *add = _add;
616 	u64 *sum = _sum;
617 
618 	while (size) {
619 		if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET)
620 			*sum += *add;
621 		sum++;
622 		add++;
623 		size -= 8;
624 	}
625 }
626 
627 static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value)
628 {
629 	if (value == NETDEV_STAT_NOT_SET)
630 		return 0;
631 	return nla_put_uint(rsp, attr_id, value);
632 }
633 
634 static int
635 netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
636 {
637 	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
638 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
639 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
640 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
641 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
642 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) ||
643 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
644 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
645 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
646 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) ||
647 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) ||
648 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) ||
649 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) ||
650 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits))
651 		return -EMSGSIZE;
652 	return 0;
653 }
654 
655 static int
656 netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
657 {
658 	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
659 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) ||
660 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) ||
661 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) ||
662 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) ||
663 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) ||
664 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) ||
665 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
666 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
667 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
668 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) ||
669 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) ||
670 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake))
671 		return -EMSGSIZE;
672 	return 0;
673 }
674 
/* Emit the qstats of a single queue. When the driver reports nothing
 * for this queue, the partially-built message is cancelled and 0 is
 * returned so the dump continues without output for it.
 */
static int
netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp,
		      u32 q_type, int i, const struct genl_info *info)
{
	const struct netdev_stat_ops *ops = netdev->stat_ops;
	struct netdev_queue_stats_rx rx;
	struct netdev_queue_stats_tx tx;
	void *hdr;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;
	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) ||
	    nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) ||
	    nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i))
		goto nla_put_failure;

	switch (q_type) {
	case NETDEV_QUEUE_TYPE_RX:
		/* 0xff-fill: any field the driver leaves untouched reads as
		 * NETDEV_STAT_NOT_SET and is skipped by netdev_stat_put().
		 */
		memset(&rx, 0xff, sizeof(rx));
		ops->get_queue_stats_rx(netdev, i, &rx);
		if (!memchr_inv(&rx, 0xff, sizeof(rx)))
			goto nla_cancel;
		if (netdev_nl_stats_write_rx(rsp, &rx))
			goto nla_put_failure;
		break;
	case NETDEV_QUEUE_TYPE_TX:
		memset(&tx, 0xff, sizeof(tx));
		ops->get_queue_stats_tx(netdev, i, &tx);
		if (!memchr_inv(&tx, 0xff, sizeof(tx)))
			goto nla_cancel;
		if (netdev_nl_stats_write_tx(rsp, &tx))
			goto nla_put_failure;
		break;
	}

	genlmsg_end(rsp, hdr);
	return 0;

nla_cancel:
	/* Nothing reported - drop the message but don't fail the dump. */
	genlmsg_cancel(rsp, hdr);
	return 0;
nla_put_failure:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}
721 
/* Dump per-queue stats for every real queue of @netdev. The ctx
 * cursors allow resuming mid-device after -EMSGSIZE; they are reset to
 * zero only once the device has been fully dumped.
 */
static int
netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
			 const struct genl_info *info,
			 struct netdev_nl_dump_ctx *ctx)
{
	const struct netdev_stat_ops *ops = netdev->stat_ops;
	int i, err;

	/* Stats of down devices are not reported. */
	if (!(netdev->flags & IFF_UP))
		return 0;

	i = ctx->rxq_idx;
	while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) {
		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX,
					    i, info);
		if (err)
			return err;
		/* Advance the cursor only after the queue fit in the skb. */
		ctx->rxq_idx = ++i;
	}
	i = ctx->txq_idx;
	while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) {
		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX,
					    i, info);
		if (err)
			return err;
		ctx->txq_idx = ++i;
	}

	ctx->rxq_idx = 0;
	ctx->txq_idx = 0;
	return 0;
}
754 
755 /**
756  * netdev_stat_queue_sum() - add up queue stats from range of queues
757  * @netdev:	net_device
758  * @rx_start:	index of the first Rx queue to query
759  * @rx_end:	index after the last Rx queue (first *not* to query)
760  * @rx_sum:	output Rx stats, should be already initialized
761  * @tx_start:	index of the first Tx queue to query
762  * @tx_end:	index after the last Tx queue (first *not* to query)
763  * @tx_sum:	output Tx stats, should be already initialized
764  *
765  * Add stats from [start, end) range of queue IDs to *x_sum structs.
766  * The sum structs must be already initialized. Usually this
767  * helper is invoked from the .get_base_stats callbacks of drivers
768  * to account for stats of disabled queues. In that case the ranges
769  * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues).
770  */
771 void netdev_stat_queue_sum(struct net_device *netdev,
772 			   int rx_start, int rx_end,
773 			   struct netdev_queue_stats_rx *rx_sum,
774 			   int tx_start, int tx_end,
775 			   struct netdev_queue_stats_tx *tx_sum)
776 {
777 	const struct netdev_stat_ops *ops;
778 	struct netdev_queue_stats_rx rx;
779 	struct netdev_queue_stats_tx tx;
780 	int i;
781 
782 	ops = netdev->stat_ops;
783 
784 	for (i = rx_start; i < rx_end; i++) {
785 		memset(&rx, 0xff, sizeof(rx));
786 		if (ops->get_queue_stats_rx)
787 			ops->get_queue_stats_rx(netdev, i, &rx);
788 		netdev_nl_stats_add(rx_sum, &rx, sizeof(rx));
789 	}
790 	for (i = tx_start; i < tx_end; i++) {
791 		memset(&tx, 0xff, sizeof(tx));
792 		if (ops->get_queue_stats_tx)
793 			ops->get_queue_stats_tx(netdev, i, &tx);
794 		netdev_nl_stats_add(tx_sum, &tx, sizeof(tx));
795 	}
796 }
797 EXPORT_SYMBOL(netdev_stat_queue_sum);
798 
/* Emit device-scope qstats: the driver's base stats plus the sum over
 * all real queues. Returns 0 without output when the device cannot
 * provide complete counters or reports nothing at all.
 */
static int
netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
			  const struct genl_info *info)
{
	struct netdev_queue_stats_rx rx_sum;
	struct netdev_queue_stats_tx tx_sum;
	void *hdr;

	/* Netdev can't guarantee any complete counters */
	if (!netdev->stat_ops->get_base_stats)
		return 0;

	/* Start from the NOT_SET sentinel in every field. */
	memset(&rx_sum, 0xff, sizeof(rx_sum));
	memset(&tx_sum, 0xff, sizeof(tx_sum));

	netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum);

	/* The op was there, but nothing reported, don't bother */
	if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) &&
	    !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum)))
		return 0;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;
	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
		goto nla_put_failure;

	/* Fold all active queues into the base stats before writing. */
	netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum,
			      0, netdev->real_num_tx_queues, &tx_sum);

	if (netdev_nl_stats_write_rx(rsp, &rx_sum) ||
	    netdev_nl_stats_write_tx(rsp, &tx_sum))
		goto nla_put_failure;

	genlmsg_end(rsp, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}
841 
842 static int
843 netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope,
844 			      struct sk_buff *skb, const struct genl_info *info,
845 			      struct netdev_nl_dump_ctx *ctx)
846 {
847 	if (!netdev->stat_ops)
848 		return 0;
849 
850 	switch (scope) {
851 	case 0:
852 		return netdev_nl_stats_by_netdev(netdev, skb, info);
853 	case NETDEV_QSTATS_SCOPE_QUEUE:
854 		return netdev_nl_stats_by_queue(netdev, skb, info, ctx);
855 	}
856 
857 	return -EINVAL;	/* Should not happen, per netlink policy */
858 }
859 
/* Dump qstats for one device (when IFINDEX is given, where a missing
 * stat_ops is an error) or for all devices in the netns (where devices
 * without stat_ops are skipped).
 */
int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
	const struct genl_info *info = genl_info_dump(cb);
	struct net *net = sock_net(skb->sk);
	struct net_device *netdev;
	unsigned int ifindex;
	unsigned int scope;
	int err = 0;

	scope = 0;
	if (info->attrs[NETDEV_A_QSTATS_SCOPE])
		scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);

	ifindex = 0;
	if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
		ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);

	if (ifindex) {
		netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
		if (!netdev) {
			NL_SET_BAD_ATTR(info->extack,
					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
			return -ENODEV;
		}
		/* Explicitly requested device must support qstats. */
		if (netdev->stat_ops) {
			err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
							    info, ctx);
		} else {
			NL_SET_BAD_ATTR(info->extack,
					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
			err = -EOPNOTSUPP;
		}
		netdev_unlock_ops_compat(netdev);
		return err;
	}

	for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) {
		err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
						    info, ctx);
		if (err < 0)
			break;
	}

	return err;
}
907 
/* Parse every NETDEV_A_DMABUF_QUEUES attribute in the request and set
 * the corresponding bits in @rxq_bitmap. Only Rx queues with an ID
 * below @rxq_bitmap_len are accepted; anything else fails -EINVAL with
 * the offending attribute flagged in extack.
 */
static int netdev_nl_read_rxq_bitmap(struct genl_info *info,
				     u32 rxq_bitmap_len,
				     unsigned long *rxq_bitmap)
{
	const int maxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
	struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
	struct nlattr *attr;
	int rem, err = 0;
	u32 rxq_idx;

	nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
			       genlmsg_data(info->genlhdr),
			       genlmsg_len(info->genlhdr), rem) {
		err = nla_parse_nested(tb, maxtype, attr,
				       netdev_queue_id_nl_policy, info->extack);
		if (err < 0)
			return err;

		/* Both queue ID and type are mandatory in each nest. */
		if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
		    NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE))
			return -EINVAL;

		/* Only Rx queues can be bound to a dmabuf. */
		if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
			NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
			return -EINVAL;
		}

		rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
		if (rxq_idx >= rxq_bitmap_len) {
			NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_ID]);
			return -EINVAL;
		}

		bitmap_set(rxq_bitmap, rxq_idx, 1);
	}

	return 0;
}
946 
/* Return the DMA device shared by all Rx queues selected in
 * @rxq_bitmap, or ERR_PTR(-EOPNOTSUPP) when the queues span multiple
 * DMA devices (e.g. multi-PF devices), which dmabuf binding cannot
 * handle. May return NULL when the bitmap is empty or the queues
 * report no DMA device.
 */
static struct device *
netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap,
		      struct netlink_ext_ack *extack)
{
	struct device *dma_dev = NULL;
	u32 rxq_idx, prev_rxq_idx;

	for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
		struct device *rxq_dma_dev;

		rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx);
		/* prev_rxq_idx is only read once dma_dev has been set,
		 * i.e. from the second iteration on.
		 */
		if (dma_dev && rxq_dma_dev != dma_dev) {
			NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)",
					   rxq_idx, prev_rxq_idx);
			return ERR_PTR(-EOPNOTSUPP);
		}

		dma_dev = rxq_dma_dev;
		prev_rxq_idx = rxq_idx;
	}

	return dma_dev;
}
970 
/* Handle NETDEV_CMD_BIND_RX: bind a dmabuf to a set of Rx queues of
 * one device and reply with the new binding ID. Holds the per-socket
 * priv lock and the device instance lock across the whole operation;
 * on any failure after binding, the binding is torn down again.
 */
int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct net_devmem_dmabuf_binding *binding;
	u32 ifindex, dmabuf_fd, rxq_idx;
	struct netdev_nl_sock *priv;
	struct net_device *netdev;
	unsigned long *rxq_bitmap;
	struct device *dma_dev;
	struct sk_buff *rsp;
	int err = 0;
	void *hdr;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES))
		return -EINVAL;

	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);

	/* Socket-private state tracks bindings for cleanup on close. */
	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
	if (IS_ERR(priv))
		return PTR_ERR(priv);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_genlmsg_free;
	}

	mutex_lock(&priv->lock);

	err = 0;
	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
	if (!netdev) {
		err = -ENODEV;
		goto err_unlock_sock;
	}
	/* Binding requires a present device with ops-locked queue mgmt. */
	if (!netif_device_present(netdev))
		err = -ENODEV;
	else if (!netdev_need_ops_lock(netdev))
		err = -EOPNOTSUPP;
	if (err) {
		NL_SET_BAD_ATTR(info->extack,
				info->attrs[NETDEV_A_DEV_IFINDEX]);
		goto err_unlock;
	}

	rxq_bitmap = bitmap_zalloc(netdev->real_num_rx_queues, GFP_KERNEL);
	if (!rxq_bitmap) {
		err = -ENOMEM;
		goto err_unlock;
	}

	err = netdev_nl_read_rxq_bitmap(info, netdev->real_num_rx_queues,
					rxq_bitmap);
	if (err)
		goto err_rxq_bitmap;

	/* All selected queues must map to a single DMA device. */
	dma_dev = netdev_nl_get_dma_dev(netdev, rxq_bitmap, info->extack);
	if (IS_ERR(dma_dev)) {
		err = PTR_ERR(dma_dev);
		goto err_rxq_bitmap;
	}

	binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_FROM_DEVICE,
					 dmabuf_fd, priv, info->extack);
	if (IS_ERR(binding)) {
		err = PTR_ERR(binding);
		goto err_rxq_bitmap;
	}

	/* Attach the binding to each requested queue; unwind all on error. */
	for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
		err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
						      info->extack);
		if (err)
			goto err_unbind;
	}

	nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
	genlmsg_end(rsp, hdr);

	err = genlmsg_reply(rsp, info);
	if (err)
		goto err_unbind;

	bitmap_free(rxq_bitmap);

	netdev_unlock(netdev);

	mutex_unlock(&priv->lock);

	return 0;

err_unbind:
	net_devmem_unbind_dmabuf(binding);
err_rxq_bitmap:
	bitmap_free(rxq_bitmap);
err_unlock:
	netdev_unlock(netdev);
err_unlock_sock:
	mutex_unlock(&priv->lock);
err_genlmsg_free:
	nlmsg_free(rsp);
	return err;
}
1081 
/* netdev_nl_bind_tx_doit() - bind a dma-buf to a device for netmem TX.
 *
 * Request must carry NETDEV_A_DEV_IFINDEX and NETDEV_A_DMABUF_FD.  On
 * success the reply carries the id of the newly created binding
 * (NETDEV_A_DMABUF_ID).
 *
 * Return: 0 on success, negative errno on failure.
 */
int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct net_devmem_dmabuf_binding *binding;
	struct netdev_nl_sock *priv;
	struct net_device *netdev;
	struct device *dma_dev;
	u32 ifindex, dmabuf_fd;
	struct sk_buff *rsp;
	int err = 0;
	void *hdr;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD))
		return -EINVAL;

	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);

	/* Bindings created on behalf of this socket live in its private
	 * state so they can be torn down when the socket goes away.
	 */
	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
	if (IS_ERR(priv))
		return PTR_ERR(priv);

	/* Build the reply message up front so allocation/message-size
	 * failures are caught before any locks are taken.
	 */
	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_genlmsg_free;
	}

	/* Lock order: socket priv lock first, then the per-device
	 * instance lock taken by netdev_get_by_index_lock().
	 */
	mutex_lock(&priv->lock);

	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
	if (!netdev) {
		err = -ENODEV;
		goto err_unlock_sock;
	}

	if (!netif_device_present(netdev)) {
		err = -ENODEV;
		goto err_unlock_netdev;
	}

	if (!netdev->netmem_tx) {
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(info->extack,
			       "Driver does not support netmem TX");
		goto err_unlock_netdev;
	}

	/* The DMA device backing TX queue 0 is used for the whole
	 * binding; assumes all TX queues share it - TODO confirm.
	 */
	dma_dev = netdev_queue_get_dma_dev(netdev, 0);
	binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE,
					 dmabuf_fd, priv, info->extack);
	if (IS_ERR(binding)) {
		err = PTR_ERR(binding);
		goto err_unlock_netdev;
	}

	/* A single u32 attribute fits the preallocated message, so the
	 * nla_put return value is not checked.
	 */
	nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
	genlmsg_end(rsp, hdr);

	/* Drop locks in reverse acquisition order before replying. */
	netdev_unlock(netdev);
	mutex_unlock(&priv->lock);

	return genlmsg_reply(rsp, info);

err_unlock_netdev:
	netdev_unlock(netdev);
err_unlock_sock:
	mutex_unlock(&priv->lock);
err_genlmsg_free:
	nlmsg_free(rsp);
	return err;
}
1158 
/* netdev_nl_queue_create_doit() - create an RX queue on one device that
 * leases an existing RX queue from another device.
 *
 * Request carries NETDEV_A_QUEUE_IFINDEX / NETDEV_A_QUEUE_TYPE for the
 * device gaining the new queue, plus a nested NETDEV_A_QUEUE_LEASE
 * describing the donor device and queue.  Only RX queues are supported,
 * and the two ifindexes must differ.  On success the reply carries the
 * id of the newly created queue (NETDEV_A_QUEUE_ID).
 *
 * Return: 0 on success, negative errno on failure.
 */
int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info)
{
	const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
	const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1;
	int err, ifindex, ifindex_lease, queue_id, queue_id_lease;
	struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
	struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)];
	struct netdev_rx_queue *rxq, *rxq_lease;
	struct net_device *dev, *dev_lease;
	netdevice_tracker dev_tracker;
	struct nlattr *nest;
	struct sk_buff *rsp;
	void *hdr;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE))
		return -EINVAL;
	if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) !=
	    NETDEV_QUEUE_TYPE_RX) {
		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]);
		return -EINVAL;
	}

	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);

	/* Parse the nested lease descriptor: donor ifindex and queue are
	 * mandatory; a netns id is rejected (donor must be in our netns).
	 */
	nest = info->attrs[NETDEV_A_QUEUE_LEASE];
	err = nla_parse_nested(ltb, lmaxtype, nest,
			       netdev_lease_nl_policy, info->extack);
	if (err < 0)
		return err;
	if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) ||
	    NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE))
		return -EINVAL;
	if (ltb[NETDEV_A_LEASE_NETNS_ID]) {
		NL_SET_BAD_ATTR(info->extack, ltb[NETDEV_A_LEASE_NETNS_ID]);
		return -EINVAL;
	}

	ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]);

	/* The leased queue itself is a nested queue id; it too must be RX. */
	nest = ltb[NETDEV_A_LEASE_QUEUE];
	err = nla_parse_nested(qtb, qmaxtype, nest,
			       netdev_queue_id_nl_policy, info->extack);
	if (err < 0)
		return err;
	if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) ||
	    NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE))
		return -EINVAL;
	if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]);
		return -EINVAL;
	}
	if (ifindex == ifindex_lease) {
		NL_SET_ERR_MSG(info->extack,
			       "Lease ifindex cannot be the same as queue creation ifindex");
		return -EINVAL;
	}

	queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]);

	/* Build the reply before taking any device locks. */
	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_genlmsg_free;
	}

	/* Locking order is always from the virtual to the physical device
	 * since this is also the same order when applications open the
	 * memory provider later on.
	 */
	dev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
	if (!dev) {
		err = -ENODEV;
		goto err_genlmsg_free;
	}
	if (!netdev_can_create_queue(dev, info->extack)) {
		err = -EINVAL;
		goto err_unlock_dev;
	}

	/* Look up the donor with a tracked reference first, validate it,
	 * then convert the reference into the instance lock.
	 */
	dev_lease = netdev_get_by_index(genl_info_net(info), ifindex_lease,
					&dev_tracker, GFP_KERNEL);
	if (!dev_lease) {
		err = -ENODEV;
		goto err_unlock_dev;
	}
	if (!netdev_can_lease_queue(dev_lease, info->extack)) {
		netdev_put(dev_lease, &dev_tracker);
		err = -EINVAL;
		goto err_unlock_dev;
	}

	/* Drops the tracker ref; returns NULL if the device went away. */
	dev_lease = netdev_put_lock(dev_lease, &dev_tracker);
	if (!dev_lease) {
		err = -ENODEV;
		goto err_unlock_dev;
	}
	if (queue_id_lease >= dev_lease->real_num_rx_queues) {
		err = -ERANGE;
		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]);
		goto err_unlock_dev_lease;
	}
	if (netdev_queue_busy(dev_lease, queue_id_lease, info->extack)) {
		err = -EBUSY;
		goto err_unlock_dev_lease;
	}

	rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease);
	/* Check the last existing rxq: if it already leases from some
	 * other device, mixing donors is not supported.
	 */
	rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1);

	if (rxq->lease && rxq->lease->dev != dev_lease) {
		err = -EOPNOTSUPP;
		NL_SET_ERR_MSG(info->extack,
			       "Leasing multiple queues from different devices not supported");
		goto err_unlock_dev_lease;
	}

	/* ndo_queue_create() returns the new queue id or a negative errno. */
	err = queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev);
	if (err < 0) {
		NL_SET_ERR_MSG(info->extack,
			       "Device is unable to create a new queue");
		goto err_unlock_dev_lease;
	}

	rxq = __netif_get_rx_queue(dev, queue_id);
	netdev_rx_queue_lease(rxq, rxq_lease);

	nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id);
	genlmsg_end(rsp, hdr);

	netdev_unlock(dev_lease);
	netdev_unlock(dev);

	return genlmsg_reply(rsp, info);

err_unlock_dev_lease:
	netdev_unlock(dev_lease);
err_unlock_dev:
	netdev_unlock(dev);
err_genlmsg_free:
	nlmsg_free(rsp);
	return err;
}
1307 
1308 void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
1309 {
1310 	INIT_LIST_HEAD(&priv->bindings);
1311 	mutex_init(&priv->lock);
1312 }
1313 
/* netdev_nl_sock_priv_destroy() - tear down all dma-buf bindings still
 * attached to a netlink socket's private state when the socket is closed.
 *
 * For each binding with a live device, a tracked reference is taken under
 * binding->lock (so the device cannot disappear in between) and dropped
 * only after unbinding under the device instance lock.  Bindings whose
 * device is already gone are unbound directly.
 */
void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv)
{
	struct net_devmem_dmabuf_binding *binding;
	struct net_devmem_dmabuf_binding *temp;
	netdevice_tracker dev_tracker;
	struct net_device *dev;

	mutex_lock(&priv->lock);
	/* _safe variant: net_devmem_unbind_dmabuf() removes entries. */
	list_for_each_entry_safe(binding, temp, &priv->bindings, list) {
		mutex_lock(&binding->lock);
		dev = binding->dev;
		if (!dev) {
			/* Device already released this binding. */
			mutex_unlock(&binding->lock);
			net_devmem_unbind_dmabuf(binding);
			continue;
		}
		/* Pin the device before dropping binding->lock so it stays
		 * valid until we can take its instance lock below.
		 */
		netdev_hold(dev, &dev_tracker, GFP_KERNEL);
		mutex_unlock(&binding->lock);

		netdev_lock(dev);
		net_devmem_unbind_dmabuf(binding);
		netdev_unlock(dev);
		netdev_put(dev, &dev_tracker);
	}
	mutex_unlock(&priv->lock);
}
1340 
1341 static int netdev_genl_netdevice_event(struct notifier_block *nb,
1342 				       unsigned long event, void *ptr)
1343 {
1344 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
1345 
1346 	switch (event) {
1347 	case NETDEV_REGISTER:
1348 		netdev_lock_ops_to_full(netdev);
1349 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF);
1350 		netdev_unlock_full_to_ops(netdev);
1351 		break;
1352 	case NETDEV_UNREGISTER:
1353 		netdev_lock(netdev);
1354 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF);
1355 		netdev_unlock(netdev);
1356 		break;
1357 	case NETDEV_XDP_FEAT_CHANGE:
1358 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF);
1359 		break;
1360 	}
1361 
1362 	return NOTIFY_OK;
1363 }
1364 
/* Notifier block registered at init to forward netdevice events. */
static struct notifier_block netdev_genl_nb = {
	.notifier_call	= netdev_genl_netdevice_event,
};
1368 
1369 static int __init netdev_genl_init(void)
1370 {
1371 	int err;
1372 
1373 	err = register_netdevice_notifier(&netdev_genl_nb);
1374 	if (err)
1375 		return err;
1376 
1377 	err = genl_register_family(&netdev_nl_family);
1378 	if (err)
1379 		goto err_unreg_ntf;
1380 
1381 	return 0;
1382 
1383 err_unreg_ntf:
1384 	unregister_netdevice_notifier(&netdev_genl_nb);
1385 	return err;
1386 }
1387 
1388 subsys_initcall(netdev_genl_init);
1389