xref: /linux/net/core/netdev-genl.c (revision 6b3f7af57881f6d6250c6dcc4d910fe8e855a607)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 #include <linux/netdevice.h>
4 #include <linux/notifier.h>
5 #include <linux/pid_namespace.h>
6 #include <linux/rtnetlink.h>
7 #include <net/busy_poll.h>
8 #include <net/net_namespace.h>
9 #include <net/netdev_queues.h>
10 #include <net/netdev_rx_queue.h>
11 #include <net/sock.h>
12 #include <net/xdp.h>
13 #include <net/xdp_sock.h>
14 #include <net/page_pool/memory_provider.h>
15 
16 #include "dev.h"
17 #include "devmem.h"
18 #include "netdev-genl-gen.h"
19 
/*
 * Per-dump continuation state, stored in the netlink callback's ctx area
 * (see netdev_dump_ctx()). It lets a multi-part dump resume where the
 * previous message ran out of room.
 */
struct netdev_nl_dump_ctx {
	/* ifindex cursor handed to the for_each_netdev_*_scoped() iterators */
	unsigned long	ifindex;
	/* next Rx queue index to dump for the netdev currently being walked */
	unsigned int	rxq_idx;
	/* next Tx queue index to dump for the netdev currently being walked */
	unsigned int	txq_idx;
	/* ID of the last NAPI dumped for the current netdev, 0 if none yet */
	unsigned int	napi_id;
};
26 
27 static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
28 {
29 	NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx);
30 
31 	return (struct netdev_nl_dump_ctx *)cb->ctx;
32 }
33 
34 static int
35 netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
36 		   const struct genl_info *info)
37 {
38 	u64 xsk_features = 0;
39 	u64 xdp_rx_meta = 0;
40 	void *hdr;
41 
42 	netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */
43 
44 	hdr = genlmsg_iput(rsp, info);
45 	if (!hdr)
46 		return -EMSGSIZE;
47 
48 #define XDP_METADATA_KFUNC(_, flag, __, xmo) \
49 	if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \
50 		xdp_rx_meta |= flag;
51 XDP_METADATA_KFUNC_xxx
52 #undef XDP_METADATA_KFUNC
53 
54 	if (netdev->xsk_tx_metadata_ops) {
55 		if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
56 			xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
57 		if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
58 			xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
59 		if (netdev->xsk_tx_metadata_ops->tmo_request_launch_time)
60 			xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO;
61 	}
62 
63 	if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
64 	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
65 			      netdev->xdp_features, NETDEV_A_DEV_PAD) ||
66 	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
67 			      xdp_rx_meta, NETDEV_A_DEV_PAD) ||
68 	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
69 			      xsk_features, NETDEV_A_DEV_PAD))
70 		goto err_cancel_msg;
71 
72 	if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
73 		if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
74 				netdev->xdp_zc_max_segs))
75 			goto err_cancel_msg;
76 	}
77 
78 	genlmsg_end(rsp, hdr);
79 
80 	return 0;
81 
82 err_cancel_msg:
83 	genlmsg_cancel(rsp, hdr);
84 	return -EMSGSIZE;
85 }
86 
87 static void
88 netdev_genl_dev_notify(struct net_device *netdev, int cmd)
89 {
90 	struct genl_info info;
91 	struct sk_buff *ntf;
92 
93 	if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
94 				NETDEV_NLGRP_MGMT))
95 		return;
96 
97 	genl_info_init_ntf(&info, &netdev_nl_family, cmd);
98 
99 	ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
100 	if (!ntf)
101 		return;
102 
103 	if (netdev_nl_dev_fill(netdev, ntf, &info)) {
104 		nlmsg_free(ntf);
105 		return;
106 	}
107 
108 	genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf,
109 				0, NETDEV_NLGRP_MGMT, GFP_KERNEL);
110 }
111 
112 int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
113 {
114 	struct net_device *netdev;
115 	struct sk_buff *rsp;
116 	u32 ifindex;
117 	int err;
118 
119 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX))
120 		return -EINVAL;
121 
122 	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
123 
124 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
125 	if (!rsp)
126 		return -ENOMEM;
127 
128 	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
129 	if (!netdev) {
130 		err = -ENODEV;
131 		goto err_free_msg;
132 	}
133 
134 	err = netdev_nl_dev_fill(netdev, rsp, info);
135 	netdev_unlock(netdev);
136 
137 	if (err)
138 		goto err_free_msg;
139 
140 	return genlmsg_reply(rsp, info);
141 
142 err_free_msg:
143 	nlmsg_free(rsp);
144 	return err;
145 }
146 
147 int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
148 {
149 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
150 	struct net *net = sock_net(skb->sk);
151 	int err;
152 
153 	for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
154 		err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
155 		if (err < 0)
156 			return err;
157 	}
158 
159 	return 0;
160 }
161 
162 static int
163 netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
164 			const struct genl_info *info)
165 {
166 	unsigned long irq_suspend_timeout;
167 	unsigned long gro_flush_timeout;
168 	u32 napi_defer_hard_irqs;
169 	void *hdr;
170 	pid_t pid;
171 
172 	if (!napi->dev->up)
173 		return 0;
174 
175 	hdr = genlmsg_iput(rsp, info);
176 	if (!hdr)
177 		return -EMSGSIZE;
178 
179 	if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
180 		goto nla_put_failure;
181 
182 	if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
183 		goto nla_put_failure;
184 
185 	if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
186 		goto nla_put_failure;
187 
188 	if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED,
189 			 napi_get_threaded(napi)))
190 		goto nla_put_failure;
191 
192 	if (napi->thread) {
193 		pid = task_pid_nr_ns(napi->thread,
194 				     task_active_pid_ns(current));
195 		if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
196 			goto nla_put_failure;
197 	}
198 
199 	napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi);
200 	if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS,
201 			napi_defer_hard_irqs))
202 		goto nla_put_failure;
203 
204 	irq_suspend_timeout = napi_get_irq_suspend_timeout(napi);
205 	if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
206 			 irq_suspend_timeout))
207 		goto nla_put_failure;
208 
209 	gro_flush_timeout = napi_get_gro_flush_timeout(napi);
210 	if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
211 			 gro_flush_timeout))
212 		goto nla_put_failure;
213 
214 	genlmsg_end(rsp, hdr);
215 
216 	return 0;
217 
218 nla_put_failure:
219 	genlmsg_cancel(rsp, hdr);
220 	return -EMSGSIZE;
221 }
222 
223 int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
224 {
225 	struct napi_struct *napi;
226 	struct sk_buff *rsp;
227 	u32 napi_id;
228 	int err;
229 
230 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
231 		return -EINVAL;
232 
233 	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
234 
235 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
236 	if (!rsp)
237 		return -ENOMEM;
238 
239 	napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
240 	if (napi) {
241 		err = netdev_nl_napi_fill_one(rsp, napi, info);
242 		netdev_unlock(napi->dev);
243 	} else {
244 		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
245 		err = -ENOENT;
246 	}
247 
248 	if (err) {
249 		goto err_free_msg;
250 	} else if (!rsp->len) {
251 		err = -ENOENT;
252 		goto err_free_msg;
253 	}
254 
255 	return genlmsg_reply(rsp, info);
256 
257 err_free_msg:
258 	nlmsg_free(rsp);
259 	return err;
260 }
261 
262 static int
263 netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
264 			const struct genl_info *info,
265 			struct netdev_nl_dump_ctx *ctx)
266 {
267 	struct napi_struct *napi;
268 	unsigned int prev_id;
269 	int err = 0;
270 
271 	if (!netdev->up)
272 		return err;
273 
274 	prev_id = UINT_MAX;
275 	list_for_each_entry(napi, &netdev->napi_list, dev_list) {
276 		if (!napi_id_valid(napi->napi_id))
277 			continue;
278 
279 		/* Dump continuation below depends on the list being sorted */
280 		WARN_ON_ONCE(napi->napi_id >= prev_id);
281 		prev_id = napi->napi_id;
282 
283 		if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
284 			continue;
285 
286 		err = netdev_nl_napi_fill_one(rsp, napi, info);
287 		if (err)
288 			return err;
289 		ctx->napi_id = napi->napi_id;
290 	}
291 	return err;
292 }
293 
294 int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
295 {
296 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
297 	const struct genl_info *info = genl_info_dump(cb);
298 	struct net *net = sock_net(skb->sk);
299 	struct net_device *netdev;
300 	u32 ifindex = 0;
301 	int err = 0;
302 
303 	if (info->attrs[NETDEV_A_NAPI_IFINDEX])
304 		ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
305 
306 	if (ifindex) {
307 		netdev = netdev_get_by_index_lock(net, ifindex);
308 		if (netdev) {
309 			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
310 			netdev_unlock(netdev);
311 		} else {
312 			err = -ENODEV;
313 		}
314 	} else {
315 		for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
316 			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
317 			if (err < 0)
318 				break;
319 			ctx->napi_id = 0;
320 		}
321 	}
322 
323 	return err;
324 }
325 
326 static int
327 netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
328 {
329 	u64 irq_suspend_timeout = 0;
330 	u64 gro_flush_timeout = 0;
331 	u8 threaded = 0;
332 	u32 defer = 0;
333 
334 	if (info->attrs[NETDEV_A_NAPI_THREADED]) {
335 		int ret;
336 
337 		threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]);
338 		ret = napi_set_threaded(napi, threaded);
339 		if (ret)
340 			return ret;
341 	}
342 
343 	if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
344 		defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
345 		napi_set_defer_hard_irqs(napi, defer);
346 	}
347 
348 	if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) {
349 		irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]);
350 		napi_set_irq_suspend_timeout(napi, irq_suspend_timeout);
351 	}
352 
353 	if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) {
354 		gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]);
355 		napi_set_gro_flush_timeout(napi, gro_flush_timeout);
356 	}
357 
358 	return 0;
359 }
360 
361 int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
362 {
363 	struct napi_struct *napi;
364 	unsigned int napi_id;
365 	int err;
366 
367 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
368 		return -EINVAL;
369 
370 	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
371 
372 	napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
373 	if (napi) {
374 		err = netdev_nl_napi_set_config(napi, info);
375 		netdev_unlock(napi->dev);
376 	} else {
377 		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
378 		err = -ENOENT;
379 	}
380 
381 	return err;
382 }
383 
384 static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi)
385 {
386 	if (napi && napi_id_valid(napi->napi_id))
387 		return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id);
388 	return 0;
389 }
390 
/*
 * Add a NETDEV_A_QUEUE_LEASE nest to @rsp for Rx queue @q_idx of @netdev,
 * if the lease lookup resolves that queue to a different device.
 *
 * The nest holds a NETDEV_A_LEASE_QUEUE sub-nest (queue id and type), the
 * ifindex of the resolved device and, when that device is in another
 * network namespace, the ID of that namespace as seen from @netdev's.
 *
 * Returns 0 when there is nothing to report or on success, and -ENOMEM if
 * an attribute does not fit. Open nests are not cancelled here on failure;
 * the caller visible in this file cancels the whole message instead.
 */
static int
netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev,
			   u32 q_idx, u32 q_type)
{
	struct net_device *orig_netdev = netdev;
	struct nlattr *nest_lease, *nest_queue;
	struct netdev_rx_queue *rxq;
	struct net *net, *peer_net;

	/*
	 * netdev and q_idx are passed by reference; presumably the helper
	 * rewrites them to the other end of the lease - TODO confirm.
	 */
	rxq = __netif_get_rx_queue_lease(&netdev, &q_idx, NETIF_PHYS_TO_VIRT);
	/* No queue, or the lookup stayed on the same device: nothing to add */
	if (!rxq || orig_netdev == netdev)
		return 0;

	nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE);
	if (!nest_lease)
		goto nla_put_failure;

	nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE);
	if (!nest_queue)
		goto nla_put_failure;
	/* q_idx is the value left behind by the lease lookup above */
	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx))
		goto nla_put_failure;
	if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type))
		goto nla_put_failure;
	nla_nest_end(rsp, nest_queue);

	if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX,
			READ_ONCE(netdev->ifindex)))
		goto nla_put_failure;

	/* The namespace pointers are only used inside the RCU read section */
	rcu_read_lock();
	peer_net = dev_net_rcu(netdev);
	net = dev_net_rcu(orig_netdev);
	if (!net_eq(net, peer_net)) {
		/* GFP_ATOMIC: allocation happens under rcu_read_lock() */
		s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC);

		if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id))
			goto nla_put_failure_unlock;
	}
	rcu_read_unlock();
	nla_nest_end(rsp, nest_lease);
	return 0;

nla_put_failure_unlock:
	rcu_read_unlock();
nla_put_failure:
	return -ENOMEM;
}
439 
440 static int
441 __netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct netdev_rx_queue *rxq)
442 {
443 	struct pp_memory_provider_params *params = &rxq->mp_params;
444 
445 	if (params->mp_ops &&
446 	    params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
447 		return -EMSGSIZE;
448 
449 #ifdef CONFIG_XDP_SOCKETS
450 	if (rxq->pool)
451 		if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
452 			return -EMSGSIZE;
453 #endif
454 	return 0;
455 }
456 
457 static int
458 netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct net_device *netdev,
459 			struct netdev_rx_queue *rxq)
460 {
461 	struct netdev_rx_queue *hw_rxq;
462 	int ret;
463 
464 	hw_rxq = rxq->lease;
465 	if (!hw_rxq || !netif_is_queue_leasee(netdev))
466 		return __netdev_nl_queue_fill_mp(rsp, rxq);
467 
468 	netdev_lock(hw_rxq->dev);
469 	ret = __netdev_nl_queue_fill_mp(rsp, hw_rxq);
470 	netdev_unlock(hw_rxq->dev);
471 	return ret;
472 }
473 
474 static int
475 netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
476 			 u32 q_idx, u32 q_type, const struct genl_info *info)
477 {
478 	struct netdev_rx_queue *rxq;
479 	struct netdev_queue *txq;
480 	void *hdr;
481 
482 	hdr = genlmsg_iput(rsp, info);
483 	if (!hdr)
484 		return -EMSGSIZE;
485 
486 	if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) ||
487 	    nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) ||
488 	    nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex))
489 		goto nla_put_failure;
490 
491 	switch (q_type) {
492 	case NETDEV_QUEUE_TYPE_RX:
493 		rxq = __netif_get_rx_queue(netdev, q_idx);
494 		if (nla_put_napi_id(rsp, rxq->napi))
495 			goto nla_put_failure;
496 		if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type))
497 			goto nla_put_failure;
498 		if (netdev_nl_queue_fill_mp(rsp, netdev, rxq))
499 			goto nla_put_failure;
500 		break;
501 	case NETDEV_QUEUE_TYPE_TX:
502 		txq = netdev_get_tx_queue(netdev, q_idx);
503 		if (nla_put_napi_id(rsp, txq->napi))
504 			goto nla_put_failure;
505 #ifdef CONFIG_XDP_SOCKETS
506 		if (txq->pool)
507 			if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
508 				goto nla_put_failure;
509 #endif
510 		break;
511 	}
512 
513 	genlmsg_end(rsp, hdr);
514 
515 	return 0;
516 
517 nla_put_failure:
518 	genlmsg_cancel(rsp, hdr);
519 	return -EMSGSIZE;
520 }
521 
522 static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id,
523 				    u32 q_type)
524 {
525 	switch (q_type) {
526 	case NETDEV_QUEUE_TYPE_RX:
527 		if (q_id >= netdev->real_num_rx_queues)
528 			return -EINVAL;
529 		return 0;
530 	case NETDEV_QUEUE_TYPE_TX:
531 		if (q_id >= netdev->real_num_tx_queues)
532 			return -EINVAL;
533 	}
534 	return 0;
535 }
536 
537 static int
538 netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
539 		     u32 q_type, const struct genl_info *info)
540 {
541 	int err;
542 
543 	if (!netdev->up)
544 		return -ENOENT;
545 
546 	err = netdev_nl_queue_validate(netdev, q_idx, q_type);
547 	if (err)
548 		return err;
549 
550 	return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info);
551 }
552 
553 int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
554 {
555 	u32 q_id, q_type, ifindex;
556 	struct net_device *netdev;
557 	struct sk_buff *rsp;
558 	int err;
559 
560 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) ||
561 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
562 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX))
563 		return -EINVAL;
564 
565 	q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]);
566 	q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]);
567 	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
568 
569 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
570 	if (!rsp)
571 		return -ENOMEM;
572 
573 	netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info),
574 						     ifindex);
575 	if (netdev) {
576 		err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
577 		netdev_unlock_ops_compat(netdev);
578 	} else {
579 		err = -ENODEV;
580 	}
581 
582 	if (err)
583 		goto err_free_msg;
584 
585 	return genlmsg_reply(rsp, info);
586 
587 err_free_msg:
588 	nlmsg_free(rsp);
589 	return err;
590 }
591 
592 static int
593 netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
594 			 const struct genl_info *info,
595 			 struct netdev_nl_dump_ctx *ctx)
596 {
597 	int err = 0;
598 
599 	if (!netdev->up)
600 		return err;
601 
602 	for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
603 		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx,
604 					       NETDEV_QUEUE_TYPE_RX, info);
605 		if (err)
606 			return err;
607 	}
608 	for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) {
609 		err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx,
610 					       NETDEV_QUEUE_TYPE_TX, info);
611 		if (err)
612 			return err;
613 	}
614 
615 	return err;
616 }
617 
618 int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
619 {
620 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
621 	const struct genl_info *info = genl_info_dump(cb);
622 	struct net *net = sock_net(skb->sk);
623 	struct net_device *netdev;
624 	u32 ifindex = 0;
625 	int err = 0;
626 
627 	if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
628 		ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
629 
630 	if (ifindex) {
631 		netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
632 		if (netdev) {
633 			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
634 			netdev_unlock_ops_compat(netdev);
635 		} else {
636 			err = -ENODEV;
637 		}
638 	} else {
639 		for_each_netdev_lock_ops_compat_scoped(net, netdev,
640 						       ctx->ifindex) {
641 			err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
642 			if (err < 0)
643 				break;
644 			ctx->rxq_idx = 0;
645 			ctx->txq_idx = 0;
646 		}
647 	}
648 
649 	return err;
650 }
651 
/* All-ones marks a counter the driver did not report. */
#define NETDEV_STAT_NOT_SET		(~0ULL)

/*
 * Add the counters in @_add to the matching counters in @_sum.
 *
 * Both buffers are treated as arrays of u64 covering @size bytes. A slot is
 * accumulated only when it is set on both sides, so an unset @_sum slot
 * stays unset and an unset @_add slot contributes nothing.
 *
 * Only whole u64 slots are processed. A @size that is not a multiple of
 * sizeof(u64) therefore cannot wrap the byte counter and run the loop past
 * the end of the buffers; the trailing bytes are ignored.
 */
static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size)
{
	size_t i, count = size / sizeof(u64);
	const u64 *add = _add;
	u64 *sum = _sum;

	for (i = 0; i < count; i++) {
		if (add[i] != NETDEV_STAT_NOT_SET &&
		    sum[i] != NETDEV_STAT_NOT_SET)
			sum[i] += add[i];
	}
}
667 
668 static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value)
669 {
670 	if (value == NETDEV_STAT_NOT_SET)
671 		return 0;
672 	return nla_put_uint(rsp, attr_id, value);
673 }
674 
675 static int
676 netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
677 {
678 	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
679 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
680 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
681 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
682 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
683 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) ||
684 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
685 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
686 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
687 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) ||
688 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) ||
689 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) ||
690 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) ||
691 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits))
692 		return -EMSGSIZE;
693 	return 0;
694 }
695 
696 static int
697 netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
698 {
699 	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
700 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) ||
701 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) ||
702 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) ||
703 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) ||
704 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) ||
705 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) ||
706 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
707 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
708 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
709 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) ||
710 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) ||
711 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake))
712 		return -EMSGSIZE;
713 	return 0;
714 }
715 
716 static int
717 netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp,
718 		      u32 q_type, int i, const struct genl_info *info)
719 {
720 	const struct netdev_stat_ops *ops = netdev->stat_ops;
721 	struct netdev_queue_stats_rx rx;
722 	struct netdev_queue_stats_tx tx;
723 	void *hdr;
724 
725 	hdr = genlmsg_iput(rsp, info);
726 	if (!hdr)
727 		return -EMSGSIZE;
728 	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) ||
729 	    nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) ||
730 	    nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i))
731 		goto nla_put_failure;
732 
733 	switch (q_type) {
734 	case NETDEV_QUEUE_TYPE_RX:
735 		memset(&rx, 0xff, sizeof(rx));
736 		ops->get_queue_stats_rx(netdev, i, &rx);
737 		if (!memchr_inv(&rx, 0xff, sizeof(rx)))
738 			goto nla_cancel;
739 		if (netdev_nl_stats_write_rx(rsp, &rx))
740 			goto nla_put_failure;
741 		break;
742 	case NETDEV_QUEUE_TYPE_TX:
743 		memset(&tx, 0xff, sizeof(tx));
744 		ops->get_queue_stats_tx(netdev, i, &tx);
745 		if (!memchr_inv(&tx, 0xff, sizeof(tx)))
746 			goto nla_cancel;
747 		if (netdev_nl_stats_write_tx(rsp, &tx))
748 			goto nla_put_failure;
749 		break;
750 	}
751 
752 	genlmsg_end(rsp, hdr);
753 	return 0;
754 
755 nla_cancel:
756 	genlmsg_cancel(rsp, hdr);
757 	return 0;
758 nla_put_failure:
759 	genlmsg_cancel(rsp, hdr);
760 	return -EMSGSIZE;
761 }
762 
763 static int
764 netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
765 			 const struct genl_info *info,
766 			 struct netdev_nl_dump_ctx *ctx)
767 {
768 	const struct netdev_stat_ops *ops = netdev->stat_ops;
769 	int i, err;
770 
771 	if (!(netdev->flags & IFF_UP))
772 		return 0;
773 
774 	i = ctx->rxq_idx;
775 	while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) {
776 		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX,
777 					    i, info);
778 		if (err)
779 			return err;
780 		ctx->rxq_idx = ++i;
781 	}
782 	i = ctx->txq_idx;
783 	while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) {
784 		err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX,
785 					    i, info);
786 		if (err)
787 			return err;
788 		ctx->txq_idx = ++i;
789 	}
790 
791 	ctx->rxq_idx = 0;
792 	ctx->txq_idx = 0;
793 	return 0;
794 }
795 
796 /**
797  * netdev_stat_queue_sum() - add up queue stats from range of queues
798  * @netdev:	net_device
799  * @rx_start:	index of the first Rx queue to query
800  * @rx_end:	index after the last Rx queue (first *not* to query)
801  * @rx_sum:	output Rx stats, should be already initialized
802  * @tx_start:	index of the first Tx queue to query
803  * @tx_end:	index after the last Tx queue (first *not* to query)
804  * @tx_sum:	output Tx stats, should be already initialized
805  *
806  * Add stats from [start, end) range of queue IDs to *x_sum structs.
807  * The sum structs must be already initialized. Usually this
808  * helper is invoked from the .get_base_stats callbacks of drivers
809  * to account for stats of disabled queues. In that case the ranges
810  * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues).
811  */
812 void netdev_stat_queue_sum(struct net_device *netdev,
813 			   int rx_start, int rx_end,
814 			   struct netdev_queue_stats_rx *rx_sum,
815 			   int tx_start, int tx_end,
816 			   struct netdev_queue_stats_tx *tx_sum)
817 {
818 	const struct netdev_stat_ops *ops;
819 	struct netdev_queue_stats_rx rx;
820 	struct netdev_queue_stats_tx tx;
821 	int i;
822 
823 	ops = netdev->stat_ops;
824 
825 	for (i = rx_start; i < rx_end; i++) {
826 		memset(&rx, 0xff, sizeof(rx));
827 		if (ops->get_queue_stats_rx)
828 			ops->get_queue_stats_rx(netdev, i, &rx);
829 		netdev_nl_stats_add(rx_sum, &rx, sizeof(rx));
830 	}
831 	for (i = tx_start; i < tx_end; i++) {
832 		memset(&tx, 0xff, sizeof(tx));
833 		if (ops->get_queue_stats_tx)
834 			ops->get_queue_stats_tx(netdev, i, &tx);
835 		netdev_nl_stats_add(tx_sum, &tx, sizeof(tx));
836 	}
837 }
838 EXPORT_SYMBOL(netdev_stat_queue_sum);
839 
840 static int
841 netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
842 			  const struct genl_info *info)
843 {
844 	struct netdev_queue_stats_rx rx_sum;
845 	struct netdev_queue_stats_tx tx_sum;
846 	void *hdr;
847 
848 	/* Netdev can't guarantee any complete counters */
849 	if (!netdev->stat_ops->get_base_stats)
850 		return 0;
851 
852 	memset(&rx_sum, 0xff, sizeof(rx_sum));
853 	memset(&tx_sum, 0xff, sizeof(tx_sum));
854 
855 	netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum);
856 
857 	/* The op was there, but nothing reported, don't bother */
858 	if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) &&
859 	    !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum)))
860 		return 0;
861 
862 	hdr = genlmsg_iput(rsp, info);
863 	if (!hdr)
864 		return -EMSGSIZE;
865 	if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
866 		goto nla_put_failure;
867 
868 	netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum,
869 			      0, netdev->real_num_tx_queues, &tx_sum);
870 
871 	if (netdev_nl_stats_write_rx(rsp, &rx_sum) ||
872 	    netdev_nl_stats_write_tx(rsp, &tx_sum))
873 		goto nla_put_failure;
874 
875 	genlmsg_end(rsp, hdr);
876 	return 0;
877 
878 nla_put_failure:
879 	genlmsg_cancel(rsp, hdr);
880 	return -EMSGSIZE;
881 }
882 
883 static int
884 netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope,
885 			      struct sk_buff *skb, const struct genl_info *info,
886 			      struct netdev_nl_dump_ctx *ctx)
887 {
888 	if (!netdev->stat_ops)
889 		return 0;
890 
891 	switch (scope) {
892 	case 0:
893 		return netdev_nl_stats_by_netdev(netdev, skb, info);
894 	case NETDEV_QSTATS_SCOPE_QUEUE:
895 		return netdev_nl_stats_by_queue(netdev, skb, info, ctx);
896 	}
897 
898 	return -EINVAL;	/* Should not happen, per netlink policy */
899 }
900 
901 int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
902 				struct netlink_callback *cb)
903 {
904 	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
905 	const struct genl_info *info = genl_info_dump(cb);
906 	struct net *net = sock_net(skb->sk);
907 	struct net_device *netdev;
908 	unsigned int ifindex;
909 	unsigned int scope;
910 	int err = 0;
911 
912 	scope = 0;
913 	if (info->attrs[NETDEV_A_QSTATS_SCOPE])
914 		scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);
915 
916 	ifindex = 0;
917 	if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
918 		ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);
919 
920 	if (ifindex) {
921 		netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
922 		if (!netdev) {
923 			NL_SET_BAD_ATTR(info->extack,
924 					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
925 			return -ENODEV;
926 		}
927 		if (netdev->stat_ops) {
928 			err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
929 							    info, ctx);
930 		} else {
931 			NL_SET_BAD_ATTR(info->extack,
932 					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
933 			err = -EOPNOTSUPP;
934 		}
935 		netdev_unlock_ops_compat(netdev);
936 		return err;
937 	}
938 
939 	for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) {
940 		err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
941 						    info, ctx);
942 		if (err < 0)
943 			break;
944 	}
945 
946 	return err;
947 }
948 
949 static int netdev_nl_read_rxq_bitmap(struct genl_info *info,
950 				     u32 rxq_bitmap_len,
951 				     unsigned long *rxq_bitmap)
952 {
953 	const int maxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
954 	struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
955 	struct nlattr *attr;
956 	int rem, err = 0;
957 	u32 rxq_idx;
958 
959 	nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
960 			       genlmsg_data(info->genlhdr),
961 			       genlmsg_len(info->genlhdr), rem) {
962 		err = nla_parse_nested(tb, maxtype, attr,
963 				       netdev_queue_id_nl_policy, info->extack);
964 		if (err < 0)
965 			return err;
966 
967 		if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
968 		    NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE))
969 			return -EINVAL;
970 
971 		if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
972 			NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
973 			return -EINVAL;
974 		}
975 
976 		rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
977 		if (rxq_idx >= rxq_bitmap_len) {
978 			NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_ID]);
979 			return -EINVAL;
980 		}
981 
982 		bitmap_set(rxq_bitmap, rxq_idx, 1);
983 	}
984 
985 	return 0;
986 }
987 
988 static struct device *
989 netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap,
990 		      struct netlink_ext_ack *extack)
991 {
992 	struct device *dma_dev = NULL;
993 	u32 rxq_idx, prev_rxq_idx;
994 
995 	for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
996 		struct device *rxq_dma_dev;
997 
998 		rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx,
999 						       NETDEV_QUEUE_TYPE_RX);
1000 		if (dma_dev && rxq_dma_dev != dma_dev) {
1001 			NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)",
1002 					   rxq_idx, prev_rxq_idx);
1003 			return ERR_PTR(-EOPNOTSUPP);
1004 		}
1005 
1006 		dma_dev = rxq_dma_dev;
1007 		prev_rxq_idx = rxq_idx;
1008 	}
1009 
1010 	return dma_dev;
1011 }
1012 
1013 int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
1014 {
1015 	struct net_devmem_dmabuf_binding *binding;
1016 	u32 ifindex, dmabuf_fd, rxq_idx;
1017 	struct netdev_nl_sock *priv;
1018 	struct net_device *netdev;
1019 	unsigned long *rxq_bitmap;
1020 	struct device *dma_dev;
1021 	struct sk_buff *rsp;
1022 	int err = 0;
1023 	void *hdr;
1024 
1025 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
1026 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) ||
1027 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES))
1028 		return -EINVAL;
1029 
1030 	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
1031 	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
1032 
1033 	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
1034 	if (IS_ERR(priv))
1035 		return PTR_ERR(priv);
1036 
1037 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
1038 	if (!rsp)
1039 		return -ENOMEM;
1040 
1041 	hdr = genlmsg_iput(rsp, info);
1042 	if (!hdr) {
1043 		err = -EMSGSIZE;
1044 		goto err_genlmsg_free;
1045 	}
1046 
1047 	mutex_lock(&priv->lock);
1048 
1049 	err = 0;
1050 	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
1051 	if (!netdev) {
1052 		err = -ENODEV;
1053 		goto err_unlock_sock;
1054 	}
1055 	if (!netif_device_present(netdev))
1056 		err = -ENODEV;
1057 	else if (!netdev_need_ops_lock(netdev))
1058 		err = -EOPNOTSUPP;
1059 	if (err) {
1060 		NL_SET_BAD_ATTR(info->extack,
1061 				info->attrs[NETDEV_A_DEV_IFINDEX]);
1062 		goto err_unlock;
1063 	}
1064 
1065 	rxq_bitmap = bitmap_zalloc(netdev->real_num_rx_queues, GFP_KERNEL);
1066 	if (!rxq_bitmap) {
1067 		err = -ENOMEM;
1068 		goto err_unlock;
1069 	}
1070 
1071 	err = netdev_nl_read_rxq_bitmap(info, netdev->real_num_rx_queues,
1072 					rxq_bitmap);
1073 	if (err)
1074 		goto err_rxq_bitmap;
1075 
1076 	dma_dev = netdev_nl_get_dma_dev(netdev, rxq_bitmap, info->extack);
1077 	if (IS_ERR(dma_dev)) {
1078 		err = PTR_ERR(dma_dev);
1079 		goto err_rxq_bitmap;
1080 	}
1081 
1082 	binding = net_devmem_bind_dmabuf(netdev, NULL, dma_dev, DMA_FROM_DEVICE,
1083 					 dmabuf_fd, priv, info->extack);
1084 	if (IS_ERR(binding)) {
1085 		err = PTR_ERR(binding);
1086 		goto err_rxq_bitmap;
1087 	}
1088 
1089 	for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
1090 		err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
1091 						      info->extack);
1092 		if (err)
1093 			goto err_unbind;
1094 	}
1095 
1096 	nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
1097 	genlmsg_end(rsp, hdr);
1098 
1099 	err = genlmsg_reply(rsp, info);
1100 
1101 	bitmap_free(rxq_bitmap);
1102 
1103 	netdev_unlock(netdev);
1104 
1105 	mutex_unlock(&priv->lock);
1106 
1107 	return err < 0 ? err : 0;
1108 
1109 err_unbind:
1110 	net_devmem_unbind_dmabuf(binding);
1111 err_rxq_bitmap:
1112 	bitmap_free(rxq_bitmap);
1113 err_unlock:
1114 	netdev_unlock(netdev);
1115 err_unlock_sock:
1116 	mutex_unlock(&priv->lock);
1117 err_genlmsg_free:
1118 	nlmsg_free(rsp);
1119 	return err;
1120 }
1121 
1122 /* Find the DMA-capable device for a netmem TX binding.
1123  *
1124  * For NETMEM_TX_DMA devices, return the device itself.
1125  * For NETMEM_TX_NO_DMA devices, walk leased RX queues to find the underlying
1126  * physical device and return it.
1127  */
1128 static struct net_device *
1129 netdev_find_netmem_tx_dev(struct net_device *dev)
1130 {
1131 	struct netdev_rx_queue *lease_rxq;
1132 	struct net_device *phys_dev;
1133 	int i;
1134 
1135 	if (dev->netmem_tx == NETMEM_TX_DMA)
1136 		return dev;
1137 
1138 	if (dev->netmem_tx != NETMEM_TX_NO_DMA)
1139 		return NULL;
1140 
1141 	for (i = 0; i < dev->real_num_rx_queues; i++) {
1142 		lease_rxq = READ_ONCE(__netif_get_rx_queue(dev, i)->lease);
1143 		if (!lease_rxq)
1144 			continue;
1145 
1146 		phys_dev = lease_rxq->dev;
1147 		if (netif_device_present(phys_dev) &&
1148 		    phys_dev->netmem_tx == NETMEM_TX_DMA)
1149 			return phys_dev;
1150 	}
1151 
1152 	return NULL;
1153 }
1154 
1155 int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
1156 {
1157 	struct net_devmem_dmabuf_binding *binding;
1158 	struct net_device *bind_dev;
1159 	struct netdev_nl_sock *priv;
1160 	struct net_device *netdev;
1161 	struct device *dma_dev;
1162 	u32 ifindex, dmabuf_fd;
1163 	struct sk_buff *rsp;
1164 	int err = 0;
1165 	void *hdr;
1166 
1167 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
1168 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD))
1169 		return -EINVAL;
1170 
1171 	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
1172 	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
1173 
1174 	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
1175 	if (IS_ERR(priv))
1176 		return PTR_ERR(priv);
1177 
1178 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
1179 	if (!rsp)
1180 		return -ENOMEM;
1181 
1182 	hdr = genlmsg_iput(rsp, info);
1183 	if (!hdr) {
1184 		err = -EMSGSIZE;
1185 		goto err_genlmsg_free;
1186 	}
1187 
1188 	mutex_lock(&priv->lock);
1189 
1190 	netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
1191 	if (!netdev) {
1192 		err = -ENODEV;
1193 		goto err_unlock_sock;
1194 	}
1195 
1196 	if (!netif_device_present(netdev)) {
1197 		err = -ENODEV;
1198 		goto err_unlock_netdev;
1199 	}
1200 
1201 	if (netdev->netmem_tx == NETMEM_TX_NONE) {
1202 		err = -EOPNOTSUPP;
1203 		NL_SET_ERR_MSG(info->extack,
1204 			       "Driver does not support netmem TX");
1205 		goto err_unlock_netdev;
1206 	}
1207 
1208 	bind_dev = netdev_find_netmem_tx_dev(netdev);
1209 	if (!bind_dev) {
1210 		err = -EOPNOTSUPP;
1211 		NL_SET_ERR_MSG(info->extack,
1212 			       "No DMA-capable device found for netmem TX");
1213 		goto err_unlock_netdev;
1214 	}
1215 
1216 	if (bind_dev != netdev)
1217 		netdev_lock(bind_dev);
1218 
1219 	dma_dev = netdev_queue_get_dma_dev(bind_dev, 0, NETDEV_QUEUE_TYPE_TX);
1220 
1221 	binding = net_devmem_bind_dmabuf(bind_dev,
1222 					 bind_dev != netdev ? netdev : NULL,
1223 					 dma_dev, DMA_TO_DEVICE, dmabuf_fd,
1224 					 priv, info->extack);
1225 	if (IS_ERR(binding)) {
1226 		err = PTR_ERR(binding);
1227 		goto err_unlock_bind_dev;
1228 	}
1229 
1230 	nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
1231 	genlmsg_end(rsp, hdr);
1232 
1233 	if (bind_dev != netdev)
1234 		netdev_unlock(bind_dev);
1235 	netdev_unlock(netdev);
1236 	mutex_unlock(&priv->lock);
1237 
1238 	return genlmsg_reply(rsp, info);
1239 
1240 err_unlock_bind_dev:
1241 	if (bind_dev != netdev)
1242 		netdev_unlock(bind_dev);
1243 err_unlock_netdev:
1244 	netdev_unlock(netdev);
1245 err_unlock_sock:
1246 	mutex_unlock(&priv->lock);
1247 err_genlmsg_free:
1248 	nlmsg_free(rsp);
1249 	return err;
1250 }
1251 
/* Handle a queue-create request.
 *
 * Creates a new RX queue on the device named by NETDEV_A_QUEUE_IFINDEX
 * (through its queue_mgmt_ops->ndo_queue_create) and attaches to it, via
 * netdev_rx_queue_lease(), the RX queue described by the nested
 * NETDEV_A_QUEUE_LEASE attribute.
 *
 * Required attributes: NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE (must be
 * NETDEV_QUEUE_TYPE_RX) and NETDEV_A_QUEUE_LEASE. The lease nest must carry
 * NETDEV_A_LEASE_IFINDEX and NETDEV_A_LEASE_QUEUE (itself a nest with
 * NETDEV_A_QUEUE_ID and an RX NETDEV_A_QUEUE_TYPE); NETDEV_A_LEASE_NETNS_ID
 * is optional and needs CAP_NET_ADMIN.
 *
 * On success the reply carries the id of the new queue in NETDEV_A_QUEUE_ID
 * and the result of genlmsg_reply() is returned; every failure before that
 * returns a negative errno.
 */
1252 int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info)
1253 {
1254 	const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
1255 	const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1;
1256 	int err, ifindex, ifindex_lease, queue_id, queue_id_lease;
1257 	struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
1258 	struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)];
1259 	struct netdev_rx_queue *rxq, *rxq_lease;
1260 	struct net_device *dev, *dev_lease;
1261 	netdevice_tracker dev_tracker;
	/* -1 means "no netns id given": look the leased device up in the
	 * requester's netns and take no netns reference.
	 */
1262 	s32 netns_lease = -1;
1263 	struct nlattr *nest;
1264 	struct sk_buff *rsp;
1265 	struct net *net;
1266 	void *hdr;
1267 
1268 	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) ||
1269 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
1270 	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE))
1271 		return -EINVAL;
1272 	if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) !=
1273 	    NETDEV_QUEUE_TYPE_RX) {
1274 		NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]);
1275 		return -EINVAL;
1276 	}
1277 
1278 	ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
1279 
	/* First nest level: which device (and optionally netns) to lease from. */
1280 	nest = info->attrs[NETDEV_A_QUEUE_LEASE];
1281 	err = nla_parse_nested(ltb, lmaxtype, nest,
1282 			       netdev_lease_nl_policy, info->extack);
1283 	if (err < 0)
1284 		return err;
1285 	if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) ||
1286 	    NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE))
1287 		return -EINVAL;
	/* Naming another netns for the leased device is privileged. */
1288 	if (ltb[NETDEV_A_LEASE_NETNS_ID]) {
1289 		if (!capable(CAP_NET_ADMIN))
1290 			return -EPERM;
1291 		netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]);
1292 	}
1293 
1294 	ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]);
1295 
	/* Second nest level: which queue of that device to lease. */
1296 	nest = ltb[NETDEV_A_LEASE_QUEUE];
1297 	err = nla_parse_nested(qtb, qmaxtype, nest,
1298 			       netdev_queue_id_nl_policy, info->extack);
1299 	if (err < 0)
1300 		return err;
1301 	if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) ||
1302 	    NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE))
1303 		return -EINVAL;
1304 	if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
1305 		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]);
1306 		return -EINVAL;
1307 	}
1308 
1309 	queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]);
1310 
	/* Prepare the reply before any device lock is taken. */
1311 	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
1312 	if (!rsp)
1313 		return -ENOMEM;
1314 
1315 	hdr = genlmsg_iput(rsp, info);
1316 	if (!hdr) {
1317 		err = -EMSGSIZE;
1318 		goto err_genlmsg_free;
1319 	}
1320 
1321 	/* Locking order is always from the virtual to the physical device
1322 	 * since this is also the same order when applications open the
1323 	 * memory provider later on.
1324 	 */
1325 	dev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
1326 	if (!dev) {
1327 		err = -ENODEV;
1328 		goto err_genlmsg_free;
1329 	}
1330 	if (!netdev_can_create_queue(dev, info->extack)) {
1331 		err = -EINVAL;
1332 		goto err_unlock_dev;
1333 	}
1334 
	/* Default to the requester's netns. An explicit netns id yields a
	 * netns that is released with put_net() on every exit path reached
	 * after the lookup succeeded.
	 */
1335 	net = genl_info_net(info);
1336 	if (netns_lease >= 0) {
1337 		net = get_net_ns_by_id(net, netns_lease);
1338 		if (!net) {
1339 			err = -ENONET;
1340 			goto err_unlock_dev;
1341 		}
1342 	}
1343 
	/* The leased device is first pinned with a tracked reference and
	 * checked; netdev_put_lock() is then given that reference, and from
	 * there on the device is released with netdev_unlock() only.
	 */
1344 	dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker,
1345 					GFP_KERNEL);
1346 	if (!dev_lease) {
1347 		err = -ENODEV;
1348 		goto err_put_netns;
1349 	}
1350 	if (!netdev_can_lease_queue(dev_lease, info->extack)) {
1351 		netdev_put(dev_lease, &dev_tracker);
1352 		err = -EINVAL;
1353 		goto err_put_netns;
1354 	}
1355 
1356 	dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker);
1357 	if (!dev_lease) {
1358 		err = -ENODEV;
1359 		goto err_put_netns;
1360 	}
1361 	if (queue_id_lease >= dev_lease->real_num_rx_queues) {
1362 		err = -ERANGE;
1363 		NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]);
1364 		goto err_unlock_dev_lease;
1365 	}
1366 	if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX,
1367 			      info->extack)) {
1368 		err = -EBUSY;
1369 		goto err_unlock_dev_lease;
1370 	}
1371 
1372 	rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease);
	/* Current last RX queue of dev, inspected by the check below. */
1373 	rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1);
1374 
1375 	/* Leasing queues from different physical devices is currently
1376 	 * not supported. Capabilities such as XDP features and DMA
1377 	 * device may differ between physical devices, and computing
1378 	 * a correct intersection for the virtual device is not yet
1379 	 * implemented.
1380 	 */
1381 	if (rxq->lease && rxq->lease->dev != dev_lease) {
1382 		err = -EOPNOTSUPP;
1383 		NL_SET_ERR_MSG(info->extack,
1384 			       "Leasing queues from different devices not supported");
1385 		goto err_unlock_dev_lease;
1386 	}
1387 
	/* A negative return from ndo_queue_create is an errno; otherwise it
	 * is the index of the newly created queue.
	 */
1388 	queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack);
1389 	if (queue_id < 0) {
1390 		err = queue_id;
1391 		goto err_unlock_dev_lease;
1392 	}
1393 	rxq = __netif_get_rx_queue(dev, queue_id);
1394 
1395 	netdev_rx_queue_lease(rxq, rxq_lease);
1396 
	/* NOTE(review): the nla_put_u32() result is not checked here. */
1397 	nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id);
1398 	genlmsg_end(rsp, hdr);
1399 
	/* Drop both device locks and the netns reference before replying. */
1400 	netdev_unlock(dev_lease);
1401 	netdev_unlock(dev);
1402 	if (netns_lease >= 0)
1403 		put_net(net);
1404 
1405 	return genlmsg_reply(rsp, info);
1406 
1407 err_unlock_dev_lease:
1408 	netdev_unlock(dev_lease);
1409 err_put_netns:
1410 	if (netns_lease >= 0)
1411 		put_net(net);
1412 err_unlock_dev:
1413 	netdev_unlock(dev);
1414 err_genlmsg_free:
1415 	nlmsg_free(rsp);
1416 	return err;
1417 }
1418 
1419 void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
1420 {
1421 	INIT_LIST_HEAD(&priv->bindings);
1422 	mutex_init(&priv->lock);
1423 }
1424 
1425 void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv)
1426 {
1427 	struct net_devmem_dmabuf_binding *binding;
1428 	struct net_devmem_dmabuf_binding *temp;
1429 	netdevice_tracker dev_tracker;
1430 	struct net_device *dev;
1431 
1432 	mutex_lock(&priv->lock);
1433 	list_for_each_entry_safe(binding, temp, &priv->bindings, list) {
1434 		mutex_lock(&binding->lock);
1435 		dev = binding->dev;
1436 		if (!dev) {
1437 			mutex_unlock(&binding->lock);
1438 			net_devmem_unbind_dmabuf(binding);
1439 			continue;
1440 		}
1441 		netdev_hold(dev, &dev_tracker, GFP_KERNEL);
1442 		mutex_unlock(&binding->lock);
1443 
1444 		netdev_lock(dev);
1445 		net_devmem_unbind_dmabuf(binding);
1446 		netdev_unlock(dev);
1447 		netdev_put(dev, &dev_tracker);
1448 	}
1449 	mutex_unlock(&priv->lock);
1450 }
1451 
1452 static int netdev_genl_netdevice_event(struct notifier_block *nb,
1453 				       unsigned long event, void *ptr)
1454 {
1455 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
1456 
1457 	switch (event) {
1458 	case NETDEV_REGISTER:
1459 		netdev_lock_ops_to_full(netdev);
1460 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF);
1461 		netdev_unlock_full_to_ops(netdev);
1462 		break;
1463 	case NETDEV_UNREGISTER:
1464 		netdev_lock(netdev);
1465 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF);
1466 		netdev_unlock(netdev);
1467 		break;
1468 	case NETDEV_XDP_FEAT_CHANGE:
1469 		netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF);
1470 		break;
1471 	}
1472 
1473 	return NOTIFY_OK;
1474 }
1475 
1476 static struct notifier_block netdev_genl_nb = {
1477 	.notifier_call	= netdev_genl_netdevice_event,
1478 };
1479 
1480 static int __init netdev_genl_init(void)
1481 {
1482 	int err;
1483 
1484 	err = register_netdevice_notifier(&netdev_genl_nb);
1485 	if (err)
1486 		return err;
1487 
1488 	err = genl_register_family(&netdev_nl_family);
1489 	if (err)
1490 		goto err_unreg_ntf;
1491 
1492 	return 0;
1493 
1494 err_unreg_ntf:
1495 	unregister_netdevice_notifier(&netdev_genl_nb);
1496 	return err;
1497 }
1498 
1499 subsys_initcall(netdev_genl_init);
1500