// SPDX-License-Identifier: GPL-2.0

#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/xarray.h>
#include <net/busy_poll.h>
#include <net/net_debug.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/types.h>
#include <net/page_pool/memory_provider.h>
#include <net/sock.h>

#include "page_pool_priv.h"
#include "netdev-genl-gen.h"

static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1);
/* Protects: page_pools, netdevice->page_pools, pool->p.napi, pool->slow.netdev,
 * pool->user.
 * Ordering: inside rtnl_lock
 */
DEFINE_MUTEX(page_pools_lock);

/* Page pools are only reachable from user space (via netlink) if they are
 * linked to a netdev at creation time. The following page pool "visibility"
 * states are possible:
 *  - normal
 *    - user.list: linked to real netdev, netdev: real netdev
 *  - orphaned - real netdev has disappeared
 *    - user.list: linked to lo, netdev: lo
 *  - invisible - either (a) created without netdev linking, (b) unlisted due
 *      to error, or (c) the entire namespace which owned this pool disappeared
 *    - user.list: unhashed, netdev: unknown
 */
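/* Transitions between these states are driven by the netdev notifier at the
 * bottom of this file: page_pool_unreg_netdev() re-links a dying device's
 * pools to loopback ("orphaned"), page_pool_unreg_netdev_wipe() unhashes
 * them once loopback itself goes away ("invisible").
 */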

typedef int (*pp_nl_fill_cb)(struct sk_buff *rsp, const struct page_pool *pool,
			     const struct genl_info *info);

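/* Look up a single user-visible page pool by id and reply to @info using
 * the given fill callback. Pools which are unhashed (invisible) or owned
 * by a different netns are reported as -ENOENT.
 */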
static int
netdev_nl_page_pool_get_do(struct genl_info *info, u32 id, pp_nl_fill_cb fill)
{
	struct page_pool *pool;
	struct sk_buff *rsp;
	int err;

	mutex_lock(&page_pools_lock);
	pool = xa_load(&page_pools, id);
	if (!pool || hlist_unhashed(&pool->user.list) ||
	    !net_eq(dev_net(pool->slow.netdev), genl_info_net(info))) {
		err = -ENOENT;
		goto err_unlock;
	}

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp) {
		err = -ENOMEM;
		goto err_unlock;
	}

	err = fill(rsp, pool, info);
	if (err)
		goto err_free_msg;

	mutex_unlock(&page_pools_lock);

	return genlmsg_reply(rsp, info);

err_free_msg:
	nlmsg_free(rsp);
err_unlock:
	mutex_unlock(&page_pools_lock);
	return err;
}

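/* Dump resume state, stored in netlink_callback::ctx: the netdev iterator
 * cursor plus the pool id at which to resume within that netdev's list.
 */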
struct page_pool_dump_cb {
	unsigned long ifindex;
	u32 pp_id;
};

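/* Walk every netdev in the caller's netns and every page pool linked to it,
 * emitting one message per pool; runs under rtnl_lock and page_pools_lock.
 */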
static int
netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
			     pp_nl_fill_cb fill)
{
	struct page_pool_dump_cb *state = (void *)cb->ctx;
	const struct genl_info *info = genl_info_dump(cb);
	struct net *net = sock_net(skb->sk);
	struct net_device *netdev;
	struct page_pool *pool;
	int err = 0;

	rtnl_lock();
	mutex_lock(&page_pools_lock);
	for_each_netdev_dump(net, netdev, state->ifindex) {
		hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
			if (state->pp_id && state->pp_id < pool->user.id)
				continue;

			state->pp_id = pool->user.id;
			err = fill(skb, pool, info);
			if (err)
				goto out;
		}

		state->pp_id = 0;
	}
out:
	mutex_unlock(&page_pools_lock);
	rtnl_unlock();

	return err;
}

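/* Fill a stats message for one pool. Only available when the kernel is
 * built with CONFIG_PAGE_POOL_STATS, otherwise -EOPNOTSUPP is reported
 * via extack.
 */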
static int
page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool,
			const struct genl_info *info)
{
#ifdef CONFIG_PAGE_POOL_STATS
	struct page_pool_stats stats = {};
	struct nlattr *nest;
	void *hdr;

	if (!page_pool_get_stats(pool, &stats))
		return 0;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;

	nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO);

	if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) ||
	    (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
	     nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
			 pool->slow.netdev->ifindex)))
		goto err_cancel_nest;

	nla_nest_end(rsp, nest);

	if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST,
			 stats.alloc_stats.fast) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW,
			 stats.alloc_stats.slow) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER,
			 stats.alloc_stats.slow_high_order) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY,
			 stats.alloc_stats.empty) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL,
			 stats.alloc_stats.refill) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE,
			 stats.alloc_stats.waive) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED,
			 stats.recycle_stats.cached) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL,
			 stats.recycle_stats.cache_full) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING,
			 stats.recycle_stats.ring) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL,
			 stats.recycle_stats.ring_full) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT,
			 stats.recycle_stats.released_refcnt))
		goto err_cancel_msg;

	genlmsg_end(rsp, hdr);

	return 0;
err_cancel_nest:
	nla_nest_cancel(rsp, nest);
err_cancel_msg:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
#else
	GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS");
	return -EOPNOTSUPP;
#endif
}

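/* Stats "do" request: parse the nested NETDEV_A_PAGE_POOL_STATS_INFO
 * attribute, require a pool id (selecting by ifindex is rejected) and
 * reply with the stats of that single pool.
 */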
int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
				       struct genl_info *info)
{
	struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)];
	struct nlattr *nest;
	int err;
	u32 id;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO))
		return -EINVAL;

	nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO];
	err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest,
			       netdev_page_pool_info_nl_policy,
			       info->extack);
	if (err)
		return err;

	if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID))
		return -EINVAL;
	if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) {
		NL_SET_ERR_MSG_ATTR(info->extack,
				    tb[NETDEV_A_PAGE_POOL_IFINDEX],
				    "selecting by ifindex not supported");
		return -EINVAL;
	}

	id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]);

	return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill);
}

int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
					 struct netlink_callback *cb)
{
	return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill);
}

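/* Fill the generic (non-stats) representation of a pool: id, ifindex
 * (omitted for the loopback placeholder), NAPI id, inflight pages and
 * memory, detach time, and any memory-provider-specific attributes.
 */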
static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
		  const struct genl_info *info)
{
	size_t inflight, refsz;
	unsigned int napi_id;
	void *hdr;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr)
		return -EMSGSIZE;

	if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id))
		goto err_cancel;

	if (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
	    nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
			pool->slow.netdev->ifindex))
		goto err_cancel;

	napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0;
	if (napi_id_valid(napi_id) &&
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id))
		goto err_cancel;

	inflight = page_pool_inflight(pool, false);
	refsz = PAGE_SIZE << pool->p.order;
	if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) ||
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
			 inflight * refsz))
		goto err_cancel;
	if (pool->user.detach_time &&
	    nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME,
			 pool->user.detach_time))
		goto err_cancel;

	if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL))
		goto err_cancel;

	genlmsg_end(rsp, hdr);

	return 0;
err_cancel:
	genlmsg_cancel(rsp, hdr);
	return -EMSGSIZE;
}

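/* Multicast a notification about @pool to NETDEV_NLGRP_PAGE_POOL.
 * Invisible (unhashed) pools are skipped, and no message is built
 * when the group has no listeners.
 */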
static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd)
{
	struct genl_info info;
	struct sk_buff *ntf;
	struct net *net;

	lockdep_assert_held(&page_pools_lock);

	/* 'invisible' page pools don't matter */
	if (hlist_unhashed(&pool->user.list))
		return;
	net = dev_net(pool->slow.netdev);

	if (!genl_has_listeners(&netdev_nl_family, net, NETDEV_NLGRP_PAGE_POOL))
		return;

	genl_info_init_ntf(&info, &netdev_nl_family, cmd);

	ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!ntf)
		return;

	if (page_pool_nl_fill(ntf, pool, &info)) {
		nlmsg_free(ntf);
		return;
	}

	genlmsg_multicast_netns(&netdev_nl_family, net, ntf,
				0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL);
}

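/* "Do" and "dump" handlers for the plain (non-stats) page pool get. */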
int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
{
	u32 id;

	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_ID))
		return -EINVAL;

	id = nla_get_uint(info->attrs[NETDEV_A_PAGE_POOL_ID]);

	return netdev_nl_page_pool_get_do(info, id, page_pool_nl_fill);
}

int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_fill);
}

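/* Assign the pool a user-visible id and, if it was created with a netdev,
 * link it into that netdev's list and announce it.
 */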
int page_pool_list(struct page_pool *pool)
{
	static u32 id_alloc_next;
	int err;

	mutex_lock(&page_pools_lock);
	err = xa_alloc_cyclic(&page_pools, &pool->user.id, pool, xa_limit_32b,
			      &id_alloc_next, GFP_KERNEL);
	if (err < 0)
		goto err_unlock;

	INIT_HLIST_NODE(&pool->user.list);
	if (pool->slow.netdev) {
		hlist_add_head(&pool->user.list,
			       &pool->slow.netdev->page_pools);
		netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF);
	}

	mutex_unlock(&page_pools_lock);
	return 0;

err_unlock:
	mutex_unlock(&page_pools_lock);
	return err;
}

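/* Record the detach timestamp (boottime seconds) and send a change
 * notification for the pool.
 */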
void page_pool_detached(struct page_pool *pool)
{
	mutex_lock(&page_pools_lock);
	pool->user.detach_time = ktime_get_boottime_seconds();
	netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
	mutex_unlock(&page_pools_lock);
}

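/* Tear down the user-facing state: notify listeners, release the id and
 * unlink the pool from its netdev list.
 */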
void page_pool_unlist(struct page_pool *pool)
{
	mutex_lock(&page_pools_lock);
	netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF);
	xa_erase(&page_pools, pool->user.id);
	if (!hlist_unhashed(&pool->user.list))
		hlist_del(&pool->user.list);
	mutex_unlock(&page_pools_lock);
}

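/* Check that a page pool bound to the rx queue's memory provider was
 * actually created for that queue index; returns -ENODATA if none is found.
 */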
int page_pool_check_memory_provider(struct net_device *dev,
				    struct netdev_rx_queue *rxq)
{
	void *binding = rxq->mp_params.mp_priv;
	struct page_pool *pool;
	struct hlist_node *n;

	if (!binding)
		return 0;

	mutex_lock(&page_pools_lock);
	hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) {
		if (pool->mp_priv != binding)
			continue;

		if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) {
			mutex_unlock(&page_pools_lock);
			return 0;
		}
	}
	mutex_unlock(&page_pools_lock);
	return -ENODATA;
}

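/* Loopback itself is being unregistered, so its pools cannot be re-parented
 * any further: make them invisible and poison the netdev pointer.
 */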
static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
{
	struct page_pool *pool;
	struct hlist_node *n;

	mutex_lock(&page_pools_lock);
	hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) {
		hlist_del_init(&pool->user.list);
		pool->slow.netdev = NET_PTR_POISON;
	}
	mutex_unlock(&page_pools_lock);
}

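/* A real netdev is going away: re-parent its pools to the netns loopback
 * device so they stay visible as "orphaned", notify about each, then splice
 * the whole list onto lo's page_pools.
 */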
static void page_pool_unreg_netdev(struct net_device *netdev)
{
	struct page_pool *pool, *last;
	struct net_device *lo;

	lo = dev_net(netdev)->loopback_dev;

	mutex_lock(&page_pools_lock);
	last = NULL;
	hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
		pool->slow.netdev = lo;
		netdev_nl_page_pool_event(pool,
					  NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
		last = pool;
	}
	if (last)
		hlist_splice_init(&netdev->page_pools, &last->user.list,
				  &lo->page_pools);
	mutex_unlock(&page_pools_lock);
}

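/* Netdevice notifier: on NETDEV_UNREGISTER either orphan the device's page
 * pools (real device) or wipe them (loopback going away with its netns).
 */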
static int
page_pool_netdevice_event(struct notifier_block *nb,
			  unsigned long event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	if (hlist_empty(&netdev->page_pools))
		return NOTIFY_OK;

	if (netdev->ifindex != LOOPBACK_IFINDEX)
		page_pool_unreg_netdev(netdev);
	else
		page_pool_unreg_netdev_wipe(netdev);
	return NOTIFY_OK;
}

static struct notifier_block page_pool_netdevice_nb = {
	.notifier_call = page_pool_netdevice_event,
};

static int __init page_pool_user_init(void)
{
	return register_netdevice_notifier(&page_pool_netdevice_nb);
}

subsys_initcall(page_pool_user_init);