xref: /linux/Documentation/networking/page_pool.rst (revision 3ba84ac69b53e6ee07c31d54554e00793d7b144f)
1.. SPDX-License-Identifier: GPL-2.0
2
3=============
4Page Pool API
5=============
6
7.. kernel-doc:: include/net/page_pool/helpers.h
8   :doc: page_pool allocator
9
10Architecture overview
11=====================
12
13.. code-block:: none
14
15    +------------------+
16    |       Driver     |
17    +------------------+
18            ^
19            |
20            |
21            |
22            v
23    +--------------------------------------------+
24    |                request memory              |
25    +--------------------------------------------+
26        ^                                  ^
27        |                                  |
28        | Pool empty                       | Pool has entries
29        |                                  |
30        v                                  v
31    +-----------------------+     +------------------------+
32    | alloc (and map) pages |     |  get page from cache   |
33    +-----------------------+     +------------------------+
34                                    ^                    ^
35                                    |                    |
36                                    | cache available    | No entries, refill
37                                    |                    | from ptr-ring
38                                    |                    |
39                                    v                    v
40                          +-----------------+     +------------------+
41                          |   Fast cache    |     |  ptr-ring cache  |
42                          +-----------------+     +------------------+
43
44Monitoring
45==========
46Information about page pools on the system can be accessed via the netdev
47genetlink family (see Documentation/netlink/specs/netdev.yaml).
48
49API interface
50=============
51The number of pools created **must** match the number of hardware queues
52unless hardware restrictions make that impossible. This would otherwise defeat the
53purpose of page pool, which is to allocate pages fast from cache without locking.
54This lockless guarantee naturally comes from running under a NAPI softirq.
55The protection doesn't strictly have to be NAPI; any guarantee that allocating
56a page will cause no race conditions is enough.
57
58.. kernel-doc:: net/core/page_pool.c
59   :identifiers: page_pool_create
60
61.. kernel-doc:: include/net/page_pool/types.h
62   :identifiers: struct page_pool_params
63
64.. kernel-doc:: include/net/page_pool/helpers.h
65   :identifiers: page_pool_put_page page_pool_put_full_page
66		 page_pool_recycle_direct page_pool_free_va
67		 page_pool_dev_alloc_pages page_pool_dev_alloc_frag
68		 page_pool_dev_alloc page_pool_dev_alloc_va
69		 page_pool_get_dma_addr page_pool_get_dma_dir
70
71.. kernel-doc:: net/core/page_pool.c
72   :identifiers: page_pool_put_page_bulk page_pool_get_stats
73
74DMA sync
75--------
76The driver is always responsible for syncing the pages for the CPU.
77Drivers may choose to take care of syncing for the device as well
78or set the ``PP_FLAG_DMA_SYNC_DEV`` flag to request that pages
79allocated from the page pool are already synced for the device.
80
81If ``PP_FLAG_DMA_SYNC_DEV`` is set, the driver must inform the core what portion
82of the buffer has to be synced. This allows the core to avoid syncing the entire
83page when the driver knows that the device only accessed a portion of the page.
84
85Most drivers will reserve headroom in front of the frame. This part
86of the buffer is not touched by the device, so to avoid syncing
87it drivers can set the ``offset`` field in struct page_pool_params
88appropriately.
89
90For pages recycled on the XDP xmit and skb paths the page pool will
91use the ``max_len`` member of struct page_pool_params to decide how
92much of the page needs to be synced (starting at ``offset``).
93When directly freeing pages in the driver (page_pool_put_page())
94the ``dma_sync_size`` argument specifies how much of the buffer needs
95to be synced.
96
97If in doubt set ``offset`` to 0, ``max_len`` to ``PAGE_SIZE`` and
98pass -1 as ``dma_sync_size``. That combination of arguments is always
99correct.
100
101Note that the syncing parameters are for the entire page.
102This is important to remember when using fragments (``PP_FLAG_PAGE_FRAG``),
103where allocated buffers may be smaller than a full page.
104Unless the driver author really understands page pool internals
105it's recommended to always use ``offset = 0``, ``max_len = PAGE_SIZE``
106with fragmented page pools.
107
108Stats API and structures
109------------------------
110If the kernel is configured with ``CONFIG_PAGE_POOL_STATS=y``, the API
111page_pool_get_stats() and structures described below are available.
112The function takes a pointer to a ``struct page_pool`` and a pointer to a struct
113page_pool_stats allocated by the caller.
114
115Older drivers expose page pool statistics via ethtool or debugfs.
116The same statistics are accessible via the netlink netdev family
117in a driver-independent fashion.
118
119.. kernel-doc:: include/net/page_pool/types.h
120   :identifiers: struct page_pool_recycle_stats
121		 struct page_pool_alloc_stats
122		 struct page_pool_stats
123
124Coding examples
125===============
126
127Registration
128------------
129
130.. code-block:: c
131
132    /* Page pool registration */
133    struct page_pool_params pp_params = { 0 };
134    struct xdp_rxq_info xdp_rxq;
135    int err;
136
137    pp_params.order = 0;
138    /* internal DMA mapping in page_pool */
139    pp_params.flags = PP_FLAG_DMA_MAP;
140    pp_params.pool_size = DESC_NUM;
141    pp_params.nid = NUMA_NO_NODE;
142    pp_params.dev = priv->dev;
143    pp_params.napi = napi; /* only if locking is tied to NAPI */
144    pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
145    page_pool = page_pool_create(&pp_params);
146
147    err = xdp_rxq_info_reg(&xdp_rxq, ndev, 0);
148    if (err)
149        goto err_out;
150
151    err = xdp_rxq_info_reg_mem_model(&xdp_rxq, MEM_TYPE_PAGE_POOL, page_pool);
152    if (err)
153        goto err_out;
154
155NAPI poller
156-----------
157
158
159.. code-block:: c
160
161    /* NAPI Rx poller */
162    enum dma_data_direction dma_dir;
163
164    dma_dir = page_pool_get_dma_dir(dring->page_pool);
165    while (done < budget) {
166        if (some error)
167            page_pool_recycle_direct(page_pool, page);
168        if (packet_is_xdp) {
169            if XDP_DROP:
170                page_pool_recycle_direct(page_pool, page);
171        } else (packet_is_skb) {
172            skb_mark_for_recycle(skb);
173            new_page = page_pool_dev_alloc_pages(page_pool);
174        }
175    }
176
177Stats
178-----
179
180.. code-block:: c
181
182	#ifdef CONFIG_PAGE_POOL_STATS
183	/* retrieve stats */
184	struct page_pool_stats stats = { 0 };
185	if (page_pool_get_stats(page_pool, &stats)) {
186		/* perhaps the driver reports statistics with ethtool */
187		ethtool_print_allocation_stats(&stats.alloc_stats);
188		ethtool_print_recycle_stats(&stats.recycle_stats);
189	}
190	#endif
191
192Driver unload
193-------------
194
195.. code-block:: c
196
197    /* Driver unload */
198    page_pool_put_full_page(page_pool, page, false);
199    xdp_rxq_info_unreg(&xdp_rxq);
200