/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _NET_PAGE_POOL_TYPES_H
#define _NET_PAGE_POOL_TYPES_H

#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <net/netmem.h>

#define PP_FLAG_DMA_MAP		BIT(0) /* Should page_pool do the DMA
					* map/unmap
					*/
#define PP_FLAG_DMA_SYNC_DEV	BIT(1) /* If set, all pages that the driver
					* gets from page_pool will be
					* DMA-synced-for-device according to
					* the length provided by the device
					* driver.
					* Please note DMA-sync-for-CPU is still
					* device driver responsibility
					*/
#define PP_FLAG_SYSTEM_POOL	BIT(2) /* Global system page_pool */

/* Allow unreadable (net_iov backed) netmem in this page_pool. Drivers setting
 * this must be able to support unreadable netmem, where netmem_address() would
 * return NULL. This flag should not be set for header page_pools.
 *
 * If the driver sets PP_FLAG_ALLOW_UNREADABLE_NETMEM, it should also set
 * page_pool_params.slow.queue_idx.
 */
#define PP_FLAG_ALLOW_UNREADABLE_NETMEM	BIT(3)

#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
				 PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)
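
/* Illustrative flag usage (a sketch, not something mandated by this header):
 * a driver that wants the pool to own DMA mapping and device-side syncing
 * would combine the flags above when filling struct page_pool_params, e.g.
 *
 *	.flags   = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *	.dma_dir = DMA_FROM_DEVICE,
 *	.max_len = PAGE_SIZE,
 *	.offset  = 0,
 *
 * A fuller example follows struct page_pool_params below.
 */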

/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */
#define PP_DMA_INDEX_LIMIT	XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)

/*
 * Fast allocation side cache array/stack
 *
 * The cache size and refill watermark are related to the network
 * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
 * ring is usually refilled, and at most 64 elements will have been
 * consumed, which makes 64 a natural max for the number of objects
 * needed in the cache.
 * The refill watermark is set to 64 for 4KB pages,
 * and scales to balance its size in bytes across page sizes.
 *
 * Room is kept for more objects due to the XDP_DROP use-case, which
 * can recycle objects directly into this array since it shares the
 * same softirq/NAPI protection. If the cache is already full (or
 * partly full), the XDP_DROP recycling would have to take a slower
 * code path.
 */
#if PAGE_SIZE >= SZ_64K
#define PP_ALLOC_CACHE_REFILL	4
#elif PAGE_SIZE >= SZ_16K
#define PP_ALLOC_CACHE_REFILL	16
#else
#define PP_ALLOC_CACHE_REFILL	64
#endif

#define PP_ALLOC_CACHE_SIZE	(PP_ALLOC_CACHE_REFILL * 2)
struct pp_alloc_cache {
	u32 count;
	netmem_ref cache[PP_ALLOC_CACHE_SIZE];
};
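
/* For reference, the resulting sizes: with 4KB pages the refill watermark is
 * 64 and the cache holds 128 netmem references; with 16KB pages the pair is
 * 16/32, and with 64KB (or larger) pages 4/8, which keeps the cached memory
 * roughly constant in bytes across page sizes.
 */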

/**
 * struct page_pool_params - page pool parameters
 * @fast:	params accessed frequently on hotpath
 * @order:	2^order pages on allocation
 * @pool_size:	size of the ptr_ring
 * @nid:	NUMA node id to allocate pages from
 * @dev:	device, for DMA pre-mapping purposes
 * @napi:	NAPI which is the sole consumer of pages, otherwise NULL
 * @dma_dir:	DMA mapping direction
 * @max_len:	max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
 * @offset:	DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
 * @slow:	params with slowpath access only (initialization and Netlink)
 * @netdev:	netdev this pool will serve (leave as NULL if none or multiple)
 * @queue_idx:	queue idx this page_pool is being created for.
 * @flags:	PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL,
 *		PP_FLAG_ALLOW_UNREADABLE_NETMEM.
 */
struct page_pool_params {
	struct_group_tagged(page_pool_params_fast, fast,
		unsigned int	order;
		unsigned int	pool_size;
		int		nid;
		struct device	*dev;
		struct napi_struct *napi;
		enum dma_data_direction dma_dir;
		unsigned int	max_len;
		unsigned int	offset;
	);
	struct_group_tagged(page_pool_params_slow, slow,
		struct net_device *netdev;
		unsigned int	queue_idx;
		unsigned int	flags;
/* private: used by test code only */
		void (*init_callback)(netmem_ref netmem, void *arg);
		void *init_arg;
	);
};
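
/* Example (an illustrative sketch; the rxq/priv driver state below is made
 * up, while the page_pool fields and page_pool_create() call are the ones
 * declared in this file): a NIC driver typically creates one pool per RX
 * queue from its open path and lets the pool handle DMA mapping and
 * device-side syncing.
 *
 *	struct page_pool_params pp_params = {
 *		.order		= 0,
 *		.pool_size	= rxq->ring_size,
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= priv->dev,
 *		.napi		= &rxq->napi,
 *		.dma_dir	= DMA_FROM_DEVICE,
 *		.max_len	= PAGE_SIZE,
 *		.offset		= 0,
 *		.netdev		= priv->netdev,
 *		.queue_idx	= rxq->index,
 *		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *	};
 *	struct page_pool *pool = page_pool_create(&pp_params);
 *
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *	rxq->page_pool = pool;
 *
 * page_pool_create() returns an ERR_PTR() on failure, so the result must be
 * checked with IS_ERR() rather than against NULL.
 */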

#ifdef CONFIG_PAGE_POOL_STATS
/**
 * struct page_pool_alloc_stats - allocation statistics
 * @fast:	successful fast path allocations
 * @slow:	slow path order-0 allocations
 * @slow_high_order: slow path high order allocations
 * @empty:	ptr ring is empty, so a slow path allocation was forced
 * @refill:	an allocation which triggered a refill of the cache
 * @waive:	pages obtained from the ptr ring that cannot be added to
 *		the cache due to a NUMA mismatch
 */
struct page_pool_alloc_stats {
	u64 fast;
	u64 slow;
	u64 slow_high_order;
	u64 empty;
	u64 refill;
	u64 waive;
};

/**
 * struct page_pool_recycle_stats - recycling (freeing) statistics
 * @cached:	recycling placed page in the page pool cache
 * @cache_full:	page pool cache was full
 * @ring:	page placed into the ptr ring
 * @ring_full:	page released from page pool because the ptr ring was full
 * @released_refcnt: page released (and not recycled) because refcnt > 1
 */
struct page_pool_recycle_stats {
	u64 cached;
	u64 cache_full;
	u64 ring;
	u64 ring_full;
	u64 released_refcnt;
};

/**
 * struct page_pool_stats - combined page pool use statistics
 * @alloc_stats:	see struct page_pool_alloc_stats
 * @recycle_stats:	see struct page_pool_recycle_stats
 *
 * Wrapper struct for combining page pool stats with different storage
 * requirements.
 */
struct page_pool_stats {
	struct page_pool_alloc_stats alloc_stats;
	struct page_pool_recycle_stats recycle_stats;
};
#endif
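
/* Collection sketch (assumes CONFIG_PAGE_POOL_STATS and the
 * page_pool_get_stats() helper declared in <net/page_pool/helpers.h>;
 * "pool" stands in for a driver-owned page_pool):
 *
 *	struct page_pool_stats stats = { };
 *
 *	if (page_pool_get_stats(pool, &stats))
 *		pr_info("fast allocs: %llu, recycled to cache: %llu\n",
 *			stats.alloc_stats.fast, stats.recycle_stats.cached);
 */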

/* The whole frag API block must stay within one cacheline. On 32-bit systems,
 * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``.
 * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
 * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that
 * one for simplicity.
 * Having it aligned to a cacheline boundary may be excessive and doesn't bring
 * any benefit.
 */
#define PAGE_POOL_FRAG_GROUP_ALIGN	(4 * sizeof(long))
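
/* In other words (a sketch of the arithmetic, not an assertion present in this
 * header): frag_users is a long, frag_page (netmem_ref) is an unsigned long
 * and frag_offset is an unsigned int, so
 *
 *	BUILD_BUG_ON(2 * sizeof(long) + sizeof(unsigned int) >
 *		     PAGE_POOL_FRAG_GROUP_ALIGN);
 *
 * would hold on both 32-bit and 64-bit systems.
 */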

struct memory_provider_ops;

struct pp_memory_provider_params {
	void *mp_priv;
	const struct memory_provider_ops *mp_ops;
	u32 rx_page_size;
};

struct page_pool {
	struct page_pool_params_fast p;

	int cpuid;
	u32 pages_state_hold_cnt;

	bool has_init_callback:1;	/* slow::init_callback is set */
	bool dma_map:1;			/* Perform DMA mapping */
	bool dma_sync:1;		/* Perform DMA sync for device */
	bool dma_sync_for_cpu:1;	/* Perform DMA sync for cpu */
#ifdef CONFIG_PAGE_POOL_STATS
	bool system:1;			/* This is a global percpu pool */
#endif

	__cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
	long frag_users;
	netmem_ref frag_page;
	unsigned int frag_offset;
	__cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);

	struct delayed_work release_dw;
	void (*disconnect)(void *pool);
	unsigned long defer_start;
	unsigned long defer_warn;

#ifdef CONFIG_PAGE_POOL_STATS
	/* these stats are incremented while in softirq context */
	struct page_pool_alloc_stats alloc_stats;
#endif
	u32 xdp_mem_id;

	/*
	 * Data structure for allocation side
	 *
	 * The driver's allocation side usually already performs some kind
	 * of resource protection. Piggyback on this protection, and
	 * require the driver to protect the allocation side.
	 *
	 * For NIC drivers this means allocating a page_pool per
	 * RX-queue, as the RX-queue is already protected by
	 * softirq/BH scheduling and napi_schedule. NAPI scheduling
	 * guarantees that a single napi_struct will only be scheduled
	 * on a single CPU (see napi_schedule).
	 */
	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;

	/* Data structure for storing recycled pages.
	 *
	 * Returning/freeing pages is more complicated synchronization-wise,
	 * because frees can happen on remote CPUs, with no association
	 * with the allocation resource.
	 *
	 * Use ptr_ring, as it separates consumer and producer
	 * efficiently, in a way that doesn't bounce cache-lines.
	 *
	 * TODO: Implement bulk return pages into this structure.
	 */
	struct ptr_ring ring;

	void *mp_priv;
	const struct memory_provider_ops *mp_ops;

	struct xarray dma_mapped;

#ifdef CONFIG_PAGE_POOL_STATS
	/* recycle stats are per-cpu to avoid locking */
	struct page_pool_recycle_stats __percpu *recycle_stats;
#endif
	atomic_t pages_state_release_cnt;

	/* A page_pool is strictly tied to a single RX-queue being
	 * protected by NAPI, due to the above pp_alloc_cache. This
	 * refcnt's purpose is to simplify drivers' error handling.
	 */
	refcount_t user_cnt;

	u64 destroy_cnt;

	/* Slow/Control-path information follows */
	struct page_pool_params_slow slow;
	/* User-facing fields, protected by page_pools_lock */
	struct {
		struct hlist_node list;
		ktime_t detach_time;
		u32 id;
	} user;
};

struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
				  unsigned int size, gfp_t gfp);
netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
				       unsigned int *offset, unsigned int size,
				       gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
					  int cpuid);
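
/* Allocation sketch (illustrative; rxq and desc below are hypothetical driver
 * state): on the RX refill path a driver asks the pool for memory and posts
 * the DMA address to its descriptor ring. With PP_FLAG_DMA_MAP the mapping was
 * already done by the pool, so the address only needs to be looked up, e.g.
 * via page_pool_get_dma_addr() from <net/page_pool/helpers.h>.
 *
 *	struct page *page = page_pool_alloc_pages(rxq->page_pool, GFP_ATOMIC);
 *
 *	if (!page)
 *		return -ENOMEM;
 *	desc->addr = cpu_to_le64(page_pool_get_dma_addr(page));
 */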

struct xdp_mem_info;

#ifdef CONFIG_PAGE_POOL
void page_pool_enable_direct_recycling(struct page_pool *pool,
				       struct napi_struct *napi);
void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
			   const struct xdp_mem_info *mem);
void page_pool_put_netmem_bulk(netmem_ref *data, u32 count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
}

static inline void page_pool_use_xdp_mem(struct page_pool *pool,
					 void (*disconnect)(void *),
					 const struct xdp_mem_info *mem)
{
}

static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
{
}
#endif

void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
				  unsigned int dma_sync_size,
				  bool allow_direct);
void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
				unsigned int dma_sync_size,
				bool allow_direct);
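
/* Return-path sketch (illustrative; rx_len and rxq below are hypothetical
 * driver state): a driver dropping a non-fragmented buffer from its NAPI poll
 * loop (e.g. on XDP_DROP) can hand the page straight back to the pool.
 * allow_direct=true is only safe from the pool's own NAPI/softirq context, and
 * dma_sync_size limits the device sync to the area the HW may have written
 * (-1 requests a sync of up to max_len).
 *
 *	page_pool_put_unrefed_page(rxq->page_pool, page, rx_len, true);
 *
 * Most drivers reach these functions through the wrappers in
 * <net/page_pool/helpers.h>, such as page_pool_put_full_page() or
 * page_pool_recycle_direct().
 */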

static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
	return true;
#else
	return false;
#endif
}

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);

#endif /* _NET_PAGE_POOL_TYPES_H */