/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _NET_PAGE_POOL_TYPES_H
#define _NET_PAGE_POOL_TYPES_H

#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>
#include <net/netmem.h>

#define PP_FLAG_DMA_MAP		BIT(0) /* Should page_pool do the DMA
					* map/unmap
					*/
#define PP_FLAG_DMA_SYNC_DEV	BIT(1) /* If set all pages that the driver gets
					* from page_pool will be
					* DMA-synced-for-device according to
					* the length provided by the device
					* driver.
					* Please note DMA-sync-for-CPU is still
					* device driver responsibility
					*/
#define PP_FLAG_SYSTEM_POOL	BIT(2) /* Global system page_pool */

/* Allow unreadable (net_iov backed) netmem in this page_pool. Drivers setting
 * this must be able to support unreadable netmem, where netmem_address() would
 * return NULL. This flag should not be set for header page_pools.
 *
 * If the driver sets PP_FLAG_ALLOW_UNREADABLE_NETMEM, it should also set
 * page_pool_params.slow.queue_idx.
 */
#define PP_FLAG_ALLOW_UNREADABLE_NETMEM	BIT(3)

#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
				 PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)

/*
 * Fast allocation side cache array/stack
 *
 * The cache size and refill watermark are related to the network
 * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
 * ring is usually refilled and the max consumed elements will be 64,
 * thus a natural max size of objects needed in the cache.
 *
 * Keeping room for more objects is for the XDP_DROP use-case: since
 * XDP_DROP runs under the same softirq/NAPI protection, it can recycle
 * objects directly into this array. If the cache were already full
 * (or partly full), those XDP_DROP recycles would have to take a
 * slower code path.
 */
#define PP_ALLOC_CACHE_SIZE	128
#define PP_ALLOC_CACHE_REFILL	64
struct pp_alloc_cache {
	u32 count;
	netmem_ref cache[PP_ALLOC_CACHE_SIZE];
};

/**
 * struct page_pool_params - page pool parameters
 * @fast: params accessed frequently on hotpath
 * @order: 2^order pages on allocation
 * @pool_size: size of the ptr_ring
 * @nid: NUMA node id to allocate pages from
 * @dev: device, for DMA pre-mapping purposes
 * @napi: NAPI which is the sole consumer of pages, otherwise NULL
 * @dma_dir: DMA mapping direction
 * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
 * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
 * @slow: params with slowpath access only (initialization and Netlink)
 * @netdev: netdev this pool will serve (leave as NULL if none or multiple)
 * @queue_idx: queue idx this page_pool is being created for.
 * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL,
 *	PP_FLAG_ALLOW_UNREADABLE_NETMEM.
 */
struct page_pool_params {
	struct_group_tagged(page_pool_params_fast, fast,
		unsigned int	order;
		unsigned int	pool_size;
		int		nid;
		struct device	*dev;
		struct napi_struct *napi;
		enum dma_data_direction dma_dir;
		unsigned int	max_len;
		unsigned int	offset;
	);
	struct_group_tagged(page_pool_params_slow, slow,
		struct net_device *netdev;
		unsigned int	queue_idx;
		unsigned int	flags;
/* private: used by test code only */
		void (*init_callback)(netmem_ref netmem, void *arg);
		void *init_arg;
	);
};
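
/* Example (a minimal sketch, not part of the kernel API): how a driver might
 * fill these params for a single Rx queue and create the pool. The helper
 * name, ring size and device/NAPI pointers are hypothetical; the fields and
 * flags are the ones documented above.
 *
 *	static struct page_pool *my_create_rx_pool(struct device *dev,
 *						    struct napi_struct *napi,
 *						    unsigned int ring_size)
 *	{
 *		struct page_pool_params pp_params = {
 *			.order		= 0,		// order-0 pages
 *			.pool_size	= ring_size,	// sized to the Rx ring
 *			.nid		= NUMA_NO_NODE,
 *			.dev		= dev,		// used for DMA mapping
 *			.napi		= napi,		// sole consumer
 *			.dma_dir	= DMA_FROM_DEVICE,
 *			.max_len	= PAGE_SIZE,	// sync whole page for device
 *			.offset		= 0,
 *			.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *		};
 *
 *		return page_pool_create(&pp_params);	// ERR_PTR() on failure
 *	}
 */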

#ifdef CONFIG_PAGE_POOL_STATS
/**
 * struct page_pool_alloc_stats - allocation statistics
 * @fast: successful fast path allocations
 * @slow: slow path order-0 allocations
 * @slow_high_order: slow path high order allocations
 * @empty: ptr ring is empty, so a slow path allocation was forced
 * @refill: an allocation which triggered a refill of the cache
 * @waive: pages obtained from the ptr ring that cannot be added to
 *	the cache due to a NUMA mismatch
 */
struct page_pool_alloc_stats {
	u64 fast;
	u64 slow;
	u64 slow_high_order;
	u64 empty;
	u64 refill;
	u64 waive;
};

/**
 * struct page_pool_recycle_stats - recycling (freeing) statistics
 * @cached: recycling placed page in the page pool cache
 * @cache_full: page pool cache was full
 * @ring: page placed into the ptr ring
 * @ring_full: page released from page pool because the ptr ring was full
 * @released_refcnt: page released (and not recycled) because refcnt > 1
 */
struct page_pool_recycle_stats {
	u64 cached;
	u64 cache_full;
	u64 ring;
	u64 ring_full;
	u64 released_refcnt;
};

/**
 * struct page_pool_stats - combined page pool use statistics
 * @alloc_stats: see struct page_pool_alloc_stats
 * @recycle_stats: see struct page_pool_recycle_stats
 *
 * Wrapper struct for combining page pool stats with different storage
 * requirements.
 */
struct page_pool_stats {
	struct page_pool_alloc_stats alloc_stats;
	struct page_pool_recycle_stats recycle_stats;
};
#endif
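
/* Example (a minimal sketch, not part of the kernel API): snapshotting the
 * combined stats. page_pool_get_stats() is assumed to be provided by the
 * page_pool helpers (<net/page_pool/helpers.h>) when CONFIG_PAGE_POOL_STATS
 * is enabled; the pool pointer and the pr_info() reporting are hypothetical.
 *
 *	static void my_dump_pool_stats(struct page_pool *pool)
 *	{
 *		struct page_pool_stats stats = { };
 *
 *		// Sums the per-cpu recycle counters and the softirq-side
 *		// alloc counters into one snapshot.
 *		if (page_pool_get_stats(pool, &stats))
 *			pr_info("fast=%llu slow=%llu cached=%llu\n",
 *				stats.alloc_stats.fast,
 *				stats.alloc_stats.slow,
 *				stats.recycle_stats.cached);
 *	}
 */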

/* The whole frag API block must stay within one cacheline. On 32-bit systems,
 * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``.
 * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
 * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that
 * one for simplicity.
 * Having it aligned to a cacheline boundary may be excessive and doesn't bring
 * any benefit.
 */
#define PAGE_POOL_FRAG_GROUP_ALIGN	(4 * sizeof(long))

struct pp_memory_provider_params {
	void *mp_priv;
};

struct page_pool {
	struct page_pool_params_fast p;

	int cpuid;
	u32 pages_state_hold_cnt;

	bool has_init_callback:1;	/* slow::init_callback is set */
	bool dma_map:1;			/* Perform DMA mapping */
	bool dma_sync:1;		/* Perform DMA sync for device */
	bool dma_sync_for_cpu:1;	/* Perform DMA sync for cpu */
#ifdef CONFIG_PAGE_POOL_STATS
	bool system:1;			/* This is a global percpu pool */
#endif

	__cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
	long frag_users;
	netmem_ref frag_page;
	unsigned int frag_offset;
	__cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);

	struct delayed_work release_dw;
	void (*disconnect)(void *pool);
	unsigned long defer_start;
	unsigned long defer_warn;

#ifdef CONFIG_PAGE_POOL_STATS
	/* these stats are incremented while in softirq context */
	struct page_pool_alloc_stats alloc_stats;
#endif
	u32 xdp_mem_id;

	/*
	 * Data structure for allocation side
	 *
	 * The driver's allocation side usually already performs some kind
	 * of resource protection. Piggyback on this protection, and
	 * require the driver to protect the allocation side.
	 *
	 * For NIC drivers this means allocating a page_pool per RX-queue,
	 * as the RX-queue is already protected by softirq/BH scheduling
	 * and napi_schedule. The NAPI schedule guarantees that a single
	 * napi_struct will only be scheduled on a single CPU (see
	 * napi_schedule).
	 */
	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;

	/* Data structure for storing recycled pages.
	 *
	 * Returning/freeing pages is more complicated synchronization-wise,
	 * because frees can happen on remote CPUs, with no association
	 * with the allocation resource.
	 *
	 * Use ptr_ring, as it separates consumer and producer efficiently,
	 * in a way that doesn't bounce cache-lines.
	 *
	 * TODO: Implement bulk return pages into this structure.
	 */
	struct ptr_ring ring;

	void *mp_priv;

#ifdef CONFIG_PAGE_POOL_STATS
	/* recycle stats are per-cpu to avoid locking */
	struct page_pool_recycle_stats __percpu *recycle_stats;
#endif
	atomic_t pages_state_release_cnt;

	/* A page_pool is strictly tied to a single RX-queue, protected by
	 * NAPI, due to the above pp_alloc_cache. The purpose of this
	 * refcnt is to simplify the drivers' error handling.
	 */
	refcount_t user_cnt;

	u64 destroy_cnt;

	/* Slow/Control-path information follows */
	struct page_pool_params_slow slow;
	/* User-facing fields, protected by page_pools_lock */
	struct {
		struct hlist_node list;
		u64 detach_time;
		u32 id;
	} user;
};

struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
				  unsigned int size, gfp_t gfp);
netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
				       unsigned int *offset, unsigned int size,
				       gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
					  int cpuid);
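
/* Example (a minimal sketch, not part of the kernel API): sub-page "frag"
 * allocation, where several Rx buffers share one larger page and the pool
 * tracks the remaining users via the frag fields above. The helper name and
 * buffer length are hypothetical.
 *
 *	static struct page *my_alloc_rx_frag(struct page_pool *pool,
 *					     unsigned int buf_len,
 *					     unsigned int *offset)
 *	{
 *		// On success, *offset is set to this fragment's offset
 *		// within the returned page.
 *		return page_pool_alloc_frag(pool, offset, buf_len, GFP_ATOMIC);
 *	}
 */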

struct xdp_mem_info;

#ifdef CONFIG_PAGE_POOL
void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
			   const struct xdp_mem_info *mem);
void page_pool_put_netmem_bulk(netmem_ref *data, u32 count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
}

static inline void page_pool_use_xdp_mem(struct page_pool *pool,
					 void (*disconnect)(void *),
					 const struct xdp_mem_info *mem)
{
}

static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
{
}
#endif

void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
				  unsigned int dma_sync_size,
				  bool allow_direct);
void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
				unsigned int dma_sync_size,
				bool allow_direct);
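
/* Example (a minimal sketch, not part of the kernel API): the alloc/recycle
 * cycle from a driver's NAPI poll context. page_pool_dev_alloc_pages() and
 * page_pool_put_full_page() are assumed to come from
 * <net/page_pool/helpers.h>; the helper names are hypothetical.
 *
 *	static struct page *my_rx_refill_one(struct page_pool *pool)
 *	{
 *		// Hot path: served from pp_alloc_cache, then the ptr_ring,
 *		// then the page allocator.
 *		return page_pool_dev_alloc_pages(pool);
 *	}
 *
 *	static void my_rx_drop_one(struct page_pool *pool, struct page *page)
 *	{
 *		// allow_direct=true recycles straight into pp_alloc_cache,
 *		// which is only legal from the pool's own NAPI/softirq
 *		// context; otherwise pass false.
 *		page_pool_put_full_page(pool, page, true);
 *	}
 */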

static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
	return true;
#else
	return false;
#endif
}

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);
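
/* Example (a minimal sketch, not part of the kernel API): re-targeting
 * allocations when the Rx interrupt (and thus NAPI) has moved to a CPU on
 * another NUMA node, called from the pool's NAPI poll. This mirrors what the
 * page_pool_nid_changed() helper in <net/page_pool/helpers.h> is expected to
 * do; the function name here is hypothetical.
 *
 *	static void my_rx_numa_check(struct page_pool *pool)
 *	{
 *		int nid = numa_mem_id();	// node of the CPU running NAPI
 *
 *		if (pool->p.nid != nid)
 *			page_pool_update_nid(pool, nid);
 *	}
 */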

#endif /* _NET_PAGE_POOL_TYPES_H */