/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _NET_PAGE_POOL_TYPES_H
#define _NET_PAGE_POOL_TYPES_H

#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <net/netmem.h>

#define PP_FLAG_DMA_MAP		BIT(0)	/* Should page_pool do the DMA
					 * map/unmap
					 */
#define PP_FLAG_DMA_SYNC_DEV	BIT(1)	/* If set, all pages that the driver
					 * gets from page_pool will be
					 * DMA-synced-for-device according to
					 * the length provided by the device
					 * driver.
					 * Please note DMA-sync-for-CPU is still
					 * the device driver's responsibility.
					 */
#define PP_FLAG_SYSTEM_POOL	BIT(2)	/* Global system page_pool */

/* Allow unreadable (net_iov backed) netmem in this page_pool. Drivers setting
 * this must be able to support unreadable netmem, where netmem_address() would
 * return NULL. This flag should not be set for header page_pools.
 *
 * If the driver sets PP_FLAG_ALLOW_UNREADABLE_NETMEM, it should also set
 * page_pool_params.slow.queue_idx.
 */
#define PP_FLAG_ALLOW_UNREADABLE_NETMEM	BIT(3)

#define PP_FLAG_ALL	(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
			 PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)

/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */
#define PP_DMA_INDEX_LIMIT	XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)

/*
 * Fast allocation side cache array/stack
 *
 * The cache size and refill watermark are related to the network use-case.
 * The NAPI budget is 64 packets. After a NAPI poll the RX ring is usually
 * refilled and the max consumed elements will be 64, thus a natural max size
 * of objects needed in the cache.
 *
 * Room is kept for more objects to support the XDP_DROP use-case, which can
 * recycle objects directly into this array, as it shares the same
 * softirq/NAPI protection. If the cache is already full (or partly full),
 * the XDP_DROP recycles have to take a slower code path.
 */
#define PP_ALLOC_CACHE_SIZE	128
#define PP_ALLOC_CACHE_REFILL	64
struct pp_alloc_cache {
	u32 count;
	netmem_ref cache[PP_ALLOC_CACHE_SIZE];
};

/**
 * struct page_pool_params - page pool parameters
 * @fast: params accessed frequently on hotpath
 * @order: 2^order pages on allocation
 * @pool_size: size of the ptr_ring
 * @nid: NUMA node id to allocate pages from
 * @dev: device, for DMA pre-mapping purposes
 * @napi: NAPI which is the sole consumer of pages, otherwise NULL
 * @dma_dir: DMA mapping direction
 * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
 * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
 * @slow: params with slowpath access only (initialization and Netlink)
 * @netdev: netdev this pool will serve (leave as NULL if none or multiple)
 * @queue_idx: queue idx this page_pool is being created for
 * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL,
 *	PP_FLAG_ALLOW_UNREADABLE_NETMEM
 */
struct page_pool_params {
	struct_group_tagged(page_pool_params_fast, fast,
		unsigned int order;
		unsigned int pool_size;
		int nid;
		struct device *dev;
		struct napi_struct *napi;
		enum dma_data_direction dma_dir;
		unsigned int max_len;
		unsigned int offset;
	);
	struct_group_tagged(page_pool_params_slow, slow,
		struct net_device *netdev;
		unsigned int queue_idx;
		unsigned int flags;
		/* private: used by test code only */
		void (*init_callback)(netmem_ref netmem, void *arg);
		void *init_arg;
	);
};

#ifdef CONFIG_PAGE_POOL_STATS
/**
 * struct page_pool_alloc_stats - allocation statistics
 * @fast: successful fast path allocations
 * @slow: slow path order-0 allocations
 * @slow_high_order: slow path high order allocations
 * @empty: ptr ring is empty, so a slow path allocation was forced
 * @refill: an allocation which triggered a refill of the cache
 * @waive: pages obtained from the ptr ring that cannot be added to
 *	the cache due to a NUMA mismatch
 */
struct page_pool_alloc_stats {
	u64 fast;
	u64 slow;
	u64 slow_high_order;
	u64 empty;
	u64 refill;
	u64 waive;
};

/**
 * struct page_pool_recycle_stats - recycling (freeing) statistics
 * @cached: recycling placed page in the page pool cache
 * @cache_full: page pool cache was full
 * @ring: page placed into the ptr ring
 * @ring_full: page released from page pool because the ptr ring was full
 * @released_refcnt: page released (and not recycled) because refcnt > 1
 */
struct page_pool_recycle_stats {
	u64 cached;
	u64 cache_full;
	u64 ring;
	u64 ring_full;
	u64 released_refcnt;
};

/**
 * struct page_pool_stats - combined page pool use statistics
 * @alloc_stats: see struct page_pool_alloc_stats
 * @recycle_stats: see struct page_pool_recycle_stats
 *
 * Wrapper struct for combining page pool stats with different storage
 * requirements.
 */
struct page_pool_stats {
	struct page_pool_alloc_stats alloc_stats;
	struct page_pool_recycle_stats recycle_stats;
};
#endif

/* The whole frag API block must stay within one cacheline. On 32-bit systems,
 * sizeof(long) == sizeof(int), so the block size is ``3 * sizeof(long)``.
 * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
 * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that
 * one for simplicity.
 * Having it aligned to a cacheline boundary may be excessive and doesn't bring
 * any benefit.
 */
#define PAGE_POOL_FRAG_GROUP_ALIGN	(4 * sizeof(long))
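
/* Illustrative sketch, not part of the API contract: the frag API (see
 * page_pool_alloc_frag() below) hands out sub-page fragments of the pool's
 * pages and tracks the currently open page via the "frag" group of
 * struct page_pool. The pool, buf and the 2048-byte fragment size are
 * assumptions made purely for illustration:
 *
 *	unsigned int offset;
 *	struct page *page;
 *
 *	page = page_pool_alloc_frag(pool, &offset, 2048, GFP_ATOMIC);
 *	if (page)
 *		buf = page_address(page) + offset;
 */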

struct memory_provider_ops;

struct pp_memory_provider_params {
	void *mp_priv;
	const struct memory_provider_ops *mp_ops;
	u32 rx_page_size;
};

struct page_pool {
	struct page_pool_params_fast p;

	int cpuid;
	u32 pages_state_hold_cnt;

	bool has_init_callback:1;	/* slow::init_callback is set */
	bool dma_map:1;			/* Perform DMA mapping */
	bool dma_sync:1;		/* Perform DMA sync for device */
	bool dma_sync_for_cpu:1;	/* Perform DMA sync for cpu */
#ifdef CONFIG_PAGE_POOL_STATS
	bool system:1;			/* This is a global percpu pool */
#endif

	__cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
	long frag_users;
	netmem_ref frag_page;
	unsigned int frag_offset;
	__cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);

	struct delayed_work release_dw;
	void (*disconnect)(void *pool);
	unsigned long defer_start;
	unsigned long defer_warn;

#ifdef CONFIG_PAGE_POOL_STATS
	/* these stats are incremented while in softirq context */
	struct page_pool_alloc_stats alloc_stats;
#endif
	u32 xdp_mem_id;

	/*
	 * Data structure for the allocation side
	 *
	 * The driver's allocation side usually already performs some kind
	 * of resource protection. Piggyback on this protection, and
	 * require the driver to protect the allocation side.
	 *
	 * For NIC drivers this means allocating a page_pool per RX-queue,
	 * as the RX-queue is already protected by softirq/BH scheduling
	 * and napi_schedule guarantees that a single napi_struct will only
	 * be scheduled on a single CPU (see napi_schedule).
	 */
	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;

	/* Data structure for storing recycled pages.
	 *
	 * Returning/freeing pages is more complicated synchronization-wise,
	 * because frees can happen on remote CPUs, with no association with
	 * the allocation resource.
	 *
	 * Use ptr_ring, as it separates consumer and producer efficiently,
	 * in a way that doesn't bounce cache lines.
	 *
	 * TODO: Implement bulk return of pages into this structure.
	 */
	struct ptr_ring ring;

	void *mp_priv;
	const struct memory_provider_ops *mp_ops;

	struct xarray dma_mapped;

#ifdef CONFIG_PAGE_POOL_STATS
	/* recycle stats are per-cpu to avoid locking */
	struct page_pool_recycle_stats __percpu *recycle_stats;
#endif
	atomic_t pages_state_release_cnt;

	/* A page_pool is strictly tied to a single RX-queue being
	 * protected by NAPI, due to the above pp_alloc_cache. The purpose
	 * of this refcnt is to simplify the drivers' error handling.
	 */
	refcount_t user_cnt;

	u64 destroy_cnt;

	/* Slow/Control-path information follows */
	struct page_pool_params_slow slow;
	/* User-facing fields, protected by page_pools_lock */
	struct {
		struct hlist_node list;
		u64 detach_time;
		u32 id;
	} user;
};
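
/* A minimal usage sketch for a typical Rx driver, not a definitive recipe.
 * The driver-private objects (pdev, rxq), the pool size and the
 * XDP_PACKET_HEADROOM-based offset/max_len are assumptions made purely for
 * illustration:
 *
 *	struct page_pool_params pp_params = {
 *		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *		.order		= 0,
 *		.pool_size	= 256,
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= &pdev->dev,
 *		.napi		= &rxq->napi,
 *		.dma_dir	= DMA_FROM_DEVICE,
 *		.offset		= XDP_PACKET_HEADROOM,
 *		.max_len	= PAGE_SIZE - XDP_PACKET_HEADROOM,
 *	};
 *	struct page_pool *pool = page_pool_create(&pp_params);
 *
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *
 *	// Refill the RX ring, typically from the NAPI poll context:
 *	struct page *page = page_pool_alloc_pages(pool, GFP_ATOMIC);
 *
 *	// Return a page the driver still owns (e.g. on XDP_DROP); pass
 *	// allow_direct = true only from the pool's NAPI/softirq context,
 *	// and -1 to DMA-sync the full max_len area:
 *	page_pool_put_unrefed_page(pool, page, -1, true);
 *
 *	// On teardown, once the driver will request no more pages:
 *	page_pool_destroy(pool);
 */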

struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
				  unsigned int size, gfp_t gfp);
netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
				       unsigned int *offset, unsigned int size,
				       gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
					  int cpuid);

struct xdp_mem_info;

#ifdef CONFIG_PAGE_POOL
void page_pool_enable_direct_recycling(struct page_pool *pool,
				       struct napi_struct *napi);
void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
			   const struct xdp_mem_info *mem);
void page_pool_put_netmem_bulk(netmem_ref *data, u32 count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
}

static inline void page_pool_use_xdp_mem(struct page_pool *pool,
					 void (*disconnect)(void *),
					 const struct xdp_mem_info *mem)
{
}

static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
{
}
#endif

void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
				  unsigned int dma_sync_size,
				  bool allow_direct);
void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
				unsigned int dma_sync_size,
				bool allow_direct);

static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
	return true;
#else
	return false;
#endif
}

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);

#endif /* _NET_PAGE_POOL_TYPES_H */