/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool.c
 *	Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>

#include <net/page_pool.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h> /* for __put_page() */

static int page_pool_init(struct page_pool *pool,
			  const struct page_pool_params *params)
{
	unsigned int ring_qsize = 1024; /* Default */

	memcpy(&pool->p, params, sizeof(pool->p));

	/* Validate only known flags were used */
	if (pool->p.flags & ~(PP_FLAG_ALL))
		return -EINVAL;

	if (pool->p.pool_size)
		ring_qsize = pool->p.pool_size;

	/* Sanity limit mem that can be pinned down */
	if (ring_qsize > 32768)
		return -E2BIG;

	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
	 * DMA_BIDIRECTIONAL additionally allows the page to be used for
	 * DMA transmit, which is the XDP_TX use-case.
	 */
	if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
		return -EINVAL;

	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
		return -ENOMEM;

	return 0;
}

struct page_pool *page_pool_create(const struct page_pool_params *params)
{
	struct page_pool *pool;
	int err = 0;

	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	err = page_pool_init(pool, params);
	if (err < 0) {
		pr_warn("%s() gave up with errno %d\n", __func__, err);
		kfree(pool);
		return ERR_PTR(err);
	}
	return pool;
}
EXPORT_SYMBOL(page_pool_create);
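
/* Usage sketch (illustrative only, kept out of the build with #if 0): how a
 * driver might create a pool at RX-ring setup time. The "my_" names are
 * hypothetical; the page_pool_params fields used (flags, order, pool_size,
 * nid, dev, dma_dir) are the ones consumed by page_pool_init() above.
 */
#if 0
static struct page_pool *my_rx_ring_create_pool(struct device *dev, int nid)
{
	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP,	/* pool handles DMA mapping */
		.order		= 0,			/* one page per frame */
		.pool_size	= 256,			/* sized to the RX ring */
		.nid		= nid,			/* NUMA node of the ring */
		.dev		= dev,			/* device doing the DMA */
		.dma_dir	= DMA_BIDIRECTIONAL,	/* also allow XDP_TX */
	};
	struct page_pool *pool;

	pool = page_pool_create(&pp_params);
	if (IS_ERR(pool))
		return NULL;	/* caller treats NULL as setup failure */
	return pool;
}
#endif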

/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
	struct ptr_ring *r = &pool->ring;
	struct page *page;

	/* Quicker fallback, avoid locks when ring is empty */
	if (__ptr_ring_empty(r))
		return NULL;

	/* Test for safe-context, caller should provide this guarantee */
	if (likely(in_serving_softirq())) {
		if (likely(pool->alloc.count)) {
			/* Fast-path */
			page = pool->alloc.cache[--pool->alloc.count];
			return page;
		}
		/* Slower-path: Alloc array empty, time to refill
		 *
		 * Open-coded bulk ptr_ring consumer.
		 *
		 * Discussion: the ring consumer lock is not really
		 * needed due to the softirq/NAPI protection, but we
		 * will later need the ability to reclaim pages on the
		 * ring. Thus, keep the locks.
		 */
		spin_lock(&r->consumer_lock);
		while ((page = __ptr_ring_consume(r))) {
			if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
				break;
			pool->alloc.cache[pool->alloc.count++] = page;
		}
		spin_unlock(&r->consumer_lock);
		return page;
	}

	/* Slow-path: Get page from locked ring queue */
	page = ptr_ring_consume(&pool->ring);
	return page;
}

/* slow path */
noinline
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
						 gfp_t _gfp)
{
	struct page *page;
	gfp_t gfp = _gfp;
	dma_addr_t dma;

	/* We could always set __GFP_COMP, and avoid this branch, as
	 * prep_new_page() can handle order-0 with __GFP_COMP.
	 */
	if (pool->p.order)
		gfp |= __GFP_COMP;

	/* FUTURE development:
	 *
	 * Current slow-path essentially falls back to single page
	 * allocations, which doesn't improve performance. This code
	 * needs bulk allocation support from the page allocator code.
	 */

	/* Cache was empty, do real allocation */
	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
	if (!page)
		return NULL;

	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		goto skip_dma_map;

	/* Setup DMA mapping: use page->private for DMA-addr.
	 * This mapping is kept for the lifetime of the page, until it
	 * leaves the pool.
	 */
	dma = dma_map_page(pool->p.dev, page, 0,
			   (PAGE_SIZE << pool->p.order),
			   pool->p.dma_dir);
	if (dma_mapping_error(pool->p.dev, dma)) {
		put_page(page);
		return NULL;
	}
	set_page_private(page, dma); /* page->private = dma; */

skip_dma_map:
	/* When a page is just allocated it should/must have refcnt 1. */
	return page;
}

/* For using page_pool to replace alloc_pages() API calls, while
 * providing a synchronization guarantee for the allocation side.
 */
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
{
	struct page *page;

	/* Fast-path: Get a page from cache */
	page = __page_pool_get_cached(pool);
	if (page)
		return page;

	/* Slow-path: cache empty, do real allocation */
	page = __page_pool_alloc_pages_slow(pool, gfp);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
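
/* Usage sketch (illustrative only, kept out of the build with #if 0): RX
 * descriptor refill from NAPI context. my_ring/my_desc and the descriptor
 * layout are hypothetical; the DMA address is read via page_private()
 * because __page_pool_alloc_pages_slow() above stores the mapping there
 * when PP_FLAG_DMA_MAP is set.
 */
#if 0
static int my_rx_refill_one(struct my_ring *ring, struct my_desc *desc)
{
	struct page *page;

	/* GFP_ATOMIC: refill runs in softirq (NAPI) context */
	page = page_pool_alloc_pages(ring->page_pool, GFP_ATOMIC);
	if (!page)
		return -ENOMEM;

	desc->dma_addr = page_private(page); /* mapping done by the pool */
	desc->cookie   = page;		     /* remembered for the put side */
	return 0;
}
#endif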

/* Cleanup page_pool state from page */
static void __page_pool_clean_page(struct page_pool *pool,
				   struct page *page)
{
	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		return;

	/* DMA unmap */
	dma_unmap_page(pool->p.dev, page_private(page),
		       PAGE_SIZE << pool->p.order, pool->p.dma_dir);
	set_page_private(page, 0);
}

/* Return a page to the page allocator, cleaning up our state */
static void __page_pool_return_page(struct page_pool *pool, struct page *page)
{
	__page_pool_clean_page(pool, page);
	put_page(page);
	/* An optimization would be to call __free_pages(page, pool->p.order),
	 * knowing the page is not part of the page-cache (thus avoiding a
	 * __page_cache_release() call).
	 */
}

static bool __page_pool_recycle_into_ring(struct page_pool *pool,
					  struct page *page)
{
	int ret;

	/* BH protection not needed if current is serving softirq */
	if (in_serving_softirq())
		ret = ptr_ring_produce(&pool->ring, page);
	else
		ret = ptr_ring_produce_bh(&pool->ring, page);

	return (ret == 0) ? true : false;
}

/* Only allow direct recycling in special circumstances, into the
 * alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case.
 *
 * Caller must provide appropriate safe context.
 */
static bool __page_pool_recycle_direct(struct page *page,
				       struct page_pool *pool)
{
	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
		return false;

	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
	pool->alloc.cache[pool->alloc.count++] = page;
	return true;
}

void __page_pool_put_page(struct page_pool *pool,
			  struct page *page, bool allow_direct)
{
	/* This allocator is optimized for the XDP mode that uses
	 * one-frame-per-page, but has fallbacks that act like the
	 * regular page allocator APIs.
	 *
	 * refcnt == 1 means page_pool owns the page, and can recycle it.
	 */
	if (likely(page_ref_count(page) == 1)) {
		/* Read barrier done in page_ref_count / READ_ONCE */

		if (allow_direct && in_serving_softirq())
			if (__page_pool_recycle_direct(page, pool))
				return;

		if (!__page_pool_recycle_into_ring(pool, page)) {
			/* Cache full, fallback to free pages */
			__page_pool_return_page(pool, page);
		}
		return;
	}
	/* Fallback/non-XDP mode: the API user has an elevated refcnt.
	 *
	 * Many drivers split up the page into fragments, and some
	 * want to keep doing this to save memory and do refcnt based
	 * recycling. Support this use case too, to ease drivers
	 * switching between XDP/non-XDP.
	 *
	 * In case page_pool maintains the DMA mapping, the API user must
	 * call page_pool_put_page() once. In this elevated refcnt case,
	 * the DMA mapping is unmapped/released here, as the driver is
	 * likely doing refcnt based recycle tricks, meaning another
	 * process will be invoking put_page().
	 */
	__page_pool_clean_page(pool, page);
	put_page(page);
}
EXPORT_SYMBOL(__page_pool_put_page);

static void __page_pool_empty_ring(struct page_pool *pool)
{
	struct page *page;

	/* Empty recycle ring */
	while ((page = ptr_ring_consume(&pool->ring))) {
		/* Verify the refcnt invariant of cached pages */
		if (!(page_ref_count(page) == 1))
			pr_crit("%s() page_pool refcnt %d violation\n",
				__func__, page_ref_count(page));

		__page_pool_return_page(pool, page);
	}
}

static void __page_pool_destroy_rcu(struct rcu_head *rcu)
{
	struct page_pool *pool;

	pool = container_of(rcu, struct page_pool, rcu);

	WARN(pool->alloc.count, "API usage violation");

	__page_pool_empty_ring(pool);
	ptr_ring_cleanup(&pool->ring, NULL);
	kfree(pool);
}

/* Cleanup and release resources */
void page_pool_destroy(struct page_pool *pool)
{
	struct page *page;

	/* Empty the alloc cache; assume the caller made sure this is
	 * no longer in use, and that page_pool_alloc_pages() cannot be
	 * called concurrently.
	 */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		__page_pool_return_page(pool, page);
	}

	/* No more consumers should exist, but producers could still
	 * be in-flight.
	 */
	__page_pool_empty_ring(pool);

	/* An xdp_mem_allocator can still ref the page_pool pointer */
	call_rcu(&pool->rcu, __page_pool_destroy_rcu);
}
EXPORT_SYMBOL(page_pool_destroy);
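
/* Usage sketch (illustrative only, kept out of the build with #if 0): the
 * return and teardown side. my_ring is hypothetical. Direct recycling
 * (allow_direct = true) is only legal from the softirq/NAPI context that
 * owns the pool, and only when the caller knows page_ref_count(page) == 1,
 * per __page_pool_put_page() above.
 */
#if 0
static void my_rx_drop_frame(struct my_ring *ring, struct page *page)
{
	/* XDP_DROP in the NAPI poll loop: recycle into the alloc cache */
	__page_pool_put_page(ring->page_pool, page, true);
}

static void my_rx_ring_teardown(struct my_ring *ring)
{
	/* Assumes the driver has already stopped RX/NAPI and returned the
	 * pages it pulled from the pool, so no allocation or put can race
	 * with the destroy below.
	 */
	page_pool_destroy(ring->page_pool);
	ring->page_pool = NULL;
}
#endif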