1 // SPDX-License-Identifier: GPL-2.0-only 2 /* net/core/xdp.c 3 * 4 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. 5 */ 6 #include <linux/bpf.h> 7 #include <linux/filter.h> 8 #include <linux/types.h> 9 #include <linux/mm.h> 10 #include <linux/netdevice.h> 11 #include <linux/slab.h> 12 #include <linux/idr.h> 13 #include <linux/rhashtable.h> 14 #include <linux/bug.h> 15 #include <net/page_pool.h> 16 17 #include <net/xdp.h> 18 #include <net/xdp_priv.h> /* struct xdp_mem_allocator */ 19 #include <trace/events/xdp.h> 20 21 #define REG_STATE_NEW 0x0 22 #define REG_STATE_REGISTERED 0x1 23 #define REG_STATE_UNREGISTERED 0x2 24 #define REG_STATE_UNUSED 0x3 25 26 static DEFINE_IDA(mem_id_pool); 27 static DEFINE_MUTEX(mem_id_lock); 28 #define MEM_ID_MAX 0xFFFE 29 #define MEM_ID_MIN 1 30 static int mem_id_next = MEM_ID_MIN; 31 32 static bool mem_id_init; /* false */ 33 static struct rhashtable *mem_id_ht; 34 35 static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) 36 { 37 const u32 *k = data; 38 const u32 key = *k; 39 40 BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id) 41 != sizeof(u32)); 42 43 /* Use cyclic increasing ID as direct hash key */ 44 return key; 45 } 46 47 static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg, 48 const void *ptr) 49 { 50 const struct xdp_mem_allocator *xa = ptr; 51 u32 mem_id = *(u32 *)arg->key; 52 53 return xa->mem.id != mem_id; 54 } 55 56 static const struct rhashtable_params mem_id_rht_params = { 57 .nelem_hint = 64, 58 .head_offset = offsetof(struct xdp_mem_allocator, node), 59 .key_offset = offsetof(struct xdp_mem_allocator, mem.id), 60 .key_len = sizeof_field(struct xdp_mem_allocator, mem.id), 61 .max_size = MEM_ID_MAX, 62 .min_size = 8, 63 .automatic_shrinking = true, 64 .hashfn = xdp_mem_id_hashfn, 65 .obj_cmpfn = xdp_mem_id_cmp, 66 }; 67 68 static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) 69 { 70 struct xdp_mem_allocator *xa; 71 72 xa = container_of(rcu, struct xdp_mem_allocator, rcu); 73 74 /* Allow this ID to be reused */ 75 ida_simple_remove(&mem_id_pool, xa->mem.id); 76 77 kfree(xa); 78 } 79 80 static void mem_xa_remove(struct xdp_mem_allocator *xa) 81 { 82 trace_mem_disconnect(xa); 83 84 if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) 85 call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); 86 } 87 88 static void mem_allocator_disconnect(void *allocator) 89 { 90 struct xdp_mem_allocator *xa; 91 struct rhashtable_iter iter; 92 93 mutex_lock(&mem_id_lock); 94 95 rhashtable_walk_enter(mem_id_ht, &iter); 96 do { 97 rhashtable_walk_start(&iter); 98 99 while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) { 100 if (xa->allocator == allocator) 101 mem_xa_remove(xa); 102 } 103 104 rhashtable_walk_stop(&iter); 105 106 } while (xa == ERR_PTR(-EAGAIN)); 107 rhashtable_walk_exit(&iter); 108 109 mutex_unlock(&mem_id_lock); 110 } 111 112 static void mem_id_disconnect(int id) 113 { 114 struct xdp_mem_allocator *xa; 115 116 mutex_lock(&mem_id_lock); 117 118 xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); 119 if (!xa) { 120 mutex_unlock(&mem_id_lock); 121 WARN(1, "Request remove non-existing id(%d), driver bug?", id); 122 return; 123 } 124 125 trace_mem_disconnect(xa); 126 127 if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) 128 call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); 129 130 mutex_unlock(&mem_id_lock); 131 } 132 133 void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) 134 { 135 struct xdp_mem_allocator *xa; 136 int id = xdp_rxq->mem.id; 137 138 if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { 139 WARN(1, "Missing register, driver bug"); 140 return; 141 } 142 143 if (id == 0) 144 return; 145 146 if (xdp_rxq->mem.type == MEM_TYPE_ZERO_COPY) 147 return mem_id_disconnect(id); 148 149 if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) { 150 rcu_read_lock(); 151 xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params); 152 page_pool_destroy(xa->page_pool); 153 rcu_read_unlock(); 154 } 155 } 156 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); 157 158 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) 159 { 160 /* Simplify driver cleanup code paths, allow unreg "unused" */ 161 if (xdp_rxq->reg_state == REG_STATE_UNUSED) 162 return; 163 164 WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG"); 165 166 xdp_rxq_info_unreg_mem_model(xdp_rxq); 167 168 xdp_rxq->reg_state = REG_STATE_UNREGISTERED; 169 xdp_rxq->dev = NULL; 170 171 /* Reset mem info to defaults */ 172 xdp_rxq->mem.id = 0; 173 xdp_rxq->mem.type = 0; 174 } 175 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg); 176 177 static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq) 178 { 179 memset(xdp_rxq, 0, sizeof(*xdp_rxq)); 180 } 181 182 /* Returns 0 on success, negative on failure */ 183 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, 184 struct net_device *dev, u32 queue_index) 185 { 186 if (xdp_rxq->reg_state == REG_STATE_UNUSED) { 187 WARN(1, "Driver promised not to register this"); 188 return -EINVAL; 189 } 190 191 if (xdp_rxq->reg_state == REG_STATE_REGISTERED) { 192 WARN(1, "Missing unregister, handled but fix driver"); 193 xdp_rxq_info_unreg(xdp_rxq); 194 } 195 196 if (!dev) { 197 WARN(1, "Missing net_device from driver"); 198 return -ENODEV; 199 } 200 201 /* State either UNREGISTERED or NEW */ 202 xdp_rxq_info_init(xdp_rxq); 203 xdp_rxq->dev = dev; 204 xdp_rxq->queue_index = queue_index; 205 206 xdp_rxq->reg_state = REG_STATE_REGISTERED; 207 return 0; 208 } 209 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg); 210 211 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq) 212 { 213 xdp_rxq->reg_state = REG_STATE_UNUSED; 214 } 215 EXPORT_SYMBOL_GPL(xdp_rxq_info_unused); 216 217 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq) 218 { 219 return (xdp_rxq->reg_state == REG_STATE_REGISTERED); 220 } 221 EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg); 222 223 static int __mem_id_init_hash_table(void) 224 { 225 struct rhashtable *rht; 226 int ret; 227 228 if (unlikely(mem_id_init)) 229 return 0; 230 231 rht = kzalloc(sizeof(*rht), GFP_KERNEL); 232 if (!rht) 233 return -ENOMEM; 234 235 ret = rhashtable_init(rht, &mem_id_rht_params); 236 if (ret < 0) { 237 kfree(rht); 238 return ret; 239 } 240 mem_id_ht = rht; 241 smp_mb(); /* mutex lock should provide enough pairing */ 242 mem_id_init = true; 243 244 return 0; 245 } 246 247 /* Allocate a cyclic ID that maps to allocator pointer. 248 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html 249 * 250 * Caller must lock mem_id_lock. 251 */ 252 static int __mem_id_cyclic_get(gfp_t gfp) 253 { 254 int retries = 1; 255 int id; 256 257 again: 258 id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp); 259 if (id < 0) { 260 if (id == -ENOSPC) { 261 /* Cyclic allocator, reset next id */ 262 if (retries--) { 263 mem_id_next = MEM_ID_MIN; 264 goto again; 265 } 266 } 267 return id; /* errno */ 268 } 269 mem_id_next = id + 1; 270 271 return id; 272 } 273 274 static bool __is_supported_mem_type(enum xdp_mem_type type) 275 { 276 if (type == MEM_TYPE_PAGE_POOL) 277 return is_page_pool_compiled_in(); 278 279 if (type >= MEM_TYPE_MAX) 280 return false; 281 282 return true; 283 } 284 285 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, 286 enum xdp_mem_type type, void *allocator) 287 { 288 struct xdp_mem_allocator *xdp_alloc; 289 gfp_t gfp = GFP_KERNEL; 290 int id, errno, ret; 291 void *ptr; 292 293 if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { 294 WARN(1, "Missing register, driver bug"); 295 return -EFAULT; 296 } 297 298 if (!__is_supported_mem_type(type)) 299 return -EOPNOTSUPP; 300 301 xdp_rxq->mem.type = type; 302 303 if (!allocator) { 304 if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY) 305 return -EINVAL; /* Setup time check page_pool req */ 306 return 0; 307 } 308 309 /* Delay init of rhashtable to save memory if feature isn't used */ 310 if (!mem_id_init) { 311 mutex_lock(&mem_id_lock); 312 ret = __mem_id_init_hash_table(); 313 mutex_unlock(&mem_id_lock); 314 if (ret < 0) { 315 WARN_ON(1); 316 return ret; 317 } 318 } 319 320 xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); 321 if (!xdp_alloc) 322 return -ENOMEM; 323 324 mutex_lock(&mem_id_lock); 325 id = __mem_id_cyclic_get(gfp); 326 if (id < 0) { 327 errno = id; 328 goto err; 329 } 330 xdp_rxq->mem.id = id; 331 xdp_alloc->mem = xdp_rxq->mem; 332 xdp_alloc->allocator = allocator; 333 334 /* Insert allocator into ID lookup table */ 335 ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); 336 if (IS_ERR(ptr)) { 337 ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id); 338 xdp_rxq->mem.id = 0; 339 errno = PTR_ERR(ptr); 340 goto err; 341 } 342 343 if (type == MEM_TYPE_PAGE_POOL) 344 page_pool_use_xdp_mem(allocator, mem_allocator_disconnect); 345 346 mutex_unlock(&mem_id_lock); 347 348 trace_mem_connect(xdp_alloc, xdp_rxq); 349 return 0; 350 err: 351 mutex_unlock(&mem_id_lock); 352 kfree(xdp_alloc); 353 return errno; 354 } 355 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); 356 357 /* XDP RX runs under NAPI protection, and in different delivery error 358 * scenarios (e.g. queue full), it is possible to return the xdp_frame 359 * while still leveraging this protection. The @napi_direct boolean 360 * is used for those calls sites. Thus, allowing for faster recycling 361 * of xdp_frames/pages in those cases. 362 */ 363 static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, 364 unsigned long handle) 365 { 366 struct xdp_mem_allocator *xa; 367 struct page *page; 368 369 switch (mem->type) { 370 case MEM_TYPE_PAGE_POOL: 371 rcu_read_lock(); 372 /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ 373 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); 374 page = virt_to_head_page(data); 375 napi_direct &= !xdp_return_frame_no_direct(); 376 page_pool_put_full_page(xa->page_pool, page, napi_direct); 377 rcu_read_unlock(); 378 break; 379 case MEM_TYPE_PAGE_SHARED: 380 page_frag_free(data); 381 break; 382 case MEM_TYPE_PAGE_ORDER0: 383 page = virt_to_page(data); /* Assumes order0 page*/ 384 put_page(page); 385 break; 386 case MEM_TYPE_ZERO_COPY: 387 /* NB! Only valid from an xdp_buff! */ 388 rcu_read_lock(); 389 /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ 390 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); 391 xa->zc_alloc->free(xa->zc_alloc, handle); 392 rcu_read_unlock(); 393 default: 394 /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ 395 break; 396 } 397 } 398 399 void xdp_return_frame(struct xdp_frame *xdpf) 400 { 401 __xdp_return(xdpf->data, &xdpf->mem, false, 0); 402 } 403 EXPORT_SYMBOL_GPL(xdp_return_frame); 404 405 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) 406 { 407 __xdp_return(xdpf->data, &xdpf->mem, true, 0); 408 } 409 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); 410 411 void xdp_return_buff(struct xdp_buff *xdp) 412 { 413 __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle); 414 } 415 EXPORT_SYMBOL_GPL(xdp_return_buff); 416 417 /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ 418 void __xdp_release_frame(void *data, struct xdp_mem_info *mem) 419 { 420 struct xdp_mem_allocator *xa; 421 struct page *page; 422 423 rcu_read_lock(); 424 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); 425 page = virt_to_head_page(data); 426 if (xa) 427 page_pool_release_page(xa->page_pool, page); 428 rcu_read_unlock(); 429 } 430 EXPORT_SYMBOL_GPL(__xdp_release_frame); 431 432 int xdp_attachment_query(struct xdp_attachment_info *info, 433 struct netdev_bpf *bpf) 434 { 435 bpf->prog_id = info->prog ? info->prog->aux->id : 0; 436 bpf->prog_flags = info->prog ? info->flags : 0; 437 return 0; 438 } 439 EXPORT_SYMBOL_GPL(xdp_attachment_query); 440 441 bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, 442 struct netdev_bpf *bpf) 443 { 444 if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) { 445 NL_SET_ERR_MSG(bpf->extack, 446 "program loaded with different flags"); 447 return false; 448 } 449 return true; 450 } 451 EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok); 452 453 void xdp_attachment_setup(struct xdp_attachment_info *info, 454 struct netdev_bpf *bpf) 455 { 456 if (info->prog) 457 bpf_prog_put(info->prog); 458 info->prog = bpf->prog; 459 info->flags = bpf->flags; 460 } 461 EXPORT_SYMBOL_GPL(xdp_attachment_setup); 462 463 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) 464 { 465 unsigned int metasize, totsize; 466 void *addr, *data_to_copy; 467 struct xdp_frame *xdpf; 468 struct page *page; 469 470 /* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */ 471 metasize = xdp_data_meta_unsupported(xdp) ? 0 : 472 xdp->data - xdp->data_meta; 473 totsize = xdp->data_end - xdp->data + metasize; 474 475 if (sizeof(*xdpf) + totsize > PAGE_SIZE) 476 return NULL; 477 478 page = dev_alloc_page(); 479 if (!page) 480 return NULL; 481 482 addr = page_to_virt(page); 483 xdpf = addr; 484 memset(xdpf, 0, sizeof(*xdpf)); 485 486 addr += sizeof(*xdpf); 487 data_to_copy = metasize ? xdp->data_meta : xdp->data; 488 memcpy(addr, data_to_copy, totsize); 489 490 xdpf->data = addr + metasize; 491 xdpf->len = totsize - metasize; 492 xdpf->headroom = 0; 493 xdpf->metasize = metasize; 494 xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; 495 496 xdp_return_buff(xdp); 497 return xdpf; 498 } 499 EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame); 500 501 /* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */ 502 void xdp_warn(const char *msg, const char *func, const int line) 503 { 504 WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg); 505 }; 506 EXPORT_SYMBOL_GPL(xdp_warn); 507