1c0c77d8fSBjörn Töpel // SPDX-License-Identifier: GPL-2.0 2c0c77d8fSBjörn Töpel /* XDP user-space packet buffer 3c0c77d8fSBjörn Töpel * Copyright(c) 2018 Intel Corporation. 4c0c77d8fSBjörn Töpel */ 5c0c77d8fSBjörn Töpel 6c0c77d8fSBjörn Töpel #include <linux/init.h> 7c0c77d8fSBjörn Töpel #include <linux/sched/mm.h> 8c0c77d8fSBjörn Töpel #include <linux/sched/signal.h> 9c0c77d8fSBjörn Töpel #include <linux/sched/task.h> 10c0c77d8fSBjörn Töpel #include <linux/uaccess.h> 11c0c77d8fSBjörn Töpel #include <linux/slab.h> 12c0c77d8fSBjörn Töpel #include <linux/bpf.h> 13c0c77d8fSBjörn Töpel #include <linux/mm.h> 1484c6b868SJakub Kicinski #include <linux/netdevice.h> 1584c6b868SJakub Kicinski #include <linux/rtnetlink.h> 1650e74c01SBjörn Töpel #include <linux/idr.h> 17624676e7SIvan Khoronzhuk #include <linux/vmalloc.h> 18c0c77d8fSBjörn Töpel 19c0c77d8fSBjörn Töpel #include "xdp_umem.h" 20e61e62b9SBjörn Töpel #include "xsk_queue.h" 21c0c77d8fSBjörn Töpel 2250e74c01SBjörn Töpel static DEFINE_IDA(umem_ida); 2350e74c01SBjörn Töpel 24c0c77d8fSBjörn Töpel static void xdp_umem_unpin_pages(struct xdp_umem *umem) 25c0c77d8fSBjörn Töpel { 26f1f6a7ddSJohn Hubbard unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); 27c0c77d8fSBjörn Töpel 28a720a2a0SMagnus Karlsson kvfree(umem->pgs); 29c0c77d8fSBjörn Töpel umem->pgs = NULL; 30c0c77d8fSBjörn Töpel } 31c0c77d8fSBjörn Töpel 32c0c77d8fSBjörn Töpel static void xdp_umem_unaccount_pages(struct xdp_umem *umem) 33c0c77d8fSBjörn Töpel { 34c09290c5SDaniel Borkmann if (umem->user) { 35c0c77d8fSBjörn Töpel atomic_long_sub(umem->npgs, &umem->user->locked_vm); 36c0c77d8fSBjörn Töpel free_uid(umem->user); 37c0c77d8fSBjörn Töpel } 38c09290c5SDaniel Borkmann } 39c0c77d8fSBjörn Töpel 407f7ffa4eSMagnus Karlsson static void xdp_umem_addr_unmap(struct xdp_umem *umem) 417f7ffa4eSMagnus Karlsson { 427f7ffa4eSMagnus Karlsson vunmap(umem->addrs); 437f7ffa4eSMagnus Karlsson umem->addrs = NULL; 447f7ffa4eSMagnus Karlsson } 457f7ffa4eSMagnus Karlsson 467f7ffa4eSMagnus Karlsson static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, 477f7ffa4eSMagnus Karlsson u32 nr_pages) 487f7ffa4eSMagnus Karlsson { 497f7ffa4eSMagnus Karlsson umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 507f7ffa4eSMagnus Karlsson if (!umem->addrs) 517f7ffa4eSMagnus Karlsson return -ENOMEM; 527f7ffa4eSMagnus Karlsson return 0; 537f7ffa4eSMagnus Karlsson } 547f7ffa4eSMagnus Karlsson 55c0c77d8fSBjörn Töpel static void xdp_umem_release(struct xdp_umem *umem) 56c0c77d8fSBjörn Töpel { 57c2d3d6a4SMagnus Karlsson umem->zc = false; 5821f1481aSKe Liu ida_free(&umem_ida, umem->id); 5950e74c01SBjörn Töpel 607f7ffa4eSMagnus Karlsson xdp_umem_addr_unmap(umem); 61c0c77d8fSBjörn Töpel xdp_umem_unpin_pages(umem); 62c0c77d8fSBjörn Töpel 63c0c77d8fSBjörn Töpel xdp_umem_unaccount_pages(umem); 64c0c77d8fSBjörn Töpel kfree(umem); 65c0c77d8fSBjörn Töpel } 66c0c77d8fSBjörn Töpel 67537cf4e3SMagnus Karlsson static void xdp_umem_release_deferred(struct work_struct *work) 68537cf4e3SMagnus Karlsson { 69537cf4e3SMagnus Karlsson struct xdp_umem *umem = container_of(work, struct xdp_umem, work); 70537cf4e3SMagnus Karlsson 71537cf4e3SMagnus Karlsson xdp_umem_release(umem); 72537cf4e3SMagnus Karlsson } 73537cf4e3SMagnus Karlsson 74c0c77d8fSBjörn Töpel void xdp_get_umem(struct xdp_umem *umem) 75c0c77d8fSBjörn Töpel { 76d3b42f14SBjörn Töpel refcount_inc(&umem->users); 77c0c77d8fSBjörn Töpel } 78c0c77d8fSBjörn Töpel 79537cf4e3SMagnus Karlsson void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) 80c0c77d8fSBjörn Töpel { 81c0c77d8fSBjörn Töpel if (!umem) 82c0c77d8fSBjörn Töpel return; 83c0c77d8fSBjörn Töpel 84537cf4e3SMagnus Karlsson if (refcount_dec_and_test(&umem->users)) { 85537cf4e3SMagnus Karlsson if (defer_cleanup) { 86537cf4e3SMagnus Karlsson INIT_WORK(&umem->work, xdp_umem_release_deferred); 87537cf4e3SMagnus Karlsson schedule_work(&umem->work); 88537cf4e3SMagnus Karlsson } else { 891c1efc2aSMagnus Karlsson xdp_umem_release(umem); 90c0c77d8fSBjörn Töpel } 91537cf4e3SMagnus Karlsson } 92537cf4e3SMagnus Karlsson } 93c0c77d8fSBjörn Töpel 9407bf2d97SMagnus Karlsson static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) 95c0c77d8fSBjörn Töpel { 96c0c77d8fSBjörn Töpel unsigned int gup_flags = FOLL_WRITE; 97c0c77d8fSBjörn Töpel long npgs; 98c0c77d8fSBjörn Töpel int err; 99c0c77d8fSBjörn Töpel 100a720a2a0SMagnus Karlsson umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN); 101c0c77d8fSBjörn Töpel if (!umem->pgs) 102c0c77d8fSBjörn Töpel return -ENOMEM; 103c0c77d8fSBjörn Töpel 104d8ed45c5SMichel Lespinasse mmap_read_lock(current->mm); 10507bf2d97SMagnus Karlsson npgs = pin_user_pages(address, umem->npgs, 1064c630f30SLorenzo Stoakes gup_flags | FOLL_LONGTERM, &umem->pgs[0]); 107d8ed45c5SMichel Lespinasse mmap_read_unlock(current->mm); 108c0c77d8fSBjörn Töpel 109c0c77d8fSBjörn Töpel if (npgs != umem->npgs) { 110c0c77d8fSBjörn Töpel if (npgs >= 0) { 111c0c77d8fSBjörn Töpel umem->npgs = npgs; 112c0c77d8fSBjörn Töpel err = -ENOMEM; 113c0c77d8fSBjörn Töpel goto out_pin; 114c0c77d8fSBjörn Töpel } 115c0c77d8fSBjörn Töpel err = npgs; 116c0c77d8fSBjörn Töpel goto out_pgs; 117c0c77d8fSBjörn Töpel } 118c0c77d8fSBjörn Töpel return 0; 119c0c77d8fSBjörn Töpel 120c0c77d8fSBjörn Töpel out_pin: 121c0c77d8fSBjörn Töpel xdp_umem_unpin_pages(umem); 122c0c77d8fSBjörn Töpel out_pgs: 123a720a2a0SMagnus Karlsson kvfree(umem->pgs); 124c0c77d8fSBjörn Töpel umem->pgs = NULL; 125c0c77d8fSBjörn Töpel return err; 126c0c77d8fSBjörn Töpel } 127c0c77d8fSBjörn Töpel 128c0c77d8fSBjörn Töpel static int xdp_umem_account_pages(struct xdp_umem *umem) 129c0c77d8fSBjörn Töpel { 130c0c77d8fSBjörn Töpel unsigned long lock_limit, new_npgs, old_npgs; 131c0c77d8fSBjörn Töpel 132c0c77d8fSBjörn Töpel if (capable(CAP_IPC_LOCK)) 133c0c77d8fSBjörn Töpel return 0; 134c0c77d8fSBjörn Töpel 135c0c77d8fSBjörn Töpel lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 136c0c77d8fSBjörn Töpel umem->user = get_uid(current_user()); 137c0c77d8fSBjörn Töpel 138c0c77d8fSBjörn Töpel do { 139c0c77d8fSBjörn Töpel old_npgs = atomic_long_read(&umem->user->locked_vm); 140c0c77d8fSBjörn Töpel new_npgs = old_npgs + umem->npgs; 141c0c77d8fSBjörn Töpel if (new_npgs > lock_limit) { 142c0c77d8fSBjörn Töpel free_uid(umem->user); 143c0c77d8fSBjörn Töpel umem->user = NULL; 144c0c77d8fSBjörn Töpel return -ENOBUFS; 145c0c77d8fSBjörn Töpel } 146c0c77d8fSBjörn Töpel } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, 147c0c77d8fSBjörn Töpel new_npgs) != old_npgs); 148c0c77d8fSBjörn Töpel return 0; 149c0c77d8fSBjörn Töpel } 150c0c77d8fSBjörn Töpel 15111614723SStanislav Fomichev #define XDP_UMEM_FLAGS_VALID ( \ 15211614723SStanislav Fomichev XDP_UMEM_UNALIGNED_CHUNK_FLAG | \ 15311614723SStanislav Fomichev XDP_UMEM_TX_SW_CSUM | \ 154*d5e726d9SStanislav Fomichev XDP_UMEM_TX_METADATA_LEN | \ 15511614723SStanislav Fomichev 0) 15611614723SStanislav Fomichev 157a49049eaSBjörn Töpel static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) 158c0c77d8fSBjörn Töpel { 159c05cd364SKevin Laatz bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; 160c7df4813SKal Conley u32 chunk_size = mr->chunk_size, headroom = mr->headroom; 161c7df4813SKal Conley u64 addr = mr->addr, size = mr->len; 162c7df4813SKal Conley u32 chunks_rem, npgs_rem; 163c7df4813SKal Conley u64 chunks, npgs; 16499e3a236SMagnus Karlsson int err; 165c0c77d8fSBjörn Töpel 166bbff2f32SBjörn Töpel if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { 167c0c77d8fSBjörn Töpel /* Strictly speaking we could support this, if: 168c0c77d8fSBjörn Töpel * - huge pages, or* 169c0c77d8fSBjörn Töpel * - using an IOMMU, or 170c0c77d8fSBjörn Töpel * - making sure the memory area is consecutive 171c0c77d8fSBjörn Töpel * but for now, we simply say "computer says no". 172c0c77d8fSBjörn Töpel */ 173c0c77d8fSBjörn Töpel return -EINVAL; 174c0c77d8fSBjörn Töpel } 175c0c77d8fSBjörn Töpel 17611614723SStanislav Fomichev if (mr->flags & ~XDP_UMEM_FLAGS_VALID) 177c05cd364SKevin Laatz return -EINVAL; 178c05cd364SKevin Laatz 179c05cd364SKevin Laatz if (!unaligned_chunks && !is_power_of_2(chunk_size)) 180c0c77d8fSBjörn Töpel return -EINVAL; 181c0c77d8fSBjörn Töpel 182c0c77d8fSBjörn Töpel if (!PAGE_ALIGNED(addr)) { 183c0c77d8fSBjörn Töpel /* Memory area has to be page size aligned. For 184c0c77d8fSBjörn Töpel * simplicity, this might change. 185c0c77d8fSBjörn Töpel */ 186c0c77d8fSBjörn Töpel return -EINVAL; 187c0c77d8fSBjörn Töpel } 188c0c77d8fSBjörn Töpel 189c0c77d8fSBjörn Töpel if ((addr + size) < addr) 190c0c77d8fSBjörn Töpel return -EINVAL; 191c0c77d8fSBjörn Töpel 1922b1667e5SBjörn Töpel npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); 1932b1667e5SBjörn Töpel if (npgs_rem) 1942b1667e5SBjörn Töpel npgs++; 195b16a87d0SBjörn Töpel if (npgs > U32_MAX) 196b16a87d0SBjörn Töpel return -EINVAL; 197b16a87d0SBjörn Töpel 198c7df4813SKal Conley chunks = div_u64_rem(size, chunk_size, &chunks_rem); 199c7df4813SKal Conley if (!chunks || chunks > U32_MAX) 200c0c77d8fSBjörn Töpel return -EINVAL; 201c0c77d8fSBjörn Töpel 2022b1667e5SBjörn Töpel if (!unaligned_chunks && chunks_rem) 203c0c77d8fSBjörn Töpel return -EINVAL; 204c0c77d8fSBjörn Töpel 20599e3a236SMagnus Karlsson if (headroom >= chunk_size - XDP_PACKET_HEADROOM) 206c0c77d8fSBjörn Töpel return -EINVAL; 207c0c77d8fSBjörn Töpel 208*d5e726d9SStanislav Fomichev if (mr->flags & XDP_UMEM_TX_METADATA_LEN) { 209341ac980SStanislav Fomichev if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8) 210341ac980SStanislav Fomichev return -EINVAL; 211*d5e726d9SStanislav Fomichev umem->tx_metadata_len = mr->tx_metadata_len; 212*d5e726d9SStanislav Fomichev } 213341ac980SStanislav Fomichev 21493ee30f3SMagnus Karlsson umem->size = size; 215bbff2f32SBjörn Töpel umem->headroom = headroom; 2162b43470aSBjörn Töpel umem->chunk_size = chunk_size; 2171c1efc2aSMagnus Karlsson umem->chunks = chunks; 218c7df4813SKal Conley umem->npgs = npgs; 219c0c77d8fSBjörn Töpel umem->pgs = NULL; 220c0c77d8fSBjörn Töpel umem->user = NULL; 221c05cd364SKevin Laatz umem->flags = mr->flags; 222c0c77d8fSBjörn Töpel 223921b6869SMagnus Karlsson INIT_LIST_HEAD(&umem->xsk_dma_list); 224d3b42f14SBjörn Töpel refcount_set(&umem->users, 1); 225c0c77d8fSBjörn Töpel 226c0c77d8fSBjörn Töpel err = xdp_umem_account_pages(umem); 227c0c77d8fSBjörn Töpel if (err) 228044175a0SBjörn Töpel return err; 229c0c77d8fSBjörn Töpel 23007bf2d97SMagnus Karlsson err = xdp_umem_pin_pages(umem, (unsigned long)addr); 231c0c77d8fSBjörn Töpel if (err) 232c0c77d8fSBjörn Töpel goto out_account; 2338aef7340SBjörn Töpel 2347f7ffa4eSMagnus Karlsson err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs); 2357f7ffa4eSMagnus Karlsson if (err) 2367f7ffa4eSMagnus Karlsson goto out_unpin; 2377f7ffa4eSMagnus Karlsson 238c0c77d8fSBjörn Töpel return 0; 239c0c77d8fSBjörn Töpel 2407f7ffa4eSMagnus Karlsson out_unpin: 2417f7ffa4eSMagnus Karlsson xdp_umem_unpin_pages(umem); 242c0c77d8fSBjörn Töpel out_account: 243c0c77d8fSBjörn Töpel xdp_umem_unaccount_pages(umem); 244c0c77d8fSBjörn Töpel return err; 245c0c77d8fSBjörn Töpel } 246965a9909SMagnus Karlsson 247a49049eaSBjörn Töpel struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) 248a49049eaSBjörn Töpel { 249a49049eaSBjörn Töpel struct xdp_umem *umem; 250a49049eaSBjörn Töpel int err; 251a49049eaSBjörn Töpel 252a49049eaSBjörn Töpel umem = kzalloc(sizeof(*umem), GFP_KERNEL); 253a49049eaSBjörn Töpel if (!umem) 254a49049eaSBjörn Töpel return ERR_PTR(-ENOMEM); 255a49049eaSBjörn Töpel 25621f1481aSKe Liu err = ida_alloc(&umem_ida, GFP_KERNEL); 25750e74c01SBjörn Töpel if (err < 0) { 25850e74c01SBjörn Töpel kfree(umem); 25950e74c01SBjörn Töpel return ERR_PTR(err); 26050e74c01SBjörn Töpel } 26150e74c01SBjörn Töpel umem->id = err; 26250e74c01SBjörn Töpel 263a49049eaSBjörn Töpel err = xdp_umem_reg(umem, mr); 264a49049eaSBjörn Töpel if (err) { 26521f1481aSKe Liu ida_free(&umem_ida, umem->id); 266a49049eaSBjörn Töpel kfree(umem); 267a49049eaSBjörn Töpel return ERR_PTR(err); 268a49049eaSBjörn Töpel } 269a49049eaSBjörn Töpel 270a49049eaSBjörn Töpel return umem; 271a49049eaSBjörn Töpel } 272