// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

static DEFINE_IDA(umem_ida);

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kvfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_free(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		if (defer_cleanup) {
			INIT_WORK(&umem->work, xdp_umem_release_deferred);
			schedule_work(&umem->work);
		} else {
			xdp_umem_release(umem);
		}
	}
}

static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0]);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kvfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

#define XDP_UMEM_FLAGS_VALID ( \
		XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
		XDP_UMEM_TX_SW_CSUM | \
		0)

static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	u64 addr = mr->addr, size = mr->len;
	u32 chunks_rem, npgs_rem;
	u64 chunks, npgs;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (mr->flags & ~XDP_UMEM_FLAGS_VALID)
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned, for
		 * simplicity. This might change in the future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
	if (!chunks || chunks > U32_MAX)
		return -EINVAL;

	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;
	umem->tx_metadata_len = mr->tx_metadata_len;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_alloc(&umem_ida, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_free(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}
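
/* Illustrative user-space sketch (kept as a comment; not part of the kernel
 * build): the registration path above is normally reached via the
 * XDP_UMEM_REG setsockopt on an AF_XDP socket. The struct and option names
 * come from the uapi in <linux/if_xdp.h>; the buffer, NUM_FRAMES, FRAME_SIZE
 * and xsk_fd values below are hypothetical placeholders for the example.
 *
 *	void *buffer;				// page-aligned, e.g. from mmap()
 *	struct xdp_umem_reg mr = {
 *		.addr = (__u64)(uintptr_t)buffer,
 *		.len = NUM_FRAMES * FRAME_SIZE,	// total UMEM size in bytes
 *		.chunk_size = FRAME_SIZE,	// power of two unless
 *						// XDP_UMEM_UNALIGNED_CHUNK_FLAG is set
 *		.headroom = 0,
 *		.flags = 0,
 *	};
 *	int xsk_fd = socket(AF_XDP, SOCK_RAW, 0);
 *	setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 */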