// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

static DEFINE_IDA(umem_ida);

/* Unpin the user pages backing the umem and free the page array. */
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kvfree(umem->pgs);
	umem->pgs = NULL;
}

/* Return the umem's pages to the owning user's locked memory accounting. */
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

/* Map the pinned pages into one contiguous kernel virtual address range. */
static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_free(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

/* Drop a reference; the final put releases the umem, either directly or
 * deferred to a workqueue.
 */
void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		if (defer_cleanup) {
			INIT_WORK(&umem->work, xdp_umem_release_deferred);
			schedule_work(&umem->work);
		} else {
			xdp_umem_release(umem);
		}
	}
}

/* Pin the user pages backing the umem (FOLL_LONGTERM) so they stay resident
 * while the umem is in use.
 */
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0]);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			/* Partial pin: unpin what we got and fail. */
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kvfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

/* Charge the pinned pages against the user's RLIMIT_MEMLOCK, unless the
 * caller has CAP_IPC_LOCK.
 */
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

#define XDP_UMEM_FLAGS_VALID ( \
		XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
		XDP_UMEM_TX_SW_CSUM | \
		XDP_UMEM_TX_METADATA_LEN | \
	0)

/* Validate a registration request from user space, then account, pin and
 * map the backing memory.
 */
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	u64 addr = mr->addr, size = mr->len;
	u32 chunks_rem, npgs_rem;
	u64 chunks, npgs;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (mr->flags & ~XDP_UMEM_FLAGS_VALID)
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. This is a
		 * simplification and might change in the future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
	if (!chunks || chunks > U32_MAX)
		return -EINVAL;

	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	if (mr->flags & XDP_UMEM_TX_METADATA_LEN) {
		if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
			return -EINVAL;
		umem->tx_metadata_len = mr->tx_metadata_len;
	}

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_alloc(&umem_ida, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_free(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}
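/* Illustration only, not part of this file: a minimal user-space sketch of a
 * UMEM registration that ends up in xdp_umem_reg() above. The buffer size
 * (4 MiB) and chunk size (2048) are assumed example values; what this file
 * actually enforces is a page-aligned address (mmap() returns page-aligned
 * memory), a chunk size between XDP_UMEM_MIN_CHUNK_SIZE and PAGE_SIZE
 * (power of two unless XDP_UMEM_UNALIGNED_CHUNK_FLAG is set), and headroom
 * below chunk_size - XDP_PACKET_HEADROOM.
 *
 *	#include <linux/if_xdp.h>
 *	#include <sys/mman.h>
 *	#include <sys/socket.h>
 *
 *	int fd = socket(AF_XDP, SOCK_RAW, 0);
 *	size_t len = 4 * 1024 * 1024;
 *	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	struct xdp_umem_reg mr = {
 *		.addr = (__u64)(unsigned long)buf,
 *		.len = len,
 *		.chunk_size = 2048,
 *		.headroom = 0,
 *	};
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 */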