// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

static DEFINE_IDA(umem_ida);

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kvfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_free(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		if (defer_cleanup) {
			INIT_WORK(&umem->work, xdp_umem_release_deferred);
			schedule_work(&umem->work);
		} else {
			xdp_umem_release(umem);
		}
	}
}

static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kvfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom;
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u64 npgs, addr = mr->addr, size = mr->len;
	u64 chunks;
	u32 chunks_rem;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned, for simplicity.
		 * This restriction might be lifted in the future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	if (npgs > U32_MAX)
		return -EINVAL;

	/* umem->chunks is u32, so reject sizes whose chunk count would
	 * overflow it.
	 */
	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
	if (!chunks || chunks > U32_MAX)
		return -EINVAL;

	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = (u32)npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_alloc(&umem_ida, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_free(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}
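
For context, the registration path above (xdp_umem_create() calling xdp_umem_reg()) is reached from user space through the XDP_UMEM_REG setsockopt on an AF_XDP socket. The following user-space sketch is not part of this file; it is a minimal illustration assuming reasonably recent kernel UAPI and libc headers that provide AF_XDP, SOL_XDP, XDP_UMEM_REG and struct xdp_umem_reg. The buffer size, chunk size and headroom values are arbitrary example choices, and error handling is trimmed to early returns. A real program would go on to configure fill/completion rings and bind() the socket.

/* Minimal user-space sketch: register a UMEM with an AF_XDP socket.
 * Illustrative only; 4096 chunks of 2048 bytes with no headroom are
 * arbitrary example values.
 */
#include <linux/if_xdp.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	const size_t chunk_size = 2048;
	const size_t num_chunks = 4096;
	const size_t umem_len = chunk_size * num_chunks;
	struct xdp_umem_reg mr;
	void *buf;
	int fd;

	fd = socket(AF_XDP, SOCK_RAW, 0);
	if (fd < 0) {
		perror("socket(AF_XDP)");
		return 1;
	}

	/* xdp_umem_reg() requires the memory area to be page aligned;
	 * anonymous mmap() returns page-aligned memory.
	 */
	buf = mmap(NULL, umem_len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	memset(&mr, 0, sizeof(mr));
	mr.addr = (unsigned long long)(uintptr_t)buf;
	mr.len = umem_len;
	mr.chunk_size = chunk_size;	/* power of two unless the unaligned-chunk flag is set */
	mr.headroom = 0;
	mr.flags = 0;

	/* This setsockopt is what ultimately drives xdp_umem_create() above. */
	if (setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr))) {
		perror("setsockopt(XDP_UMEM_REG)");
		munmap(buf, umem_len);
		close(fd);
		return 1;
	}

	printf("UMEM of %zu bytes registered\n", umem_len);

	munmap(buf, umem_len);
	close(fd);
	return 0;
}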