// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>

#include "xdp_umem.h"

#define XDP_UMEM_MIN_FRAME_SIZE 2048

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	/* umem->user is NULL when accounting was skipped because the
	 * creator had CAP_IPC_LOCK.
	 */
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xdp_umem_unpin_pages(umem);

	/* Only unaccount the pages if the creating task and its mm are
	 * still around.
	 */
	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	mmput(mm);
	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		/* Defer the release to a workqueue; it may sleep. */
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			/* Only part of the area could be pinned; undo it. */
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
	u64 addr = mr->addr, size = mr->len, nframes64;
	unsigned int nframes, nfpp;
	s64 size_chk;
	int err;

	if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(frame_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned, for simplicity.
		 * This restriction might be lifted in the future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	/* Compute the frame count in 64 bits so that an oversized area is
	 * rejected instead of being silently truncated.
	 */
	nframes64 = div_u64(size, frame_size);
	if (nframes64 == 0 || nframes64 > UINT_MAX)
		return -EINVAL;
	nframes = nframes64;

	/* The frames must fill whole pages (nfpp = frames per page). */
	nfpp = PAGE_SIZE / frame_size;
	if (nframes < nfpp || nframes % nfpp)
		return -EINVAL;

	frame_headroom = ALIGN(frame_headroom, 64);

	/* Do the headroom check in 64 bits so that a huge frame_headroom
	 * cannot wrap the subtraction and slip past the validation.
	 */
	size_chk = (s64)frame_size - frame_headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->size = (size_t)size;
	umem->address = (unsigned long)addr;
	umem->props.frame_size = frame_size;
	umem->props.nframes = nframes;
	umem->frame_headroom = frame_headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;

	umem->frame_size_log2 = ilog2(frame_size);
	umem->nfpp_mask = nfpp - 1;
	umem->nfpplog2 = ilog2(nfpp);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;
	return 0;

out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}
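
/* Usage sketch (illustrative only, not part of this file): user space
 * registers a UMEM on an AF_XDP socket with the XDP_UMEM_REG setsockopt,
 * which is the path that ends up in xdp_umem_create() above. The socket
 * family, sockopt constants and struct xdp_umem_reg layout are assumed to
 * come from the companion UAPI header <linux/if_xdp.h> of this patch set;
 * the buffer sizes below are arbitrary example values that satisfy the
 * checks in xdp_umem_reg() (page-aligned area, power-of-two frame size of
 * at least 2048 bytes and at most PAGE_SIZE).
 *
 *	void *bufs;
 *	struct xdp_umem_reg mr;
 *	int fd = socket(AF_XDP, SOCK_RAW, 0);
 *
 *	posix_memalign(&bufs, getpagesize(), 2048 * 4096);
 *	mr.addr = (__u64)(unsigned long)bufs;	// page-aligned start address
 *	mr.len = 2048 * 4096;			// 4096 frames of 2 KiB each
 *	mr.frame_size = 2048;			// power of two, <= PAGE_SIZE
 *	mr.frame_headroom = 0;
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 */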