xref: /linux/net/xdp/xdp_umem.c (revision d148eac0e70f06485dbd4cce6ed01cb07c650cec)
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

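/* Add a socket to the umem's RCU-protected list of bound XDP sockets. */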
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

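/* Remove a socket from the umem's socket list. For zero-copy umems, wait
 * for an RCU grace period so that concurrent readers of the list are done
 * with the entry before it goes away.
 */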
void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	if (xs->dev) {
		spin_lock_irqsave(&umem->xsk_list_lock, flags);
		list_del_rcu(&xs->list);
		spin_unlock_irqrestore(&umem->xsk_list_lock, flags);

		if (umem->zc)
			synchronize_net();
	}
}

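/* Bind the umem to a specific netdev queue. XDP_COPY always succeeds
 * without involving the driver. Zero-copy is attempted only when the
 * driver implements both ndo_bpf and ndo_xsk_async_xmit; with
 * XDP_ZEROCOPY a driver failure is fatal, otherwise we silently fall
 * back to copy mode.
 */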
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u32 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err;

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (force_copy)
		return 0;

	dev_hold(dev);

	if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
		bpf.command = XDP_QUERY_XSK_UMEM;

		rtnl_lock();
		err = dev->netdev_ops->ndo_bpf(dev, &bpf);
		rtnl_unlock();

		if (err) {
			dev_put(dev);
			return force_zc ? -ENOTSUPP : 0;
		}

		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = umem;
		bpf.xsk.queue_id = queue_id;

		rtnl_lock();
		err = dev->netdev_ops->ndo_bpf(dev, &bpf);
		rtnl_unlock();

		if (err) {
			dev_put(dev);
			return force_zc ? err : 0; /* fail or fallback */
		}

		umem->dev = dev;
		umem->queue_id = queue_id;
		umem->zc = true;
		return 0;
	}

	dev_put(dev);
	return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
}

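/* Undo xdp_umem_assign_dev(): ask the driver to tear down the umem for
 * the bound queue and drop the device reference taken at bind time.
 */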
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	struct netdev_bpf bpf;
	int err;

	if (umem->dev) {
		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = NULL;
		bpf.xsk.queue_id = umem->queue_id;

		rtnl_lock();
		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
		rtnl_unlock();

		if (err)
			WARN(1, "failed to disable umem!\n");

		dev_put(umem->dev);
		umem->dev = NULL;
	}
}

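/* Release the user pages pinned by xdp_umem_pin_pages(). The pages are
 * marked dirty since the kernel may have written packet data into them.
 */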
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

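/* Return the locked-memory charge taken in xdp_umem_account_pages(). */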
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

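/* Final teardown once the last reference is dropped: detach from the
 * device, destroy the fill and completion rings, unpin the pages and,
 * if the owning mm still exists, return the locked-memory charge.
 */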
static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	xdp_umem_clear_dev(umem);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xdp_umem_unpin_pages(umem);

	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	mmput(mm);
	kfree(umem->pages);
	umem->pages = NULL;

	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);
}

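/* Work item so the release can run in process context. */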
static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

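/* Drop a reference. The final put defers the actual release to a
 * workqueue; the release path takes rtnl_lock and may sleep (mmput).
 */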
void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

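/* Pin the whole user memory area with get_user_pages() so the pages stay
 * resident for the kernel and drivers. A partial pin is treated as failure
 * and any already-pinned pages are released again.
 */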
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

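/* Charge the pinned pages against the owner's RLIMIT_MEMLOCK, unless the
 * caller has CAP_IPC_LOCK. The charge is applied to user->locked_vm with
 * a cmpxchg loop so concurrent registrations cannot exceed the limit.
 */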
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

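/* Validate an XDP_UMEM_REG request and set up the umem: check chunk size,
 * alignment and headroom, charge and pin the user pages, and record the
 * kernel virtual address of each page.
 *
 * Sketch of the userspace side, with hypothetical buffer and socket
 * variables (not part of this file):
 *
 *	struct xdp_umem_reg mr = {
 *		.addr = (__u64)(uintptr_t)bufs,	// page-aligned, e.g. from mmap()
 *		.len = NUM_FRAMES * FRAME_SIZE,
 *		.chunk_size = FRAME_SIZE,	// power of two, 2048..PAGE_SIZE
 *		.headroom = 0,
 *	};
 *	setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 */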
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	unsigned int chunks, chunks_per_page;
	u64 addr = mr->addr, size = mr->len;
	int size_chk, err, i;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned, for
		 * simplicity. This might change later.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	chunks_per_page = PAGE_SIZE / chunk_size;
	if (chunks < chunks_per_page || chunks % chunks_per_page)
		return -EINVAL;

	headroom = ALIGN(headroom, 64);

	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->address = (unsigned long)addr;
	umem->props.chunk_mask = ~((u64)chunk_size - 1);
	umem->props.size = size;
	umem->headroom = headroom;
	umem->chunk_size_nohr = chunk_size - headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;
	INIT_LIST_HEAD(&umem->xsk_list);
	spin_lock_init(&umem->xsk_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;

	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
	if (!umem->pages) {
		err = -ENOMEM;
		goto out_account;
	}

	for (i = 0; i < umem->npgs; i++)
		umem->pages[i].addr = page_address(umem->pgs[i]);

	return 0;

out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

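/* Allocate a umem and register the user memory described by @mr.
 * Returns an ERR_PTR on failure.
 */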
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

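/* A umem is ready for binding only once both its fill ring and its
 * completion ring have been created.
 */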
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}