xref: /linux/net/xdp/xdp_umem.c (revision 6a61b70b43c9c4cbc7314bf6c8b5ba8b0d6e1e7b)
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

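/* Attach an AF_XDP socket to the umem's socket list. The list is
 * traversed under RCU (presumably by the zero-copy data paths), so
 * additions are serialized by xsk_list_lock and published with
 * list_add_rcu().
 */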
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

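/* Detach a bound socket from the umem's socket list. For zero-copy
 * umems, synchronize_net() makes sure any RCU reader still walking
 * the list has finished before the caller tears the socket down.
 */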
void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	if (xs->dev) {
		spin_lock_irqsave(&umem->xsk_list_lock, flags);
		list_del_rcu(&xs->list);
		spin_unlock_irqrestore(&umem->xsk_list_lock, flags);

		if (umem->zc)
			synchronize_net();
	}
}

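/* Try to bind the umem to a device/queue pair for zero-copy operation.
 * XDP_ZEROCOPY and XDP_COPY are mutually exclusive; forcing copy mode
 * skips the driver negotiation entirely. Otherwise the driver is asked
 * via ndo_bpf whether it supports XSK umems and, if so, is handed the
 * umem for the given queue. Failure is fatal only when zero-copy was
 * explicitly requested; otherwise we silently fall back to copy mode.
 */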
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u32 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err;

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (force_copy)
		return 0;

	dev_hold(dev);

	if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
		bpf.command = XDP_QUERY_XSK_UMEM;

		rtnl_lock();
		err = dev->netdev_ops->ndo_bpf(dev, &bpf);
		rtnl_unlock();

		if (err) {
			dev_put(dev);
			return force_zc ? -ENOTSUPP : 0;
		}

		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = umem;
		bpf.xsk.queue_id = queue_id;

		rtnl_lock();
		err = dev->netdev_ops->ndo_bpf(dev, &bpf);
		rtnl_unlock();

		if (err) {
			dev_put(dev);
			return force_zc ? err : 0; /* fail or fallback */
		}

		umem->dev = dev;
		umem->queue_id = queue_id;
		umem->zc = true;
		return 0;
	}

	dev_put(dev);
	return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
}

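/* Undo xdp_umem_assign_dev(): tell the driver to drop the umem for the
 * queue it was bound to and release the device reference.
 */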
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	struct netdev_bpf bpf;
	int err;

	if (umem->dev) {
		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = NULL;
		bpf.xsk.queue_id = umem->queue_id;

		rtnl_lock();
		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
		rtnl_unlock();

		if (err)
			WARN(1, "failed to disable umem!\n");

		dev_put(umem->dev);
		umem->dev = NULL;
	}
}

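/* Release the user pages pinned by xdp_umem_pin_pages(). The pages are
 * marked dirty before being put, since packet data may have been
 * written into them.
 */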
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

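/* Return the pinned pages to the user's RLIMIT_MEMLOCK accounting. */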
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	atomic_long_sub(umem->npgs, &umem->user->locked_vm);
	free_uid(umem->user);
}

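/* Final teardown of a umem: detach it from the device, destroy the
 * fill and completion queues, unpin and free the backing pages, and
 * undo the memlock accounting before freeing the umem itself.
 */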
static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	xdp_umem_clear_dev(umem);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xdp_umem_unpin_pages(umem);

	/* Free the page address array even if the owning task has already
	 * exited; otherwise it would leak when the task or mm is gone.
	 */
	kfree(umem->pages);
	umem->pages = NULL;

	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	mmput(mm);
	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

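/* Drop a reference to the umem. The final put defers the actual
 * release to a workqueue, since teardown takes rtnl_lock and may
 * sleep, which may not be safe in the caller's context.
 */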
void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

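/* Pin the user-space memory area with get_user_pages() so it can be
 * used for packet buffers. On a partial pin, the already-pinned pages
 * are released and -ENOMEM is returned.
 */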
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

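/* Charge the pinned pages against the user's RLIMIT_MEMLOCK limit,
 * unless the caller has CAP_IPC_LOCK. The cmpxchg loop updates
 * locked_vm without taking a lock.
 */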
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

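/* Validate the registration request from user space and set up the
 * umem: chunks must be power-of-2 sized, fit within a page, tile the
 * area into whole pages, and leave room for XDP_PACKET_HEADROOM plus
 * the requested headroom. On success the pages are accounted, pinned
 * and their kernel addresses cached in umem->pages.
 */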
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	unsigned int chunks, chunks_per_page;
	u64 addr = mr->addr, size = mr->len;
	int size_chk, err, i;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* The memory area has to be page size aligned. This is a
		 * simplification that might be lifted in the future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	chunks_per_page = PAGE_SIZE / chunk_size;
	if (chunks < chunks_per_page || chunks % chunks_per_page)
		return -EINVAL;

	headroom = ALIGN(headroom, 64);

	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->address = (unsigned long)addr;
	umem->props.chunk_mask = ~((u64)chunk_size - 1);
	umem->props.size = size;
	umem->headroom = headroom;
	umem->chunk_size_nohr = chunk_size - headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;
	INIT_LIST_HEAD(&umem->xsk_list);
	spin_lock_init(&umem->xsk_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;

	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
	if (!umem->pages) {
		err = -ENOMEM;
		goto out_pin;
	}

	for (i = 0; i < umem->npgs; i++)
		umem->pages[i].addr = page_address(umem->pgs[i]);

	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

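/* Allocate a umem and register it from an XDP_UMEM_REG request. */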
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

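/* Both the fill and the completion ring must exist before the umem can
 * be used.
 */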
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}