xref: /linux/net/xdp/xdp_umem.c (revision f6f3bac08ff9855d803081a353a1fafaa8845739)
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

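/* Add an XDP socket to the umem's list of bound sockets. The list is
 * modified under xsk_list_lock with interrupts disabled and is walked by
 * readers under RCU, hence list_add_rcu().
 */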
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

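/* Remove an XDP socket from the umem's socket list. Only sockets that
 * were bound to a device (xs->dev set) are on the list. For zero-copy
 * umems, wait for an RCU grace period so that concurrent readers of the
 * list are guaranteed to have finished.
 */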
void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	if (xs->dev) {
		spin_lock_irqsave(&umem->xsk_list_lock, flags);
		list_del_rcu(&xs->list);
		spin_unlock_irqrestore(&umem->xsk_list_lock, flags);

		if (umem->zc)
			synchronize_net();
	}
}

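/* Ask the driver, under RTNL, whether a umem is already attached to
 * @queue_id. Returns a negative errno from the driver, 1 if a umem is
 * already set, and 0 if the queue is free or the driver has no ndo_bpf.
 */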
int xdp_umem_query(struct net_device *dev, u16 queue_id)
{
	struct netdev_bpf bpf;

	ASSERT_RTNL();

	memset(&bpf, 0, sizeof(bpf));
	bpf.command = XDP_QUERY_XSK_UMEM;
	bpf.xsk.queue_id = queue_id;

	if (!dev->netdev_ops->ndo_bpf)
		return 0;
	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
}

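/* Try to set up zero-copy for the umem on queue @queue_id of @dev.
 * XDP_COPY forces copy mode and returns without touching the driver. If
 * the driver lacks the required ndo hooks, the queue already has a umem,
 * or the driver rejects the setup, the call fails when XDP_ZEROCOPY was
 * requested and silently falls back to copy mode otherwise. On success a
 * reference to @dev is held and umem->zc is set.
 */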
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u32 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err;

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (force_copy)
		return 0;

	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
		return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */

	rtnl_lock();
	err = xdp_umem_query(dev, queue_id);
	if (err) {
		err = err < 0 ? -EOPNOTSUPP : -EBUSY;
		goto err_rtnl_unlock;
	}

	bpf.command = XDP_SETUP_XSK_UMEM;
	bpf.xsk.umem = umem;
	bpf.xsk.queue_id = queue_id;

	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
	if (err)
		goto err_rtnl_unlock;
	rtnl_unlock();

	dev_hold(dev);
	umem->dev = dev;
	umem->queue_id = queue_id;
	umem->zc = true;
	return 0;

err_rtnl_unlock:
	rtnl_unlock();
	return force_zc ? err : 0; /* fail or fallback */
}

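/* Undo xdp_umem_assign_dev(): if a device is attached, ask the driver
 * (under RTNL) to drop the umem from the queue, warn if that fails, and
 * release the device reference.
 */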
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	struct netdev_bpf bpf;
	int err;

	if (umem->dev) {
		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = NULL;
		bpf.xsk.queue_id = umem->queue_id;

		rtnl_lock();
		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
		rtnl_unlock();

		if (err)
			WARN(1, "failed to disable umem!\n");

		dev_put(umem->dev);
		umem->dev = NULL;
	}
}

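/* Release the page references taken by xdp_umem_pin_pages(): mark each
 * page dirty, drop its reference and free the page pointer array.
 */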
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

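/* Return the pinned pages to the user's locked-memory (RLIMIT_MEMLOCK)
 * accounting and drop the reference on the user struct.
 */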
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

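/* Final teardown, run once the last reference to the umem is gone:
 * detach from the device, destroy the fill and completion queues, unpin
 * the user pages and, provided the owning task and its mm can still be
 * found, free the page address array and undo the locked-memory
 * accounting.
 */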
static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	xdp_umem_clear_dev(umem);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xdp_umem_unpin_pages(umem);

	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	mmput(mm);
	kfree(umem->pages);
	umem->pages = NULL;

	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);
}

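/* Work item wrapper so that the actual release runs in process context. */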
static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

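/* Take a reference on the umem. */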
void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

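/* Drop a reference on the umem. When the last reference goes away, the
 * teardown is deferred to a workqueue, since it takes RTNL and may sleep.
 */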
void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

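/* Allocate the page pointer array and pin the user memory region with
 * get_user_pages() (FOLL_WRITE), holding mmap_sem across the call. A
 * partial pin is treated as -ENOMEM and the pages that were pinned are
 * released again.
 */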
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

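/* Charge the pinned pages against the caller's RLIMIT_MEMLOCK, unless it
 * has CAP_IPC_LOCK. The charge is applied with a cmpxchg loop on the
 * user's locked_vm counter so that concurrent registrations cannot race
 * past the limit.
 */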
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

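/* Validate a registration request from user space and initialize the
 * umem: check chunk size, alignment, area size and headroom, fill in the
 * derived fields, charge and pin the user pages and record their kernel
 * addresses in umem->pages.
 */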
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	unsigned int chunks, chunks_per_page;
	u64 addr = mr->addr, size = mr->len;
	int size_chk, err, i;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is contiguous
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. This is kept
		 * simple for now; the requirement might be relaxed in the
		 * future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	chunks_per_page = PAGE_SIZE / chunk_size;
	if (chunks < chunks_per_page || chunks % chunks_per_page)
		return -EINVAL;

	headroom = ALIGN(headroom, 64);

	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->address = (unsigned long)addr;
	umem->chunk_mask = ~((u64)chunk_size - 1);
	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size_nohr = chunk_size - headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;
	INIT_LIST_HEAD(&umem->xsk_list);
	spin_lock_init(&umem->xsk_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;

	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
	if (!umem->pages) {
		err = -ENOMEM;
		goto out_account;
	}

	for (i = 0; i < umem->npgs; i++)
		umem->pages[i].addr = page_address(umem->pgs[i]);

	return 0;

out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

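/* Allocate and register a umem from the xdp_umem_reg descriptor handed
 * in by user space; returns an ERR_PTR on failure. The descriptor is
 * normally filled in via the XDP_UMEM_REG setsockopt on an AF_XDP
 * socket; as an illustrative sketch (not part of this file), the
 * user-space side looks roughly like:
 *
 *	struct xdp_umem_reg mr = {
 *		.addr = (__u64)(uintptr_t)buf,	// page-aligned area
 *		.len = size,			// whole pages, multiple of chunk_size
 *		.chunk_size = 2048,
 *		.headroom = 0,
 *	};
 *	setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 */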
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

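/* A umem is only complete once both its fill queue (fq) and completion
 * queue (cq) have been created.
 */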
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}