xref: /linux/drivers/infiniband/core/ib_core_uverbs.c (revision ab868c10971c5d2cd27b3709d11225941eabe78e)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
5  * Copyright 2019 Marvell. All rights reserved.
6  */
7 #include <linux/xarray.h>
8 #include <linux/dma-buf.h>
9 #include <linux/dma-resv.h>
10 #include "uverbs.h"
11 #include "core_priv.h"
12 #include "rdma_core.h"
13 
14 MODULE_IMPORT_NS("DMA_BUF");
15 
16 /**
17  * rdma_umap_priv_init() - Initialize the private data of a vma
18  *
19  * @priv: The already allocated private data
20  * @vma: The vm area struct that needs private data
21  * @entry: entry into the mmap_xa that needs to be linked with
22  *       this vma
23  *
24  * Each time we map IO memory into user space this keeps track of the
25  * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
26  * to point to the zero page and allow the hot unplug to proceed.
27  *
28  * This is necessary for cases like PCI physical hot unplug as the actual BAR
29  * memory may vanish after this and access to it from userspace could MCE.
30  *
31  * RDMA drivers supporting disassociation must have their user space designed
32  * to cope in some way with their IO pages going to the zero page.
33  *
34  */
rdma_umap_priv_init(struct rdma_umap_priv * priv,struct vm_area_struct * vma,struct rdma_user_mmap_entry * entry)35 void rdma_umap_priv_init(struct rdma_umap_priv *priv,
36 			 struct vm_area_struct *vma,
37 			 struct rdma_user_mmap_entry *entry)
38 {
39 	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
40 
41 	priv->vma = vma;
42 	if (entry) {
43 		kref_get(&entry->ref);
44 		priv->entry = entry;
45 	}
46 	vma->vm_private_data = priv;
47 	/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
48 
49 	mutex_lock(&ufile->umap_lock);
50 	list_add(&priv->list, &ufile->umaps);
51 	mutex_unlock(&ufile->umap_lock);
52 }
53 EXPORT_SYMBOL(rdma_umap_priv_init);
54 
55 /**
56  * rdma_user_mmap_io() - Map IO memory into a process
57  *
58  * @ucontext: associated user context
59  * @vma: the vma related to the current mmap call
60  * @pfn: pfn to map
61  * @size: size to map
62  * @prot: pgprot to use in remap call
63  * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
64  *         if mmap_entry is not used by the driver
65  *
66  * This is to be called by drivers as part of their mmap() functions if they
67  * wish to send something like PCI-E BAR memory to userspace.
68  *
69  * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
70  * success.
71  */
rdma_user_mmap_io(struct ib_ucontext * ucontext,struct vm_area_struct * vma,unsigned long pfn,unsigned long size,pgprot_t prot,struct rdma_user_mmap_entry * entry)72 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
73 		      unsigned long pfn, unsigned long size, pgprot_t prot,
74 		      struct rdma_user_mmap_entry *entry)
75 {
76 	struct ib_uverbs_file *ufile = ucontext->ufile;
77 	struct rdma_umap_priv *priv;
78 
79 	if (!(vma->vm_flags & VM_SHARED))
80 		return -EINVAL;
81 
82 	if (vma->vm_end - vma->vm_start != size)
83 		return -EINVAL;
84 
85 	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
86 	if (WARN_ON(!vma->vm_file ||
87 		    vma->vm_file->private_data != ufile))
88 		return -EINVAL;
89 	lockdep_assert_held(&ufile->device->disassociate_srcu);
90 
91 	priv = kzalloc_obj(*priv);
92 	if (!priv)
93 		return -ENOMEM;
94 
95 	vma->vm_page_prot = prot;
96 	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
97 		kfree(priv);
98 		return -EAGAIN;
99 	}
100 
101 	rdma_umap_priv_init(priv, vma, entry);
102 	return 0;
103 }
104 EXPORT_SYMBOL(rdma_user_mmap_io);
105 
106 /**
107  * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
108  *
109  * @ucontext: associated user context
110  * @pgoff: The mmap offset >> PAGE_SHIFT
111  *
112  * This function is called when a user tries to mmap with an offset (returned
113  * by rdma_user_mmap_get_offset()) it initially received from the driver. The
114  * rdma_user_mmap_entry was created by the function
115  * rdma_user_mmap_entry_insert().  This function increases the refcnt of the
116  * entry so that it won't be deleted from the xarray in the meantime.
117  *
118  * Return an reference to an entry if exists or NULL if there is no
119  * match. rdma_user_mmap_entry_put() must be called to put the reference.
120  */
121 struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext * ucontext,unsigned long pgoff)122 rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
123 			       unsigned long pgoff)
124 {
125 	struct rdma_user_mmap_entry *entry;
126 
127 	if (pgoff > U32_MAX)
128 		return NULL;
129 
130 	xa_lock(&ucontext->mmap_xa);
131 
132 	entry = xa_load(&ucontext->mmap_xa, pgoff);
133 
134 	/*
135 	 * If refcount is zero, entry is already being deleted, driver_removed
136 	 * indicates that the no further mmaps are possible and we waiting for
137 	 * the active VMAs to be closed.
138 	 */
139 	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
140 	    !kref_get_unless_zero(&entry->ref))
141 		goto err;
142 
143 	xa_unlock(&ucontext->mmap_xa);
144 
145 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
146 		  pgoff, entry->npages);
147 
148 	return entry;
149 
150 err:
151 	xa_unlock(&ucontext->mmap_xa);
152 	return NULL;
153 }
154 EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
155 
156 /**
157  * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
158  *
159  * @ucontext: associated user context
160  * @vma: the vma being mmap'd into
161  *
162  * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
163  * checks that the VMA is correct.
164  */
165 struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext * ucontext,struct vm_area_struct * vma)166 rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
167 			 struct vm_area_struct *vma)
168 {
169 	struct rdma_user_mmap_entry *entry;
170 
171 	if (!(vma->vm_flags & VM_SHARED))
172 		return NULL;
173 	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
174 	if (!entry)
175 		return NULL;
176 	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
177 		rdma_user_mmap_entry_put(entry);
178 		return NULL;
179 	}
180 	return entry;
181 }
182 EXPORT_SYMBOL(rdma_user_mmap_entry_get);
183 
rdma_user_mmap_entry_free(struct kref * kref)184 static void rdma_user_mmap_entry_free(struct kref *kref)
185 {
186 	struct rdma_user_mmap_entry *entry =
187 		container_of(kref, struct rdma_user_mmap_entry, ref);
188 	struct ib_ucontext *ucontext = entry->ucontext;
189 	unsigned long i;
190 
191 	/*
192 	 * Erase all entries occupied by this single entry, this is deferred
193 	 * until all VMA are closed so that the mmap offsets remain unique.
194 	 */
195 	xa_lock(&ucontext->mmap_xa);
196 	for (i = 0; i < entry->npages; i++)
197 		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
198 	xa_unlock(&ucontext->mmap_xa);
199 
200 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
201 		  entry->start_pgoff, entry->npages);
202 
203 	if (ucontext->device->ops.mmap_free)
204 		ucontext->device->ops.mmap_free(entry);
205 }
206 
207 /**
208  * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
209  *
210  * @entry: an entry in the mmap_xa
211  *
212  * This function is called when the mapping is closed if it was
213  * an io mapping or when the driver is done with the entry for
214  * some other reason.
215  * Should be called after rdma_user_mmap_entry_get was called
216  * and entry is no longer needed. This function will erase the
217  * entry and free it if its refcnt reaches zero.
218  */
rdma_user_mmap_entry_put(struct rdma_user_mmap_entry * entry)219 void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
220 {
221 	kref_put(&entry->ref, rdma_user_mmap_entry_free);
222 }
223 EXPORT_SYMBOL(rdma_user_mmap_entry_put);
224 
225 /**
226  * rdma_user_mmap_entry_remove() - Drop reference to entry and
227  *				   mark it as unmmapable
228  *
229  * @entry: the entry to insert into the mmap_xa
230  *
231  * Drivers can call this to prevent userspace from creating more mappings for
232  * entry, however existing mmaps continue to exist and ops->mmap_free() will
233  * not be called until all user mmaps are destroyed.
234  */
rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry * entry)235 void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
236 {
237 	struct ib_uverbs_dmabuf_file *uverbs_dmabuf, *tmp;
238 
239 	if (!entry)
240 		return;
241 
242 	mutex_lock(&entry->dmabufs_lock);
243 	xa_lock(&entry->ucontext->mmap_xa);
244 	entry->driver_removed = true;
245 	xa_unlock(&entry->ucontext->mmap_xa);
246 	list_for_each_entry_safe(uverbs_dmabuf, tmp, &entry->dmabufs, dmabufs_elm) {
247 		dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
248 		list_del(&uverbs_dmabuf->dmabufs_elm);
249 		uverbs_dmabuf->revoked = true;
250 		dma_buf_invalidate_mappings(uverbs_dmabuf->dmabuf);
251 		dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
252 				      DMA_RESV_USAGE_BOOKKEEP, false,
253 				      MAX_SCHEDULE_TIMEOUT);
254 		dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
255 		kref_put(&uverbs_dmabuf->kref, ib_uverbs_dmabuf_done);
256 		wait_for_completion(&uverbs_dmabuf->comp);
257 	}
258 	mutex_unlock(&entry->dmabufs_lock);
259 
260 	kref_put(&entry->ref, rdma_user_mmap_entry_free);
261 }
262 EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
263 
264 /**
265  * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
266  *					 in a given range.
267  *
268  * @ucontext: associated user context.
269  * @entry: the entry to insert into the mmap_xa
270  * @length: length of the address that will be mmapped
271  * @min_pgoff: minimum pgoff to be returned
272  * @max_pgoff: maximum pgoff to be returned
273  *
274  * This function should be called by drivers that use the rdma_user_mmap
275  * interface for implementing their mmap syscall A database of mmap offsets is
276  * handled in the core and helper functions are provided to insert entries
277  * into the database and extract entries when the user calls mmap with the
278  * given offset. The function allocates a unique page offset in a given range
279  * that should be provided to user, the user will use the offset to retrieve
280  * information such as address to be mapped and how.
281  *
282  * Return: 0 on success and -ENOMEM on failure
283  */
rdma_user_mmap_entry_insert_range(struct ib_ucontext * ucontext,struct rdma_user_mmap_entry * entry,size_t length,u32 min_pgoff,u32 max_pgoff)284 int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
285 				      struct rdma_user_mmap_entry *entry,
286 				      size_t length, u32 min_pgoff,
287 				      u32 max_pgoff)
288 {
289 	struct ib_uverbs_file *ufile = ucontext->ufile;
290 	XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
291 	u32 xa_first, xa_last, npages;
292 	int err;
293 	u32 i;
294 
295 	if (!entry)
296 		return -EINVAL;
297 
298 	kref_init(&entry->ref);
299 	INIT_LIST_HEAD(&entry->dmabufs);
300 	mutex_init(&entry->dmabufs_lock);
301 
302 	entry->ucontext = ucontext;
303 
304 	/*
305 	 * We want the whole allocation to be done without interruption from a
306 	 * different thread. The allocation requires finding a free range and
307 	 * storing. During the xa_insert the lock could be released, possibly
308 	 * allowing another thread to choose the same range.
309 	 */
310 	mutex_lock(&ufile->umap_lock);
311 
312 	xa_lock(&ucontext->mmap_xa);
313 
314 	/* We want to find an empty range */
315 	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
316 	entry->npages = npages;
317 	while (true) {
318 		/* First find an empty index */
319 		xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
320 		if (xas.xa_node == XAS_RESTART)
321 			goto err_unlock;
322 
323 		xa_first = xas.xa_index;
324 
325 		/* Is there enough room to have the range? */
326 		if (check_add_overflow(xa_first, npages, &xa_last))
327 			goto err_unlock;
328 
329 		/*
330 		 * Now look for the next present entry. If an entry doesn't
331 		 * exist, we found an empty range and can proceed.
332 		 */
333 		xas_next_entry(&xas, xa_last - 1);
334 		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
335 			break;
336 	}
337 
338 	for (i = xa_first; i < xa_last; i++) {
339 		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
340 		if (err)
341 			goto err_undo;
342 	}
343 
344 	/*
345 	 * Internally the kernel uses a page offset, in libc this is a byte
346 	 * offset. Drivers should not return pgoff to userspace.
347 	 */
348 	entry->start_pgoff = xa_first;
349 	xa_unlock(&ucontext->mmap_xa);
350 	mutex_unlock(&ufile->umap_lock);
351 
352 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
353 		  entry->start_pgoff, npages);
354 
355 	return 0;
356 
357 err_undo:
358 	for (; i > xa_first; i--)
359 		__xa_erase(&ucontext->mmap_xa, i - 1);
360 
361 err_unlock:
362 	xa_unlock(&ucontext->mmap_xa);
363 	mutex_unlock(&ufile->umap_lock);
364 	return -ENOMEM;
365 }
366 EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
367 
368 /**
369  * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
370  *
371  * @ucontext: associated user context.
372  * @entry: the entry to insert into the mmap_xa
373  * @length: length of the address that will be mmapped
374  *
375  * This function should be called by drivers that use the rdma_user_mmap
376  * interface for handling user mmapped addresses. The database is handled in
377  * the core and helper functions are provided to insert entries into the
378  * database and extract entries when the user calls mmap with the given offset.
379  * The function allocates a unique page offset that should be provided to user,
380  * the user will use the offset to retrieve information such as address to
381  * be mapped and how.
382  *
383  * Return: 0 on success and -ENOMEM on failure
384  */
rdma_user_mmap_entry_insert(struct ib_ucontext * ucontext,struct rdma_user_mmap_entry * entry,size_t length)385 int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
386 				struct rdma_user_mmap_entry *entry,
387 				size_t length)
388 {
389 	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
390 						 U32_MAX);
391 }
392 EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
393 
394 /**
395  * rdma_udata_to_dev - Get a ib_device from a udata
396  * @udata: The system calls ib_udata struct
397  *
398  * The struct ib_device that is handling the uverbs call. Must not be called if
399  * udata is NULL. The result can be NULL.
400  */
rdma_udata_to_dev(struct ib_udata * udata)401 struct ib_device *rdma_udata_to_dev(struct ib_udata *udata)
402 {
403 	struct uverbs_attr_bundle *bundle =
404 		rdma_udata_to_uverbs_attr_bundle(udata);
405 
406 	lockdep_assert_held(&bundle->ufile->device->disassociate_srcu);
407 
408 	if (bundle->context)
409 		return bundle->context->device;
410 
411 	/*
412 	 * If the context hasn't been created yet use the ufile's dev, but it
413 	 * might be NULL if we are racing with disassociate.
414 	 */
415 	return srcu_dereference(bundle->ufile->device->ib_dev,
416 				&bundle->ufile->device->disassociate_srcu);
417 }
418 EXPORT_SYMBOL(rdma_udata_to_dev);
419 
420 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
uverbs_get_handler_fn(struct ib_udata * udata)421 uverbs_api_ioctl_handler_fn uverbs_get_handler_fn(struct ib_udata *udata)
422 {
423 	struct uverbs_attr_bundle *bundle =
424 		rdma_udata_to_uverbs_attr_bundle(udata);
425 
426 	lockdep_assert_held(&bundle->ufile->device->disassociate_srcu);
427 
428 	return srcu_dereference(bundle->method_elm->handler,
429 				&bundle->ufile->device->disassociate_srcu);
430 }
431 
/**
 * _ib_copy_validate_udata_in - Copy the driver input udata into the kernel
 * @udata: The system calls ib_udata struct
 * @req: Kernel buffer of @kernel_size bytes that receives the input
 * @kernel_size: Size of the structure the kernel knows about
 * @minimum_size: Smallest input length that is accepted from user space
 *
 * The input is copied with copy_struct_from_user(), so user space may pass
 * a structure that is shorter or longer than @kernel_size. Every failure is
 * reported through ibdev_dbg() together with the ioctl handler and the
 * caller of this function.
 *
 * Return: 0 on success, -EINVAL if the input is shorter than @minimum_size,
 * -EOPNOTSUPP if copy_struct_from_user() fails with -E2BIG, which it does
 * when the input is longer than @kernel_size and the excess bytes are not
 * all zero, otherwise the error returned by copy_struct_from_user().
 */
int _ib_copy_validate_udata_in(struct ib_udata *udata, void *req,
			       size_t kernel_size, size_t minimum_size)
{
	int err;

	if (udata->inlen < minimum_size) {
		ibdev_dbg(
			rdma_udata_to_dev(udata),
			"System call driver input udata too small (%zu < %zu) for ioctl %ps called by %pSR\n",
			udata->inlen, minimum_size,
			uverbs_get_handler_fn(udata),
			__builtin_return_address(0));
		return -EINVAL;
	}

	err = copy_struct_from_user(req, kernel_size, udata->inbuf,
				    udata->inlen);
	if (!err)
		return 0;

	if (err == -E2BIG) {
		/*
		 * The bytes that have to be zero are the ones past what the
		 * kernel structure holds, so the reported range starts at
		 * kernel_size and not at minimum_size.
		 */
		ibdev_dbg(
			rdma_udata_to_dev(udata),
			"System call driver input udata not zero from %zu -> %zu for ioctl %ps called by %pSR\n",
			kernel_size, udata->inlen,
			uverbs_get_handler_fn(udata),
			__builtin_return_address(0));
		return -EOPNOTSUPP;
	}

	ibdev_dbg(
		rdma_udata_to_dev(udata),
		"System call driver input udata EFAULT for ioctl %ps called by %pSR\n",
		uverbs_get_handler_fn(udata),
		__builtin_return_address(0));
	return err;
}
EXPORT_SYMBOL(_ib_copy_validate_udata_in);
469 
/*
 * Failure path for a comp_mask check on driver input udata: logs the
 * requested mask, the supported mask and the unsupported bits together with
 * the ioctl handler and the caller, then returns -EOPNOTSUPP.
 */
int _ib_copy_validate_udata_cm_fail(struct ib_udata *udata, u64 req_cm,
				    u64 valid_cm)
{
	u64 unsupported = req_cm & ~valid_cm;

	ibdev_dbg(
		rdma_udata_to_dev(udata),
		"System call driver input udata has unsupported comp_mask %llx & ~%llx = %llx for ioctl %ps called by %pSR\n",
		req_cm, valid_cm, unsupported,
		uverbs_get_handler_fn(udata), __builtin_return_address(0));
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(_ib_copy_validate_udata_cm_fail);
481 
/*
 * Copy a driver response to the user's output buffer. At most udata->outlen
 * bytes of @src are copied, and when @len is shorter than the output buffer
 * the remainder of the buffer is cleared. Returns 0 on success or -EFAULT,
 * after a debug message, if either user space access fails.
 */
int _ib_respond_udata(struct ib_udata *udata, const void *src, size_t len)
{
	size_t copy_len;
	bool fault;

	/* 0 length copy_len is a NOP for copy_to_user() and doesn't fail. */
	copy_len = min(len, udata->outlen);

	fault = copy_to_user(udata->outbuf, src, copy_len) != 0;
	/* The tail is only cleared when the copy itself succeeded */
	if (!fault && copy_len < udata->outlen)
		fault = clear_user(udata->outbuf + copy_len,
				   udata->outlen - copy_len) != 0;
	if (!fault)
		return 0;

	ibdev_dbg(
		rdma_udata_to_dev(udata),
		"System call driver out udata has EFAULT (%zu into %zu) for ioctl %ps called by %pSR\n",
		len, udata->outlen, uverbs_get_handler_fn(udata),
		__builtin_return_address(0));
	return -EFAULT;
}
EXPORT_SYMBOL(_ib_respond_udata);
505 #endif
506