xref: /linux/drivers/infiniband/core/ib_core_uverbs.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
5  * Copyright 2019 Marvell. All rights reserved.
6  */
7 #include <linux/xarray.h>
8 #include <linux/dma-buf.h>
9 #include <linux/dma-resv.h>
10 #include "uverbs.h"
11 #include "core_priv.h"
12 
13 MODULE_IMPORT_NS("DMA_BUF");
14 
15 /**
16  * rdma_umap_priv_init() - Initialize the private data of a vma
17  *
18  * @priv: The already allocated private data
19  * @vma: The vm area struct that needs private data
20  * @entry: entry into the mmap_xa that needs to be linked with
21  *       this vma
22  *
23  * Each time we map IO memory into user space this keeps track of the
24  * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
25  * to point to the zero page and allow the hot unplug to proceed.
26  *
27  * This is necessary for cases like PCI physical hot unplug as the actual BAR
28  * memory may vanish after this and access to it from userspace could MCE.
29  *
30  * RDMA drivers supporting disassociation must have their user space designed
31  * to cope in some way with their IO pages going to the zero page.
32  *
33  */
rdma_umap_priv_init(struct rdma_umap_priv * priv,struct vm_area_struct * vma,struct rdma_user_mmap_entry * entry)34 void rdma_umap_priv_init(struct rdma_umap_priv *priv,
35 			 struct vm_area_struct *vma,
36 			 struct rdma_user_mmap_entry *entry)
37 {
38 	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
39 
40 	priv->vma = vma;
41 	if (entry) {
42 		kref_get(&entry->ref);
43 		priv->entry = entry;
44 	}
45 	vma->vm_private_data = priv;
46 	/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
47 
48 	mutex_lock(&ufile->umap_lock);
49 	list_add(&priv->list, &ufile->umaps);
50 	mutex_unlock(&ufile->umap_lock);
51 }
52 EXPORT_SYMBOL(rdma_umap_priv_init);
53 
54 /**
55  * rdma_user_mmap_io() - Map IO memory into a process
56  *
57  * @ucontext: associated user context
58  * @vma: the vma related to the current mmap call
59  * @pfn: pfn to map
60  * @size: size to map
61  * @prot: pgprot to use in remap call
62  * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
63  *         if mmap_entry is not used by the driver
64  *
65  * This is to be called by drivers as part of their mmap() functions if they
66  * wish to send something like PCI-E BAR memory to userspace.
67  *
68  * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
69  * success.
70  */
rdma_user_mmap_io(struct ib_ucontext * ucontext,struct vm_area_struct * vma,unsigned long pfn,unsigned long size,pgprot_t prot,struct rdma_user_mmap_entry * entry)71 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
72 		      unsigned long pfn, unsigned long size, pgprot_t prot,
73 		      struct rdma_user_mmap_entry *entry)
74 {
75 	struct ib_uverbs_file *ufile = ucontext->ufile;
76 	struct rdma_umap_priv *priv;
77 
78 	if (!(vma->vm_flags & VM_SHARED))
79 		return -EINVAL;
80 
81 	if (vma->vm_end - vma->vm_start != size)
82 		return -EINVAL;
83 
84 	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
85 	if (WARN_ON(!vma->vm_file ||
86 		    vma->vm_file->private_data != ufile))
87 		return -EINVAL;
88 	lockdep_assert_held(&ufile->device->disassociate_srcu);
89 
90 	priv = kzalloc_obj(*priv);
91 	if (!priv)
92 		return -ENOMEM;
93 
94 	vma->vm_page_prot = prot;
95 	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
96 		kfree(priv);
97 		return -EAGAIN;
98 	}
99 
100 	rdma_umap_priv_init(priv, vma, entry);
101 	return 0;
102 }
103 EXPORT_SYMBOL(rdma_user_mmap_io);
104 
105 /**
106  * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
107  *
108  * @ucontext: associated user context
109  * @pgoff: The mmap offset >> PAGE_SHIFT
110  *
111  * This function is called when a user tries to mmap with an offset (returned
112  * by rdma_user_mmap_get_offset()) it initially received from the driver. The
113  * rdma_user_mmap_entry was created by the function
114  * rdma_user_mmap_entry_insert().  This function increases the refcnt of the
115  * entry so that it won't be deleted from the xarray in the meantime.
116  *
117  * Return an reference to an entry if exists or NULL if there is no
118  * match. rdma_user_mmap_entry_put() must be called to put the reference.
119  */
120 struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext * ucontext,unsigned long pgoff)121 rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
122 			       unsigned long pgoff)
123 {
124 	struct rdma_user_mmap_entry *entry;
125 
126 	if (pgoff > U32_MAX)
127 		return NULL;
128 
129 	xa_lock(&ucontext->mmap_xa);
130 
131 	entry = xa_load(&ucontext->mmap_xa, pgoff);
132 
133 	/*
134 	 * If refcount is zero, entry is already being deleted, driver_removed
135 	 * indicates that the no further mmaps are possible and we waiting for
136 	 * the active VMAs to be closed.
137 	 */
138 	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
139 	    !kref_get_unless_zero(&entry->ref))
140 		goto err;
141 
142 	xa_unlock(&ucontext->mmap_xa);
143 
144 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
145 		  pgoff, entry->npages);
146 
147 	return entry;
148 
149 err:
150 	xa_unlock(&ucontext->mmap_xa);
151 	return NULL;
152 }
153 EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
154 
155 /**
156  * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
157  *
158  * @ucontext: associated user context
159  * @vma: the vma being mmap'd into
160  *
161  * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
162  * checks that the VMA is correct.
163  */
164 struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext * ucontext,struct vm_area_struct * vma)165 rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
166 			 struct vm_area_struct *vma)
167 {
168 	struct rdma_user_mmap_entry *entry;
169 
170 	if (!(vma->vm_flags & VM_SHARED))
171 		return NULL;
172 	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
173 	if (!entry)
174 		return NULL;
175 	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
176 		rdma_user_mmap_entry_put(entry);
177 		return NULL;
178 	}
179 	return entry;
180 }
181 EXPORT_SYMBOL(rdma_user_mmap_entry_get);
182 
rdma_user_mmap_entry_free(struct kref * kref)183 static void rdma_user_mmap_entry_free(struct kref *kref)
184 {
185 	struct rdma_user_mmap_entry *entry =
186 		container_of(kref, struct rdma_user_mmap_entry, ref);
187 	struct ib_ucontext *ucontext = entry->ucontext;
188 	unsigned long i;
189 
190 	/*
191 	 * Erase all entries occupied by this single entry, this is deferred
192 	 * until all VMA are closed so that the mmap offsets remain unique.
193 	 */
194 	xa_lock(&ucontext->mmap_xa);
195 	for (i = 0; i < entry->npages; i++)
196 		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
197 	xa_unlock(&ucontext->mmap_xa);
198 
199 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
200 		  entry->start_pgoff, entry->npages);
201 
202 	if (ucontext->device->ops.mmap_free)
203 		ucontext->device->ops.mmap_free(entry);
204 }
205 
206 /**
207  * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
208  *
209  * @entry: an entry in the mmap_xa
210  *
211  * This function is called when the mapping is closed if it was
212  * an io mapping or when the driver is done with the entry for
213  * some other reason.
214  * Should be called after rdma_user_mmap_entry_get was called
215  * and entry is no longer needed. This function will erase the
216  * entry and free it if its refcnt reaches zero.
217  */
rdma_user_mmap_entry_put(struct rdma_user_mmap_entry * entry)218 void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
219 {
220 	kref_put(&entry->ref, rdma_user_mmap_entry_free);
221 }
222 EXPORT_SYMBOL(rdma_user_mmap_entry_put);
223 
224 /**
225  * rdma_user_mmap_entry_remove() - Drop reference to entry and
226  *				   mark it as unmmapable
227  *
228  * @entry: the entry to insert into the mmap_xa
229  *
230  * Drivers can call this to prevent userspace from creating more mappings for
231  * entry, however existing mmaps continue to exist and ops->mmap_free() will
232  * not be called until all user mmaps are destroyed.
233  */
rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry * entry)234 void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
235 {
236 	struct ib_uverbs_dmabuf_file *uverbs_dmabuf, *tmp;
237 
238 	if (!entry)
239 		return;
240 
241 	mutex_lock(&entry->dmabufs_lock);
242 	xa_lock(&entry->ucontext->mmap_xa);
243 	entry->driver_removed = true;
244 	xa_unlock(&entry->ucontext->mmap_xa);
245 	list_for_each_entry_safe(uverbs_dmabuf, tmp, &entry->dmabufs, dmabufs_elm) {
246 		dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
247 		list_del(&uverbs_dmabuf->dmabufs_elm);
248 		uverbs_dmabuf->revoked = true;
249 		dma_buf_move_notify(uverbs_dmabuf->dmabuf);
250 		dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
251 				      DMA_RESV_USAGE_BOOKKEEP, false,
252 				      MAX_SCHEDULE_TIMEOUT);
253 		dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
254 		kref_put(&uverbs_dmabuf->kref, ib_uverbs_dmabuf_done);
255 		wait_for_completion(&uverbs_dmabuf->comp);
256 	}
257 	mutex_unlock(&entry->dmabufs_lock);
258 
259 	kref_put(&entry->ref, rdma_user_mmap_entry_free);
260 }
261 EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
262 
263 /**
264  * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
265  *					 in a given range.
266  *
267  * @ucontext: associated user context.
268  * @entry: the entry to insert into the mmap_xa
269  * @length: length of the address that will be mmapped
270  * @min_pgoff: minimum pgoff to be returned
271  * @max_pgoff: maximum pgoff to be returned
272  *
273  * This function should be called by drivers that use the rdma_user_mmap
274  * interface for implementing their mmap syscall A database of mmap offsets is
275  * handled in the core and helper functions are provided to insert entries
276  * into the database and extract entries when the user calls mmap with the
277  * given offset. The function allocates a unique page offset in a given range
278  * that should be provided to user, the user will use the offset to retrieve
279  * information such as address to be mapped and how.
280  *
281  * Return: 0 on success and -ENOMEM on failure
282  */
rdma_user_mmap_entry_insert_range(struct ib_ucontext * ucontext,struct rdma_user_mmap_entry * entry,size_t length,u32 min_pgoff,u32 max_pgoff)283 int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
284 				      struct rdma_user_mmap_entry *entry,
285 				      size_t length, u32 min_pgoff,
286 				      u32 max_pgoff)
287 {
288 	struct ib_uverbs_file *ufile = ucontext->ufile;
289 	XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
290 	u32 xa_first, xa_last, npages;
291 	int err;
292 	u32 i;
293 
294 	if (!entry)
295 		return -EINVAL;
296 
297 	kref_init(&entry->ref);
298 	INIT_LIST_HEAD(&entry->dmabufs);
299 	mutex_init(&entry->dmabufs_lock);
300 
301 	entry->ucontext = ucontext;
302 
303 	/*
304 	 * We want the whole allocation to be done without interruption from a
305 	 * different thread. The allocation requires finding a free range and
306 	 * storing. During the xa_insert the lock could be released, possibly
307 	 * allowing another thread to choose the same range.
308 	 */
309 	mutex_lock(&ufile->umap_lock);
310 
311 	xa_lock(&ucontext->mmap_xa);
312 
313 	/* We want to find an empty range */
314 	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
315 	entry->npages = npages;
316 	while (true) {
317 		/* First find an empty index */
318 		xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
319 		if (xas.xa_node == XAS_RESTART)
320 			goto err_unlock;
321 
322 		xa_first = xas.xa_index;
323 
324 		/* Is there enough room to have the range? */
325 		if (check_add_overflow(xa_first, npages, &xa_last))
326 			goto err_unlock;
327 
328 		/*
329 		 * Now look for the next present entry. If an entry doesn't
330 		 * exist, we found an empty range and can proceed.
331 		 */
332 		xas_next_entry(&xas, xa_last - 1);
333 		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
334 			break;
335 	}
336 
337 	for (i = xa_first; i < xa_last; i++) {
338 		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
339 		if (err)
340 			goto err_undo;
341 	}
342 
343 	/*
344 	 * Internally the kernel uses a page offset, in libc this is a byte
345 	 * offset. Drivers should not return pgoff to userspace.
346 	 */
347 	entry->start_pgoff = xa_first;
348 	xa_unlock(&ucontext->mmap_xa);
349 	mutex_unlock(&ufile->umap_lock);
350 
351 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
352 		  entry->start_pgoff, npages);
353 
354 	return 0;
355 
356 err_undo:
357 	for (; i > xa_first; i--)
358 		__xa_erase(&ucontext->mmap_xa, i - 1);
359 
360 err_unlock:
361 	xa_unlock(&ucontext->mmap_xa);
362 	mutex_unlock(&ufile->umap_lock);
363 	return -ENOMEM;
364 }
365 EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
366 
367 /**
368  * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
369  *
370  * @ucontext: associated user context.
371  * @entry: the entry to insert into the mmap_xa
372  * @length: length of the address that will be mmapped
373  *
374  * This function should be called by drivers that use the rdma_user_mmap
375  * interface for handling user mmapped addresses. The database is handled in
376  * the core and helper functions are provided to insert entries into the
377  * database and extract entries when the user calls mmap with the given offset.
378  * The function allocates a unique page offset that should be provided to user,
379  * the user will use the offset to retrieve information such as address to
380  * be mapped and how.
381  *
382  * Return: 0 on success and -ENOMEM on failure
383  */
rdma_user_mmap_entry_insert(struct ib_ucontext * ucontext,struct rdma_user_mmap_entry * entry,size_t length)384 int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
385 				struct rdma_user_mmap_entry *entry,
386 				size_t length)
387 {
388 	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
389 						 U32_MAX);
390 }
391 EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
392