1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
5 * Copyright 2019 Marvell. All rights reserved.
6 */
7 #include <linux/xarray.h>
8 #include <linux/dma-buf.h>
9 #include <linux/dma-resv.h>
10 #include "uverbs.h"
11 #include "core_priv.h"
12
13 MODULE_IMPORT_NS("DMA_BUF");
14
15 /**
16 * rdma_umap_priv_init() - Initialize the private data of a vma
17 *
18 * @priv: The already allocated private data
19 * @vma: The vm area struct that needs private data
20 * @entry: entry into the mmap_xa that needs to be linked with
21 * this vma
22 *
23 * Each time we map IO memory into user space this keeps track of the
24 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
25 * to point to the zero page and allow the hot unplug to proceed.
26 *
27 * This is necessary for cases like PCI physical hot unplug as the actual BAR
28 * memory may vanish after this and access to it from userspace could MCE.
29 *
30 * RDMA drivers supporting disassociation must have their user space designed
31 * to cope in some way with their IO pages going to the zero page.
32 *
33 */
rdma_umap_priv_init(struct rdma_umap_priv * priv,struct vm_area_struct * vma,struct rdma_user_mmap_entry * entry)34 void rdma_umap_priv_init(struct rdma_umap_priv *priv,
35 struct vm_area_struct *vma,
36 struct rdma_user_mmap_entry *entry)
37 {
38 struct ib_uverbs_file *ufile = vma->vm_file->private_data;
39
40 priv->vma = vma;
41 if (entry) {
42 kref_get(&entry->ref);
43 priv->entry = entry;
44 }
45 vma->vm_private_data = priv;
46 /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
47
48 mutex_lock(&ufile->umap_lock);
49 list_add(&priv->list, &ufile->umaps);
50 mutex_unlock(&ufile->umap_lock);
51 }
52 EXPORT_SYMBOL(rdma_umap_priv_init);
53
54 /**
55 * rdma_user_mmap_io() - Map IO memory into a process
56 *
57 * @ucontext: associated user context
58 * @vma: the vma related to the current mmap call
59 * @pfn: pfn to map
60 * @size: size to map
61 * @prot: pgprot to use in remap call
62 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
63 * if mmap_entry is not used by the driver
64 *
65 * This is to be called by drivers as part of their mmap() functions if they
66 * wish to send something like PCI-E BAR memory to userspace.
67 *
68 * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
69 * success.
70 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot,
		      struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	/*
	 * FIX: was "kzalloc_obj(*priv)" which is not a kernel allocator
	 * interface; allocate and zero the priv with explicit GFP flags.
	 */
	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	/* Only link the priv once the mapping cannot fail anymore */
	rdma_umap_priv_init(priv, vma, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
104
105 /**
106 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
107 *
108 * @ucontext: associated user context
109 * @pgoff: The mmap offset >> PAGE_SHIFT
110 *
111 * This function is called when a user tries to mmap with an offset (returned
112 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
113 * rdma_user_mmap_entry was created by the function
114 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
115 * entry so that it won't be deleted from the xarray in the meantime.
116 *
 * Returns a reference to an entry if one exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
119 */
120 struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext * ucontext,unsigned long pgoff)121 rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
122 unsigned long pgoff)
123 {
124 struct rdma_user_mmap_entry *entry;
125
126 if (pgoff > U32_MAX)
127 return NULL;
128
129 xa_lock(&ucontext->mmap_xa);
130
131 entry = xa_load(&ucontext->mmap_xa, pgoff);
132
133 /*
134 * If refcount is zero, entry is already being deleted, driver_removed
135 * indicates that the no further mmaps are possible and we waiting for
136 * the active VMAs to be closed.
137 */
138 if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
139 !kref_get_unless_zero(&entry->ref))
140 goto err;
141
142 xa_unlock(&ucontext->mmap_xa);
143
144 ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
145 pgoff, entry->npages);
146
147 return entry;
148
149 err:
150 xa_unlock(&ucontext->mmap_xa);
151 return NULL;
152 }
153 EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
154
155 /**
156 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
157 *
158 * @ucontext: associated user context
159 * @vma: the vma being mmap'd into
160 *
161 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
162 * checks that the VMA is correct.
163 */
164 struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext * ucontext,struct vm_area_struct * vma)165 rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
166 struct vm_area_struct *vma)
167 {
168 struct rdma_user_mmap_entry *entry;
169
170 if (!(vma->vm_flags & VM_SHARED))
171 return NULL;
172 entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
173 if (!entry)
174 return NULL;
175 if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
176 rdma_user_mmap_entry_put(entry);
177 return NULL;
178 }
179 return entry;
180 }
181 EXPORT_SYMBOL(rdma_user_mmap_entry_get);
182
rdma_user_mmap_entry_free(struct kref * kref)183 static void rdma_user_mmap_entry_free(struct kref *kref)
184 {
185 struct rdma_user_mmap_entry *entry =
186 container_of(kref, struct rdma_user_mmap_entry, ref);
187 struct ib_ucontext *ucontext = entry->ucontext;
188 unsigned long i;
189
190 /*
191 * Erase all entries occupied by this single entry, this is deferred
192 * until all VMA are closed so that the mmap offsets remain unique.
193 */
194 xa_lock(&ucontext->mmap_xa);
195 for (i = 0; i < entry->npages; i++)
196 __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
197 xa_unlock(&ucontext->mmap_xa);
198
199 ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
200 entry->start_pgoff, entry->npages);
201
202 if (ucontext->device->ops.mmap_free)
203 ucontext->device->ops.mmap_free(entry);
204 }
205
206 /**
207 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
208 *
209 * @entry: an entry in the mmap_xa
210 *
211 * This function is called when the mapping is closed if it was
212 * an io mapping or when the driver is done with the entry for
213 * some other reason.
214 * Should be called after rdma_user_mmap_entry_get was called
215 * and entry is no longer needed. This function will erase the
216 * entry and free it if its refcnt reaches zero.
217 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	/* On the final put the entry is erased from the xarray and freed */
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);
223
224 /**
225 * rdma_user_mmap_entry_remove() - Drop reference to entry and
226 * mark it as unmmapable
227 *
228 * @entry: the entry to insert into the mmap_xa
229 *
230 * Drivers can call this to prevent userspace from creating more mappings for
231 * entry, however existing mmaps continue to exist and ops->mmap_free() will
232 * not be called until all user mmaps are destroyed.
233 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_dmabuf_file *uverbs_dmabuf, *tmp;

	if (!entry)
		return;

	mutex_lock(&entry->dmabufs_lock);
	/*
	 * Mark the entry under the xa_lock so no new
	 * rdma_user_mmap_entry_get_pgoff() lookup can take a reference.
	 */
	xa_lock(&entry->ucontext->mmap_xa);
	entry->driver_removed = true;
	xa_unlock(&entry->ucontext->mmap_xa);
	/*
	 * Revoke every dma-buf exported from this entry: under the
	 * reservation lock mark it revoked, tell importers to drop their
	 * mappings via move_notify, then wait for all outstanding DMA to
	 * complete before releasing the reservation lock.
	 */
	list_for_each_entry_safe(uverbs_dmabuf, tmp, &entry->dmabufs, dmabufs_elm) {
		dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
		list_del(&uverbs_dmabuf->dmabufs_elm);
		uverbs_dmabuf->revoked = true;
		dma_buf_move_notify(uverbs_dmabuf->dmabuf);
		dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
				      DMA_RESV_USAGE_BOOKKEEP, false,
				      MAX_SCHEDULE_TIMEOUT);
		dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
		kref_put(&uverbs_dmabuf->kref, ib_uverbs_dmabuf_done);
		/*
		 * NOTE(review): presumably comp is completed once the last
		 * kref on uverbs_dmabuf drops (ib_uverbs_dmabuf_done) --
		 * confirm against its definition.
		 */
		wait_for_completion(&uverbs_dmabuf->comp);
	}
	mutex_unlock(&entry->dmabufs_lock);

	/* Drop the reference taken by rdma_user_mmap_entry_insert_range() */
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
262
263 /**
264 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
265 * in a given range.
266 *
267 * @ucontext: associated user context.
268 * @entry: the entry to insert into the mmap_xa
269 * @length: length of the address that will be mmapped
270 * @min_pgoff: minimum pgoff to be returned
271 * @max_pgoff: maximum pgoff to be returned
272 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets is
275 * handled in the core and helper functions are provided to insert entries
276 * into the database and extract entries when the user calls mmap with the
277 * given offset. The function allocates a unique page offset in a given range
278 * that should be provided to user, the user will use the offset to retrieve
279 * information such as address to be mapped and how.
280 *
281 * Return: 0 on success and -ENOMEM on failure
282 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
	u32 xa_first, xa_last, npages;
	int err;
	u32 i;

	if (!entry)
		return -EINVAL;

	kref_init(&entry->ref);
	INIT_LIST_HEAD(&entry->dmabufs);
	mutex_init(&entry->dmabufs_lock);

	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing. During the xa_insert the lock could be released, possibly
	 * allowing another thread to choose the same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	/* We want to find an empty range */
	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;
	while (true) {
		/* First find an empty index */
		xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
		if (xas.xa_node == XAS_RESTART)
			goto err_unlock;

		xa_first = xas.xa_index;

		/* Is there enough room to have the range? */
		if (check_add_overflow(xa_first, npages, &xa_last))
			goto err_unlock;

		/*
		 * Now look for the next present entry. If an entry doesn't
		 * exist, we found an empty range and can proceed.
		 */
		xas_next_entry(&xas, xa_last - 1);
		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
			break;
	}

	/* Occupy one slot per page so pgoff lookups resolve to this entry */
	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset, in libc this is a byte
	 * offset. Drivers should not return pgoff to userspace.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
		  entry->start_pgoff, npages);

	return 0;

err_undo:
	/* Back out the slots inserted so far; i is the index that failed */
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
366
367 /**
368 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
369 *
370 * @ucontext: associated user context.
371 * @entry: the entry to insert into the mmap_xa
372 * @length: length of the address that will be mmapped
373 *
374 * This function should be called by drivers that use the rdma_user_mmap
375 * interface for handling user mmapped addresses. The database is handled in
376 * the core and helper functions are provided to insert entries into the
377 * database and extract entries when the user calls mmap with the given offset.
378 * The function allocates a unique page offset that should be provided to user,
379 * the user will use the offset to retrieve information such as address to
380 * be mapped and how.
381 *
382 * Return: 0 on success and -ENOMEM on failure
383 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	/* Allow the offset to land anywhere in the full 32-bit pgoff space */
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
392