/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *         this vma
 *
 * Each time we map IO memory into user space this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
			 struct vm_area_struct *vma,
			 struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	if (entry) {
		kref_get(&entry->ref);
		priv->entry = entry;
	}
	vma->vm_private_data = priv;
	/* vm_ops is set up in ib_uverbs_mmap() to avoid module dependencies */

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);

/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return: -EINVAL on wrong flags or size, -EAGAIN on failure to map, 0 on
 * success.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot,
		      struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
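
/*
 * Usage sketch (not part of this file): a driver's mmap handler that maps a
 * single page of device registers might call rdma_user_mmap_io() as below.
 * The hypo_* names and the db_bar_pfn field are hypothetical illustrations,
 * not a real driver; pgprot_noncached() keeps the register page uncached.
 *
 *	static int hypo_mmap(struct ib_ucontext *uctx,
 *			     struct vm_area_struct *vma)
 *	{
 *		struct hypo_ucontext *hctx =
 *			container_of(uctx, struct hypo_ucontext, ibucontext);
 *
 *		return rdma_user_mmap_io(uctx, vma, hctx->db_bar_pfn,
 *					 PAGE_SIZE,
 *					 pgprot_noncached(vma->vm_page_prot),
 *					 NULL);
 *	}
 */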

/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return: a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
			       unsigned long pgoff)
{
	struct rdma_user_mmap_entry *entry;

	if (pgoff > U32_MAX)
		return NULL;

	xa_lock(&ucontext->mmap_xa);

	entry = xa_load(&ucontext->mmap_xa, pgoff);

	/*
	 * If the refcount is zero, the entry is already being deleted;
	 * driver_removed indicates that no further mmaps are possible and
	 * we are waiting for the active VMAs to be closed.
	 */
	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
	    !kref_get_unless_zero(&entry->ref))
		goto err;

	xa_unlock(&ucontext->mmap_xa);

	return entry;

err:
	xa_unlock(&ucontext->mmap_xa);
	return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
			 struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;

	if (!(vma->vm_flags & VM_SHARED))
		return NULL;
	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
	if (!entry)
		return NULL;
	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
		rdma_user_mmap_entry_put(entry);
		return NULL;
	}
	return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);
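
/*
 * Usage sketch: drivers that publish entries typically pair this lookup with
 * rdma_user_mmap_io() in their mmap handler, then drop their reference in
 * all cases, since rdma_user_mmap_io() takes its own reference on success.
 * The entry_to_pfn() helper is hypothetical and stands in for however a
 * driver derives the pfn from its private entry.
 *
 *	entry = rdma_user_mmap_entry_get(uctx, vma);
 *	if (!entry)
 *		return -EINVAL;
 *	ret = rdma_user_mmap_io(uctx, vma, entry_to_pfn(entry),
 *				vma->vm_end - vma->vm_start,
 *				pgprot_noncached(vma->vm_page_prot), entry);
 *	rdma_user_mmap_entry_put(entry);
 *	return ret;
 */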

static void rdma_user_mmap_entry_free(struct kref *kref)
{
	struct rdma_user_mmap_entry *entry =
		container_of(kref, struct rdma_user_mmap_entry, ref);
	struct ib_ucontext *ucontext = entry->ucontext;
	unsigned long i;

	/*
	 * Erase all xarray indices occupied by this single entry; this is
	 * deferred until all VMAs are closed so that the mmap offsets remain
	 * unique.
	 */
	xa_lock(&ucontext->mmap_xa);
	for (i = 0; i < entry->npages; i++)
		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
	xa_unlock(&ucontext->mmap_xa);

	if (ucontext->device->mmap_free)
		ucontext->device->mmap_free(entry);
}
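
/*
 * Sketch of a driver's mmap_free callback (the hypo_* names are
 * hypothetical): by the time it runs, every VMA referencing the entry is
 * gone and the pgoff range has been erased above, so the driver simply
 * reclaims its containing structure.
 *
 *	static void hypo_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
 *	{
 *		struct hypo_mmap_entry *e =
 *			container_of(rdma_entry, struct hypo_mmap_entry,
 *				     rdma_entry);
 *
 *		kfree(e);
 *	}
 */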

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was an io
 * mapping, or when the driver is done with the entry for some other reason.
 * It should be called after rdma_user_mmap_entry_get() once the entry is no
 * longer needed. This function will erase the entry and free it if its
 * refcnt reaches zero.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *				   mark it as unmappable
 *
 * @entry: the entry in the mmap_xa to mark as removed
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
	if (!entry)
		return;

	xa_lock(&entry->ucontext->mmap_xa);
	entry->driver_removed = true;
	xa_unlock(&entry->ucontext->mmap_xa);
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);

/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *				         in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in the given
 * range that should be provided to the user; the user will use the offset to
 * retrieve information such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	u32 xa_first, xa_last, npages;
	int err;
	u32 i;
	u32 j;

	if (!entry)
		return -EINVAL;

	kref_init(&entry->ref);
	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing. During the xa_insert the lock could be released, possibly
	 * allowing another thread to choose the same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;

	/* Find a run of npages consecutive free indices */
	for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) {
		if (xa_load(&ucontext->mmap_xa, i + j) != NULL) {
			if (unlikely(i + j == max_pgoff))
				break;
			i = i + j + 1;
			j = 0;
		} else {
			if (unlikely(i + j == max_pgoff))
				break;
			j++;
		}
	}

	if (j != npages)
		goto err_unlock;

	xa_first = i;
	xa_last = i + j;

	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset, in libc this is a byte
	 * offset. Drivers should not return pgoff to userspace.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);

	return 0;

err_undo:
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
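
/*
 * Usage sketch: a driver that wants, say, doorbell offsets kept apart from
 * its other mappings might reserve them in a driver-chosen window. The
 * bound below is an arbitrary illustration, not a recommended value.
 *
 *	err = rdma_user_mmap_entry_insert_range(uctx, &e->rdma_entry,
 *						PAGE_SIZE, 0, U32_MAX / 2);
 */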

/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given
 * offset. The function allocates a unique page offset that should be
 * provided to the user; the user will use the offset to retrieve information
 * such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
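
/*
 * Usage sketch: allocating an offset for a queue and handing it back to
 * userspace. The hypo_* names and q_size are hypothetical;
 * rdma_user_mmap_get_offset() converts entry->start_pgoff into the byte
 * offset userspace passes to mmap(2).
 *
 *	struct hypo_mmap_entry *e = kzalloc(sizeof(*e), GFP_KERNEL);
 *
 *	if (!e)
 *		return -ENOMEM;
 *	if (rdma_user_mmap_entry_insert(uctx, &e->rdma_entry, q_size)) {
 *		kfree(e);
 *		return -ENOMEM;
 *	}
 *	resp->q_mmap_offset = rdma_user_mmap_get_offset(&e->rdma_entry);
 */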