xref: /linux/drivers/infiniband/core/umem.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2020 Intel Corporation. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #include <linux/mm.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/sched/signal.h>
39 #include <linux/sched/mm.h>
40 #include <linux/export.h>
41 #include <linux/slab.h>
42 #include <linux/pagemap.h>
43 #include <linux/count_zeros.h>
44 #include <rdma/ib_umem_odp.h>
45 
46 #include "uverbs.h"
47 
/*
 * While unpinning pages, __ib_umem_release() calls cond_resched() each
 * time its scatterlist index is a non-zero multiple of this value.
 */
#define RESCHED_LOOP_CNT_THRESHOLD 0x1000
49 
50 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
51 {
52 	bool make_dirty = umem->writable && dirty;
53 	struct scatterlist *sg;
54 	unsigned int i;
55 
56 	if (dirty)
57 		ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
58 					   DMA_BIDIRECTIONAL, umem->dma_attrs);
59 
60 	for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
61 		unpin_user_page_range_dirty_lock(sg_page(sg),
62 			DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
63 
64 		if (i && !(i % RESCHED_LOOP_CNT_THRESHOLD))
65 			cond_resched();
66 	}
67 
68 	sg_free_append_table(&umem->sgt_append);
69 }
70 
71 /**
72  * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
73  *
74  * @umem: umem struct
75  * @pgsz_bitmap: bitmap of HW supported page sizes
76  * @virt: IOVA
77  *
78  * This helper is intended for HW that support multiple page
79  * sizes but can do only a single page size in an MR.
80  *
81  * Returns 0 if the umem requires page sizes not supported by
82  * the driver to be mapped. Drivers always supporting PAGE_SIZE
83  * or smaller will never see a 0 result.
84  */
85 unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
86 				     unsigned long pgsz_bitmap,
87 				     u64 virt)
88 {
89 	unsigned long curr_len = 0;
90 	dma_addr_t curr_base = ~0;
91 	unsigned long pgoff;
92 	struct scatterlist *sg;
93 	unsigned long mask = 0;
94 	unsigned int bits;
95 	dma_addr_t end;
96 	u64 last_va;
97 	u64 va;
98 	int i;
99 
100 	umem->iova = va = virt;
101 
102 	if (umem->is_odp) {
103 		unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
104 
105 		/* ODP must always be self consistent. */
106 		if (!(pgsz_bitmap & page_size))
107 			return 0;
108 		return page_size;
109 	}
110 
111 	/* The best result is the smallest page size that results in the minimum
112 	 * number of required pages. Compute the largest page size that could
113 	 * work based on VA address bits that don't change.
114 	 */
115 	if (check_add_overflow(umem->length - 1, virt, &last_va))
116 		return 0;
117 	bits = bits_per(virt ^ last_va);
118 	if (bits < BITS_PER_LONG)
119 		mask = pgsz_bitmap & GENMASK(BITS_PER_LONG - 1, bits);
120 
121 	/* offset into first SGL */
122 	pgoff = umem->address & ~PAGE_MASK;
123 
124 	for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
125 		/* If the current entry is physically contiguous with the previous
126 		 * one, no need to take its start addresses into consideration.
127 		 */
128 		if (check_add_overflow(curr_base, curr_len, &end) ||
129 		    end != sg_dma_address(sg)) {
130 
131 			curr_base = sg_dma_address(sg);
132 			curr_len = 0;
133 
134 			/* Reduce max page size if VA/PA bits differ */
135 			mask |= (curr_base + pgoff) ^ va;
136 
137 			/* The alignment of any VA matching a discontinuity point
138 			* in the physical memory sets the maximum possible page
139 			* size as this must be a starting point of a new page that
140 			* needs to be aligned.
141 			*/
142 			if (i != 0)
143 				mask |= va;
144 		}
145 
146 		curr_len += sg_dma_len(sg);
147 		va += sg_dma_len(sg) - pgoff;
148 
149 		pgoff = 0;
150 	}
151 
152 	/* The mask accumulates 1's in each position where the VA and physical
153 	 * address differ, thus the length of trailing 0 is the largest page
154 	 * size that can pass the VA through to the physical.
155 	 */
156 	if (mask)
157 		pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
158 	return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
159 }
160 EXPORT_SYMBOL(ib_umem_find_best_pgsz);
161 
162 static struct ib_umem *__ib_umem_get_va(struct ib_device *device,
163 					unsigned long addr, size_t size,
164 					int access)
165 {
166 	struct ib_umem *umem;
167 	struct page **page_list;
168 	unsigned long lock_limit;
169 	unsigned long new_pinned;
170 	unsigned long cur_base;
171 	struct mm_struct *mm;
172 	unsigned long npages;
173 	int pinned, ret;
174 	unsigned int gup_flags = FOLL_LONGTERM;
175 
176 	if (device->cc_dma_bounce)
177 		return ERR_PTR(-EOPNOTSUPP);
178 
179 	/*
180 	 * If the combination of the addr and size requested for this memory
181 	 * region causes an integer overflow, return error.
182 	 */
183 	if (((addr + size) < addr) ||
184 	    PAGE_ALIGN(addr + size) < (addr + size))
185 		return ERR_PTR(-EINVAL);
186 
187 	if (!can_do_mlock())
188 		return ERR_PTR(-EPERM);
189 
190 	if (access & IB_ACCESS_ON_DEMAND)
191 		return ERR_PTR(-EOPNOTSUPP);
192 
193 	umem = kzalloc_obj(*umem);
194 	if (!umem)
195 		return ERR_PTR(-ENOMEM);
196 	umem->ibdev      = device;
197 	umem->length     = size;
198 	umem->address    = addr;
199 	/*
200 	 * Drivers should call ib_umem_find_best_pgsz() to set the iova
201 	 * correctly.
202 	 */
203 	umem->iova = addr;
204 	umem->writable   = ib_access_writable(access);
205 	umem->owning_mm = mm = current->mm;
206 	umem->dma_attrs = DMA_ATTR_REQUIRE_COHERENT;
207 	if (access & IB_ACCESS_RELAXED_ORDERING)
208 		umem->dma_attrs |= DMA_ATTR_WEAK_ORDERING;
209 
210 	mmgrab(mm);
211 
212 	page_list = (struct page **) __get_free_page(GFP_KERNEL);
213 	if (!page_list) {
214 		ret = -ENOMEM;
215 		goto umem_kfree;
216 	}
217 
218 	npages = ib_umem_num_pages(umem);
219 	if (npages == 0 || npages > UINT_MAX) {
220 		ret = -EINVAL;
221 		goto out;
222 	}
223 
224 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
225 
226 	new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
227 	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
228 		atomic64_sub(npages, &mm->pinned_vm);
229 		ret = -ENOMEM;
230 		goto out;
231 	}
232 
233 	cur_base = addr & PAGE_MASK;
234 
235 	if (umem->writable)
236 		gup_flags |= FOLL_WRITE;
237 
238 	while (npages) {
239 		cond_resched();
240 		pinned = pin_user_pages_fast(cur_base,
241 					  min_t(unsigned long, npages,
242 						PAGE_SIZE /
243 						sizeof(struct page *)),
244 					  gup_flags, page_list);
245 		if (pinned < 0) {
246 			ret = pinned;
247 			goto umem_release;
248 		}
249 
250 		cur_base += pinned * PAGE_SIZE;
251 		npages -= pinned;
252 		ret = sg_alloc_append_table_from_pages(
253 			&umem->sgt_append, page_list, pinned, 0,
254 			pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
255 			npages, GFP_KERNEL);
256 		if (ret) {
257 			unpin_user_pages_dirty_lock(page_list, pinned, 0);
258 			goto umem_release;
259 		}
260 	}
261 
262 	ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
263 				       DMA_BIDIRECTIONAL, umem->dma_attrs);
264 	if (ret)
265 		goto umem_release;
266 	goto out;
267 
268 umem_release:
269 	__ib_umem_release(device, umem, 0);
270 	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
271 out:
272 	free_page((unsigned long) page_list);
273 umem_kfree:
274 	if (ret) {
275 		mmdrop(umem->owning_mm);
276 		kfree(umem);
277 	}
278 	return ret ? ERR_PTR(ret) : umem;
279 }
280 
281 /**
282  * ib_umem_get_desc - Pin a umem from a buffer descriptor.
283  * @device: IB device.
284  * @desc:   buffer descriptor (VA or DMABUF).
285  * @access: IB access flags.
286  *
287  * Return: caller-owned umem on success, ERR_PTR(...) on error.
288  */
289 struct ib_umem *ib_umem_get_desc(struct ib_device *device,
290 				 const struct ib_uverbs_buffer_desc *desc,
291 				 int access)
292 {
293 	struct ib_umem_dmabuf *umem_dmabuf;
294 
295 	if (desc->flags & ~IB_UVERBS_BUFFER_DESC_FLAGS_KNOWN_MASK)
296 		return ERR_PTR(-EINVAL);
297 
298 	if (overflows_type(desc->addr, unsigned long) ||
299 	    overflows_type(desc->length, size_t))
300 		return ERR_PTR(-EOVERFLOW);
301 
302 	switch (desc->type) {
303 	case IB_UVERBS_BUFFER_TYPE_DMABUF:
304 		umem_dmabuf = ib_umem_dmabuf_get_pinned(device, desc->addr,
305 							desc->length, desc->fd,
306 							access);
307 		if (IS_ERR(umem_dmabuf))
308 			return ERR_CAST(umem_dmabuf);
309 		return &umem_dmabuf->umem;
310 	case IB_UVERBS_BUFFER_TYPE_VA:
311 		return __ib_umem_get_va(device, desc->addr, desc->length,
312 					access);
313 	default:
314 		return ERR_PTR(-EINVAL);
315 	}
316 }
317 EXPORT_SYMBOL(ib_umem_get_desc);
318 
/*
 * ib_umem_buf_desc_filler_t - per-command legacy buffer-desc filler.
 * @attrs: uverbs attribute bundle the legacy attributes are read from.
 * @desc:  descriptor to fill; ib_umem_resolve_desc() passes a
 *         zero-initialized one.
 *
 * Returns 0 on success (desc filled), -ENODATA if no legacy attrs apply,
 * negative errno on validation failure.
 */
typedef int (*ib_umem_buf_desc_filler_t)(const struct uverbs_attr_bundle *attrs,
					 struct ib_uverbs_buffer_desc *desc);
326 
327 /*
328  * ib_umem_resolve_desc - Resolve a buffer descriptor from a per-command UMEM
329  *                        attribute and/or a legacy attr filler.
330  *
331  * Return:
332  *    0       @desc filled.
333  *   -ENOENT  no source produced a buffer.
334  *   -EINVAL  both the UMEM attribute and the legacy filler produced a buffer.
335  *   -errno   propagated from attr read / filler validation.
336  */
337 static int ib_umem_resolve_desc(const struct uverbs_attr_bundle *attrs,
338 				u16 attr_id,
339 				ib_umem_buf_desc_filler_t legacy_filler,
340 				struct ib_uverbs_buffer_desc *desc)
341 {
342 	bool have_desc = false;
343 	int ret;
344 
345 	if (!attrs)
346 		return -ENOENT;
347 
348 	ret = uverbs_get_buffer_desc(attrs, attr_id, desc);
349 	if (!ret)
350 		have_desc = true;
351 	else if (ret != -ENOENT)
352 		return ret;
353 
354 	if (legacy_filler) {
355 		struct ib_uverbs_buffer_desc legacy_desc = {};
356 
357 		ret = legacy_filler(attrs, &legacy_desc);
358 		if (!ret) {
359 			if (have_desc)
360 				return -EINVAL;
361 			*desc = legacy_desc;
362 			have_desc = true;
363 		} else if (ret != -ENODATA) {
364 			return ret;
365 		}
366 	}
367 
368 	return have_desc ? 0 : -ENOENT;
369 }
370 
371 /*
372  * ib_umem_get_desc_check - Pin a umem from @desc and verify it meets
373  *                          @min_size.
374  */
375 static struct ib_umem *
376 ib_umem_get_desc_check(struct ib_device *device,
377 		       const struct ib_uverbs_buffer_desc *desc,
378 		       size_t min_size, int access)
379 {
380 	struct ib_umem *umem;
381 
382 	umem = ib_umem_get_desc(device, desc, access);
383 	if (IS_ERR(umem))
384 		return umem;
385 	if (umem->length < min_size) {
386 		ib_umem_release(umem);
387 		return ERR_PTR(-EINVAL);
388 	}
389 	return umem;
390 }
391 
392 /*
393  * ib_umem_get_from_attrs - Pin a umem from a per-command UMEM attribute
394  *                          and/or a legacy attr filler.
395  *
396  * Return: caller-owned umem on success; NULL when no source supplied a
397  * buffer; ERR_PTR(...) on error.
398  */
399 static struct ib_umem *
400 ib_umem_get_from_attrs(struct ib_device *device,
401 		       const struct uverbs_attr_bundle *attrs,
402 		       u16 attr_id, ib_umem_buf_desc_filler_t legacy_filler,
403 		       size_t size, int access)
404 {
405 	struct ib_uverbs_buffer_desc desc = {};
406 	int ret;
407 
408 	ret = ib_umem_resolve_desc(attrs, attr_id, legacy_filler, &desc);
409 	if (ret == -ENOENT)
410 		return NULL;
411 	if (ret)
412 		return ERR_PTR(ret);
413 	return ib_umem_get_desc_check(device, &desc, size, access);
414 }
415 
416 /*
417  * ib_umem_get_from_attrs_or_va - Pin a umem from a per-command UMEM
418  *                                attribute and/or a legacy attr filler,
419  *                                falling back to a UHW VA when no source
420  *                                matched.
421  *
422  * @size is always consumed: it is the length to pin on the VA fallback
423  * path AND the post-pin minimum-length check on the attr / legacy paths.
424  * Callers must always pass a meaningful, validated value.
425  *
426  * Return: caller-owned umem on success, ERR_PTR(...) on error.
427  */
428 static struct ib_umem *
429 ib_umem_get_from_attrs_or_va(struct ib_device *device,
430 			     const struct uverbs_attr_bundle *attrs,
431 			     u16 attr_id,
432 			     ib_umem_buf_desc_filler_t legacy_filler,
433 			     u64 addr, size_t size, int access)
434 {
435 	struct ib_uverbs_buffer_desc desc = {};
436 	int ret;
437 
438 	ret = ib_umem_resolve_desc(attrs, attr_id, legacy_filler, &desc);
439 	if (ret == -ENOENT)
440 		desc = (struct ib_uverbs_buffer_desc){
441 			.type	= IB_UVERBS_BUFFER_TYPE_VA,
442 			.addr	= addr,
443 			.length	= size,
444 		};
445 	else if (ret)
446 		return ERR_PTR(ret);
447 	return ib_umem_get_desc_check(device, &desc, size, access);
448 }
449 
450 /**
451  * ib_umem_get_attr - Pin a umem from a per-command UMEM attribute.
452  * @device:  IB device.
453  * @attrs:   uverbs attribute bundle (may be NULL).
454  * @attr_id: per-command UMEM attribute id.
455  * @size:    minimum required umem length.
456  * @access:  IB access flags.
457  *
458  * Return: caller-owned umem on success; NULL when no source supplied
459  * a buffer; ERR_PTR(...) on error.
460  */
461 struct ib_umem *ib_umem_get_attr(struct ib_device *device,
462 				 const struct uverbs_attr_bundle *attrs,
463 				 u16 attr_id, size_t size, int access)
464 {
465 	return ib_umem_get_from_attrs(device, attrs, attr_id, NULL, size,
466 				      access);
467 }
468 EXPORT_SYMBOL(ib_umem_get_attr);
469 
470 /**
471  * ib_umem_get_attr_or_va - Pin a umem from a per-command UMEM attribute,
472  *                          falling back to a UHW VA.
473  * @device:  IB device.
474  * @attrs:   uverbs attribute bundle (may be NULL).
475  * @attr_id: per-command UMEM attribute id.
476  * @addr:    UHW user VA used when no per-command attribute matched.
477  * @size:    on the attr / legacy paths, the minimum required umem length
478  *           validated post-pin; on the VA fallback path, the length to pin.
479  * @access:  IB access flags.
480  *
481  * Like ib_umem_get_attr(), but pins @addr/@size when no per-command
482  * UMEM attribute is supplied.
483  *
484  * IMPORTANT: @size is always consumed. On the attr / legacy paths it is
485  * used as the post-pin minimum-length check; on the VA fallback path it
486  * is the length to pin. Callers MUST pass a meaningful, validated value
487  * even when they expect an attribute-supplied buffer to be used.
488  *
489  * Every in-tree caller passes the same value for the two roles of @size
490  * because no driver today distinguishes a user-passed buffer length from
491  * a driver-computed minimum. Drivers that currently accept a user-supplied
492  * length without cross-checking it against a driver minimum (vmw_pvrdma
493  * CQ/QP/SRQ, qedr CQ/QP/SRQ, mana WQ/QP, ionic CQ/QP), once tightened to
494  * compute and check a real minimum, will want to introduce a separate
495  * helper that passes these as distinct values.
496  *
497  * Return: caller-owned umem on success, ERR_PTR(...) on error.
498  */
499 struct ib_umem *ib_umem_get_attr_or_va(struct ib_device *device,
500 				       const struct uverbs_attr_bundle *attrs,
501 				       u16 attr_id, u64 addr, size_t size,
502 				       int access)
503 {
504 	return ib_umem_get_from_attrs_or_va(device, attrs, attr_id, NULL, addr,
505 					    size, access);
506 }
507 EXPORT_SYMBOL(ib_umem_get_attr_or_va);
508 
509 static int uverbs_create_cq_get_buffer_desc(const struct uverbs_attr_bundle *attrs,
510 					    struct ib_uverbs_buffer_desc *desc)
511 {
512 	struct ib_device *ib_dev = attrs->context->device;
513 	int ret;
514 
515 	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA)) {
516 		ret = uverbs_copy_from(&desc->addr, attrs,
517 				       UVERBS_ATTR_CREATE_CQ_BUFFER_VA);
518 		if (ret)
519 			return ret;
520 		ret = uverbs_copy_from(&desc->length, attrs,
521 				       UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH);
522 		if (ret)
523 			return ret;
524 		if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD) ||
525 		    uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET) ||
526 		    !ib_dev->ops.create_user_cq)
527 			return -EINVAL;
528 		desc->type = IB_UVERBS_BUFFER_TYPE_VA;
529 		return 0;
530 	}
531 
532 	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD)) {
533 		ret = uverbs_get_raw_fd(&desc->fd, attrs,
534 					UVERBS_ATTR_CREATE_CQ_BUFFER_FD);
535 		if (ret)
536 			return ret;
537 
538 		ret = uverbs_copy_from(&desc->addr, attrs,
539 				       UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET);
540 		if (ret)
541 			return ret;
542 		ret = uverbs_copy_from(&desc->length, attrs,
543 				       UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH);
544 		if (ret)
545 			return ret;
546 		if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA) ||
547 		    !ib_dev->ops.create_user_cq)
548 			return -EINVAL;
549 		desc->type = IB_UVERBS_BUFFER_TYPE_DMABUF;
550 		return 0;
551 	}
552 
553 	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET) ||
554 	    uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH))
555 		return -EINVAL;
556 	return -ENODATA;
557 }
558 
559 /**
560  * ib_umem_get_cq_buf - Pin a CQ buffer umem from per-command attributes.
561  * @device:  IB device.
562  * @attrs:   uverbs attribute bundle (may be NULL).
563  * @size:    minimum required CQ buffer length.
564  * @access:  IB access flags.
565  *
566  * Resolves the CQ buffer from the new UMEM attribute or the legacy
567  * CQ buffer attributes. There is no UHW VA fallback, so the caller
568  * must arrange its own backing (typically an in-kernel allocation)
569  * when no source is available.
570  *
571  * Return: caller-owned umem on success; NULL when no source supplied
572  * a buffer; ERR_PTR(...) on error.
573  */
574 struct ib_umem *ib_umem_get_cq_buf(struct ib_device *device,
575 				   const struct uverbs_attr_bundle *attrs,
576 				   size_t size, int access)
577 {
578 	return ib_umem_get_from_attrs(device, attrs,
579 				      UVERBS_ATTR_CREATE_CQ_BUF_UMEM,
580 				      uverbs_create_cq_get_buffer_desc,
581 				      size, access);
582 }
583 EXPORT_SYMBOL(ib_umem_get_cq_buf);
584 
585 /**
586  * ib_umem_get_cq_buf_or_va - Pin a CQ buffer umem with UHW VA fallback.
587  * @device:  IB device.
588  * @attrs:   uverbs attribute bundle (may be NULL).
589  * @addr:    UHW user VA used when no per-command attribute matched.
590  * @size:    on the attr / legacy paths, the minimum required umem length
591  *           validated post-pin; on the VA fallback path, the length to pin.
592  * @access:  IB access flags.
593  *
594  * Like ib_umem_get_cq_buf(), but pins @addr/@size when neither the
595  * UMEM attribute nor the legacy CQ buffer attributes are supplied.
596  *
597  * See ib_umem_get_attr_or_va() for the note on @size's dual role and
598  * the migration path for drivers that would distinguish a user-supplied
599  * length from a driver-computed minimum.
600  *
601  * Return: caller-owned umem on success, ERR_PTR(...) on error.
602  */
603 struct ib_umem *ib_umem_get_cq_buf_or_va(struct ib_device *device,
604 					 const struct uverbs_attr_bundle *attrs,
605 					 u64 addr, size_t size, int access)
606 {
607 	return ib_umem_get_from_attrs_or_va(device, attrs,
608 					    UVERBS_ATTR_CREATE_CQ_BUF_UMEM,
609 					    uverbs_create_cq_get_buffer_desc,
610 					    addr, size, access);
611 }
612 EXPORT_SYMBOL(ib_umem_get_cq_buf_or_va);
613 
614 /**
615  * ib_umem_release - release pinned memory
616  * @umem: umem struct to release
617  */
618 void ib_umem_release(struct ib_umem *umem)
619 {
620 	if (IS_ERR_OR_NULL(umem))
621 		return;
622 	if (umem->is_dmabuf)
623 		return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
624 	if (umem->is_odp)
625 		return ib_umem_odp_release(to_ib_umem_odp(umem));
626 
627 	__ib_umem_release(umem->ibdev, umem, 1);
628 
629 	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
630 	mmdrop(umem->owning_mm);
631 	kfree(umem);
632 }
633 EXPORT_SYMBOL(ib_umem_release);
634 
635 /*
636  * Copy from the given ib_umem's pages to the given buffer.
637  *
638  * umem - the umem to copy from
639  * offset - offset to start copying from
640  * dst - destination buffer
641  * length - buffer length
642  *
643  * Returns 0 on success, or an error code.
644  */
645 int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
646 		      size_t length)
647 {
648 	size_t end = offset + length;
649 	int ret;
650 
651 	if (offset > umem->length || length > umem->length - offset) {
652 		pr_err("%s not in range. offset: %zd umem length: %zd end: %zd\n",
653 		       __func__, offset, umem->length, end);
654 		return -EINVAL;
655 	}
656 
657 	ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl,
658 				 umem->sgt_append.sgt.orig_nents, dst, length,
659 				 offset + ib_umem_offset(umem));
660 
661 	if (ret < 0)
662 		return ret;
663 	else if (ret != length)
664 		return -EINVAL;
665 	else
666 		return 0;
667 }
668 EXPORT_SYMBOL(ib_umem_copy_from);
669 
670 /*
671  * Called during rereg mr if the driver is able to re-use a umem for
672  * IB_MR_REREG_ACCESS.
673  */
674 int ib_umem_check_rereg(struct ib_umem *umem, int flags, int new_access_flags)
675 {
676 	if (!umem)
677 		return 0;
678 
679 	if ((flags & IB_MR_REREG_ACCESS) && !(flags & IB_MR_REREG_TRANS))
680 		if (ib_access_writable(new_access_flags) && !umem->writable)
681 			return -EACCES;
682 	return 0;
683 }
684 EXPORT_SYMBOL(ib_umem_check_rereg);
685