xref: /linux/drivers/infiniband/sw/rdmavt/mr.c (revision 0bbb3b7496eabb6779962a998a8a91f4a8e589ff)
1 /*
2  * Copyright(c) 2016 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 #include <linux/slab.h>
49 #include <linux/vmalloc.h>
50 #include <rdma/ib_umem.h>
51 #include <rdma/rdma_vt.h>
52 #include "vt.h"
53 #include "mr.h"
54 #include "trace.h"
55 
56 /**
57  * rvt_driver_mr_init - Init MR resources per driver
58  * @rdi: rvt dev struct
59  *
60  * Do any initialization needed when a driver registers with rdmavt.
61  *
62  * Return: 0 on success or errno on failure
63  */
64 int rvt_driver_mr_init(struct rvt_dev_info *rdi)
65 {
66 	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
67 	unsigned lk_tab_size;
68 	int i;
69 
70 	/*
71 	 * The top lkey_table_size bits are used to index the
72 	 * table.  The lower 8 bits can be owned by the user (copied from
73 	 * the LKEY).  The remaining bits act as a generation number or tag.
74 	 */
75 	if (!lkey_table_size)
76 		return -EINVAL;
77 
78 	spin_lock_init(&rdi->lkey_table.lock);
79 
80 	/* ensure generation is at least 4 bits */
81 	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
82 		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
83 			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
84 		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
85 		lkey_table_size = rdi->dparms.lkey_table_size;
86 	}
87 	rdi->lkey_table.max = 1 << lkey_table_size;
88 	rdi->lkey_table.shift = 32 - lkey_table_size;
89 	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
90 	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
91 			       vmalloc_node(lk_tab_size, rdi->dparms.node);
92 	if (!rdi->lkey_table.table)
93 		return -ENOMEM;
94 
95 	RCU_INIT_POINTER(rdi->dma_mr, NULL);
96 	for (i = 0; i < rdi->lkey_table.max; i++)
97 		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);
98 
99 	return 0;
100 }
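/*
 * Illustrative sketch (editorial note, not part of the driver): a provider
 * built on rdmavt normally fills in the MR-related dparms before registering;
 * rvt_register_device() then calls rvt_driver_mr_init() above.  The values
 * below are hypothetical (16 index bits gives a 64K entry lkey table):
 *
 *	rdi->dparms.lkey_table_size = 16;
 *	rdi->dparms.node = dev_to_node(&pdev->dev);
 *	ret = rvt_register_device(rdi);
 */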
101 
102 /**
103  * rvt_mr_exit - clean up MR
104  * @rdi: rvt dev structure
105  *
106  * Called when drivers have unregistered or failed to register with rdmavt.
107  */
108 void rvt_mr_exit(struct rvt_dev_info *rdi)
109 {
110 	if (rdi->dma_mr)
111 		rvt_pr_err(rdi, "DMA MR not null!\n");
112 
113 	vfree(rdi->lkey_table.table);
114 }
115 
116 static void rvt_deinit_mregion(struct rvt_mregion *mr)
117 {
118 	int i = mr->mapsz;
119 
120 	mr->mapsz = 0;
121 	while (i)
122 		kfree(mr->map[--i]);
123 }
124 
125 static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
126 			    int count)
127 {
128 	int m, i = 0;
129 	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
130 
131 	mr->mapsz = 0;
132 	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
133 	for (; i < m; i++) {
134 		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
135 					  dev->dparms.node);
136 		if (!mr->map[i]) {
137 			rvt_deinit_mregion(mr);
138 			return -ENOMEM;
139 		}
140 		mr->mapsz++;
141 	}
142 	init_completion(&mr->comp);
143 	/* initial ref counts the pointer returned to the caller */
144 	atomic_set(&mr->refcount, 1);
145 	atomic_set(&mr->lkey_invalid, 0);
146 	mr->pd = pd;
147 	mr->max_segs = count;
148 	return 0;
149 }
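/*
 * Worked example for the map sizing above (editorial note, illustrative
 * numbers): rvt_init_mregion() allocates m = DIV_ROUND_UP(count, RVT_SEGSZ)
 * map blocks.  If RVT_SEGSZ were 8 and count were 20, that is
 * m = (20 + 8 - 1) / 8 = 3 blocks, covering 8 + 8 + 4 segment slots.
 */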
150 
151 /**
152  * rvt_alloc_lkey - allocate an lkey
153  * @mr: memory region that this lkey protects
154  * @dma_region: 0->normal key, 1->restricted DMA key
155  *
156  * Returns 0 if successful, otherwise returns -errno.
157  *
158  * Increments mr reference count as required.
159  *
160  * Sets the lkey field of mr for non-dma regions.
161  *
162  */
163 static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
164 {
165 	unsigned long flags;
166 	u32 r;
167 	u32 n;
168 	int ret = 0;
169 	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
170 	struct rvt_lkey_table *rkt = &dev->lkey_table;
171 
172 	rvt_get_mr(mr);
173 	spin_lock_irqsave(&rkt->lock, flags);
174 
175 	/* special case for dma_mr lkey == 0 */
176 	if (dma_region) {
177 		struct rvt_mregion *tmr;
178 
179 		tmr = rcu_access_pointer(dev->dma_mr);
180 		if (!tmr) {
181 			rcu_assign_pointer(dev->dma_mr, mr);
182 			mr->lkey_published = 1;
183 		} else {
184 			rvt_put_mr(mr);
185 		}
186 		goto success;
187 	}
188 
189 	/* Find the next available LKEY */
190 	r = rkt->next;
191 	n = r;
192 	for (;;) {
193 		if (!rcu_access_pointer(rkt->table[r]))
194 			break;
195 		r = (r + 1) & (rkt->max - 1);
196 		if (r == n)
197 			goto bail;
198 	}
199 	rkt->next = (r + 1) & (rkt->max - 1);
200 	/*
201 	 * Make sure lkey is never zero, since zero is reserved to indicate
202 	 * an unrestricted LKEY.
203 	 */
204 	rkt->gen++;
205 	/*
206 	 * lkey_table_size is capped to leave room for the generation number
207 	 */
208 	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
209 		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
210 		 << 8);
211 	if (mr->lkey == 0) {
212 		mr->lkey |= 1 << 8;
213 		rkt->gen++;
214 	}
215 	rcu_assign_pointer(rkt->table[r], mr);
216 	mr->lkey_published = 1;
217 success:
218 	spin_unlock_irqrestore(&rkt->lock, flags);
219 out:
220 	return ret;
221 bail:
222 	rvt_put_mr(mr);
223 	spin_unlock_irqrestore(&rkt->lock, flags);
224 	ret = -ENOMEM;
225 	goto out;
226 }
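/*
 * LKEY layout produced above (editorial note, assuming
 * dparms.lkey_table_size == 16):
 *
 *	bits 31..16	table index r
 *	bits 15..8	generation tag ((24 - lkey_table_size) bits wide)
 *	bits  7..0	zero here; consumer-owned and may be replaced later
 *			via rvt_fast_reg_mr()
 *
 * For example, r = 0x0005 with rkt->gen = 0x2a yields
 * lkey = (0x0005 << 16) | ((0x2a & 0xff) << 8) = 0x00052a00.  LKEY 0 stays
 * reserved for the unrestricted DMA MR.
 */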
227 
228 /**
229  * rvt_free_lkey - free an lkey
230  * @mr: mr to free from tables
231  */
232 static void rvt_free_lkey(struct rvt_mregion *mr)
233 {
234 	unsigned long flags;
235 	u32 lkey = mr->lkey;
236 	u32 r;
237 	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
238 	struct rvt_lkey_table *rkt = &dev->lkey_table;
239 	int freed = 0;
240 
241 	spin_lock_irqsave(&rkt->lock, flags);
242 	if (!mr->lkey_published)
243 		goto out;
244 	if (lkey == 0) {
245 		RCU_INIT_POINTER(dev->dma_mr, NULL);
246 	} else {
247 		r = lkey >> (32 - dev->dparms.lkey_table_size);
248 		RCU_INIT_POINTER(rkt->table[r], NULL);
249 	}
250 	mr->lkey_published = 0;
251 	freed++;
252 out:
253 	spin_unlock_irqrestore(&rkt->lock, flags);
254 	if (freed) {
255 		synchronize_rcu();
256 		rvt_put_mr(mr);
257 	}
258 }
259 
260 static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
261 {
262 	struct rvt_mr *mr;
263 	int rval = -ENOMEM;
264 	int m;
265 
266 	/* Allocate struct plus pointers to first level page tables. */
267 	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
268 	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
269 	if (!mr)
270 		goto bail;
271 
272 	rval = rvt_init_mregion(&mr->mr, pd, count);
273 	if (rval)
274 		goto bail;
275 	/*
276 	 * The IB core will initialize mr->ibmr except for
277 	 * lkey and rkey.
278 	 */
279 	rval = rvt_alloc_lkey(&mr->mr, 0);
280 	if (rval)
281 		goto bail_mregion;
282 	mr->ibmr.lkey = mr->mr.lkey;
283 	mr->ibmr.rkey = mr->mr.lkey;
284 done:
285 	return mr;
286 
287 bail_mregion:
288 	rvt_deinit_mregion(&mr->mr);
289 bail:
290 	kfree(mr);
291 	mr = ERR_PTR(rval);
292 	goto done;
293 }
294 
295 static void __rvt_free_mr(struct rvt_mr *mr)
296 {
297 	rvt_deinit_mregion(&mr->mr);
298 	rvt_free_lkey(&mr->mr);
299 	kfree(mr);
300 }
301 
302 /**
303  * rvt_get_dma_mr - get a DMA memory region
304  * @pd: protection domain for this memory region
305  * @acc: access flags
306  *
307  * Return: the memory region on success, otherwise returns an errno.
308  * Note that all DMA addresses should be created via the functions in
309  * struct dma_virt_ops.
310  */
311 struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
312 {
313 	struct rvt_mr *mr;
314 	struct ib_mr *ret;
315 	int rval;
316 
317 	if (ibpd_to_rvtpd(pd)->user)
318 		return ERR_PTR(-EPERM);
319 
320 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
321 	if (!mr) {
322 		ret = ERR_PTR(-ENOMEM);
323 		goto bail;
324 	}
325 
326 	rval = rvt_init_mregion(&mr->mr, pd, 0);
327 	if (rval) {
328 		ret = ERR_PTR(rval);
329 		goto bail;
330 	}
331 
332 	rval = rvt_alloc_lkey(&mr->mr, 1);
333 	if (rval) {
334 		ret = ERR_PTR(rval);
335 		goto bail_mregion;
336 	}
337 
338 	mr->mr.access_flags = acc;
339 	ret = &mr->ibmr;
340 done:
341 	return ret;
342 
343 bail_mregion:
344 	rvt_deinit_mregion(&mr->mr);
345 bail:
346 	kfree(mr);
347 	goto done;
348 }
349 
350 /**
351  * rvt_reg_user_mr - register a userspace memory region
352  * @pd: protection domain for this memory region
353  * @start: starting userspace address
354  * @length: length of region to register
 * @virt_addr: virtual address to associate with this memory region (iova)
355  * @mr_access_flags: access flags for this memory region
356  * @udata: unused by the driver
357  *
358  * Return: the memory region on success, otherwise returns an errno.
359  */
360 struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
361 			      u64 virt_addr, int mr_access_flags,
362 			      struct ib_udata *udata)
363 {
364 	struct rvt_mr *mr;
365 	struct ib_umem *umem;
366 	struct scatterlist *sg;
367 	int n, m, entry;
368 	struct ib_mr *ret;
369 
370 	if (length == 0)
371 		return ERR_PTR(-EINVAL);
372 
373 	umem = ib_umem_get(pd->uobject->context, start, length,
374 			   mr_access_flags, 0);
375 	if (IS_ERR(umem))
376 		return (void *)umem;
377 
378 	n = umem->nmap;
379 
380 	mr = __rvt_alloc_mr(n, pd);
381 	if (IS_ERR(mr)) {
382 		ret = (struct ib_mr *)mr;
383 		goto bail_umem;
384 	}
385 
386 	mr->mr.user_base = start;
387 	mr->mr.iova = virt_addr;
388 	mr->mr.length = length;
389 	mr->mr.offset = ib_umem_offset(umem);
390 	mr->mr.access_flags = mr_access_flags;
391 	mr->umem = umem;
392 
393 	if (is_power_of_2(umem->page_size))
394 		mr->mr.page_shift = ilog2(umem->page_size);
395 	m = 0;
396 	n = 0;
397 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
398 		void *vaddr;
399 
400 		vaddr = page_address(sg_page(sg));
401 		if (!vaddr) {
402 			ret = ERR_PTR(-EINVAL);
403 			goto bail_inval;
404 		}
405 		mr->mr.map[m]->segs[n].vaddr = vaddr;
406 		mr->mr.map[m]->segs[n].length = umem->page_size;
407 		trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
408 		n++;
409 		if (n == RVT_SEGSZ) {
410 			m++;
411 			n = 0;
412 		}
413 	}
414 	return &mr->ibmr;
415 
416 bail_inval:
417 	__rvt_free_mr(mr);
418 
419 bail_umem:
420 	ib_umem_release(umem);
421 
422 	return ret;
423 }
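/*
 * Illustrative flow (editorial note): a userspace registration such as
 *
 *	mr = ibv_reg_mr(pd, buf, len,
 *			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
 *
 * reaches rvt_reg_user_mr() through ib_uverbs; the function pins the pages
 * with ib_umem_get() and records one segment per mapped page in mr->mr.map[].
 */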
424 
425 /**
426  * rvt_dereg_mr - unregister and free a memory region
427  * @ibmr: the memory region to free
428  *
429  *
430  * Note that this is called to free MRs created by rvt_get_dma_mr()
431  * or rvt_reg_user_mr().
432  *
433  * Returns 0 on success.
434  */
435 int rvt_dereg_mr(struct ib_mr *ibmr)
436 {
437 	struct rvt_mr *mr = to_imr(ibmr);
438 	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
439 	int ret = 0;
440 	unsigned long timeout;
441 
442 	rvt_free_lkey(&mr->mr);
443 
444 	rvt_put_mr(&mr->mr); /* will set completion if last */
445 	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
446 	if (!timeout) {
447 		rvt_pr_err(rdi,
448 			   "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
449 			   mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
450 		rvt_get_mr(&mr->mr);
451 		ret = -EBUSY;
452 		goto out;
453 	}
454 	rvt_deinit_mregion(&mr->mr);
455 	if (mr->umem)
456 		ib_umem_release(mr->umem);
457 	kfree(mr);
458 out:
459 	return ret;
460 }
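/*
 * Reference counting summary (editorial note): rvt_init_mregion() starts the
 * refcount at 1 for the pointer returned to the caller, and rvt_alloc_lkey()
 * holds another reference while the lkey is published.  rvt_free_lkey()
 * drops the published reference after synchronize_rcu(), the rvt_put_mr()
 * above drops the creation reference, and the completion fires only once
 * every in-flight user (e.g. rvt_lkey_ok()/rvt_rkey_ok() callers) has dropped
 * theirs, which is why the 5 second timeout above is reported as -EBUSY.
 */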
461 
462 /**
463  * rvt_alloc_mr - Allocate a memory region usable with IB_WR_REG_MR
464  * @pd: protection domain for this memory region
465  * @mr_type: mem region type
466  * @max_num_sg: Max number of segments allowed
467  *
468  * Return: the memory region on success, otherwise return an errno.
469  */
470 struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
471 			   enum ib_mr_type mr_type,
472 			   u32 max_num_sg)
473 {
474 	struct rvt_mr *mr;
475 
476 	if (mr_type != IB_MR_TYPE_MEM_REG)
477 		return ERR_PTR(-EINVAL);
478 
479 	mr = __rvt_alloc_mr(max_num_sg, pd);
480 	if (IS_ERR(mr))
481 		return (struct ib_mr *)mr;
482 
483 	return &mr->ibmr;
484 }
485 
486 /**
487  * rvt_set_page - page assignment function called by ib_sg_to_pages
488  * @ibmr: memory region
489  * @addr: dma address of mapped page
490  *
491  * Return: 0 on success
492  */
493 static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
494 {
495 	struct rvt_mr *mr = to_imr(ibmr);
496 	u32 ps = 1 << mr->mr.page_shift;
497 	u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
498 	int m, n;
499 
500 	if (unlikely(mapped_segs == mr->mr.max_segs))
501 		return -ENOMEM;
502 
503 	if (mr->mr.length == 0) {
504 		mr->mr.user_base = addr;
505 		mr->mr.iova = addr;
506 	}
507 
508 	m = mapped_segs / RVT_SEGSZ;
509 	n = mapped_segs % RVT_SEGSZ;
510 	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
511 	mr->mr.map[m]->segs[n].length = ps;
512 	trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
513 	mr->mr.length += ps;
514 
515 	return 0;
516 }
517 
518 /**
519  * rvt_map_mr_sg - map an sg list to the memory region
520  * @ibmr: memory region
521  * @sg: dma mapped scatterlist
522  * @sg_nents: number of entries in sg
523  * @sg_offset: offset in bytes into sg
524  *
525  * Return: number of sg elements mapped to the memory region
526  */
527 int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
528 		  int sg_nents, unsigned int *sg_offset)
529 {
530 	struct rvt_mr *mr = to_imr(ibmr);
531 
532 	mr->mr.length = 0;
533 	mr->mr.page_shift = PAGE_SHIFT;
534 	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
535 			      rvt_set_page);
536 }
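/*
 * Illustrative kernel-ULP usage (editorial note; ib_* names are standard IB
 * core APIs, the values are hypothetical): an MR obtained from
 * ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents) is typically mapped and then
 * registered with an IB_WR_REG_MR work request:
 *
 *	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
 *	if (n != nents)
 *		goto err;
 *	reg_wr.wr.opcode = IB_WR_REG_MR;
 *	reg_wr.mr = mr;
 *	reg_wr.key = mr->rkey;
 *	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
 *
 * ib_map_mr_sg() calls back into rvt_map_mr_sg()/rvt_set_page() above, and
 * the posted IB_WR_REG_MR is typically honored via rvt_fast_reg_mr() below.
 */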
537 
538 /**
539  * rvt_fast_reg_mr - fast register physical MR
540  * @qp: the queue pair where the work request comes from
541  * @ibmr: the memory region to be registered
542  * @key: updated key for this memory region
543  * @access: access flags for this memory region
544  *
545  * Returns 0 on success.
546  */
547 int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
548 		    int access)
549 {
550 	struct rvt_mr *mr = to_imr(ibmr);
551 
552 	if (qp->ibqp.pd != mr->mr.pd)
553 		return -EACCES;
554 
555 	/* not applicable to dma MR or user MR */
556 	if (!mr->mr.lkey || mr->umem)
557 		return -EINVAL;
558 
559 	if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
560 		return -EINVAL;
561 
562 	ibmr->lkey = key;
563 	ibmr->rkey = key;
564 	mr->mr.lkey = key;
565 	mr->mr.access_flags = access;
566 	atomic_set(&mr->mr.lkey_invalid, 0);
567 
568 	return 0;
569 }
570 EXPORT_SYMBOL(rvt_fast_reg_mr);
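/*
 * Worked example for the key check above (editorial note, illustrative
 * values): only the low 8 consumer-owned bits may change.  With
 * mr->mr.lkey == 0x00052a00, key == 0x00052a07 is accepted, while
 * key == 0x00062a07 fails the (key & 0xFFFFFF00) comparison and returns
 * -EINVAL.
 */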
571 
572 /**
573  * rvt_invalidate_rkey - invalidate an MR rkey
574  * @qp: queue pair associated with the invalidate op
575  * @rkey: rkey to invalidate
576  *
577  * Returns 0 on success.
578  */
579 int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
580 {
581 	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
582 	struct rvt_lkey_table *rkt = &dev->lkey_table;
583 	struct rvt_mregion *mr;
584 
585 	if (rkey == 0)
586 		return -EINVAL;
587 
588 	rcu_read_lock();
589 	mr = rcu_dereference(
590 		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
591 	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
592 		goto bail;
593 
594 	atomic_set(&mr->lkey_invalid, 1);
595 	rcu_read_unlock();
596 	return 0;
597 
598 bail:
599 	rcu_read_unlock();
600 	return -EINVAL;
601 }
602 EXPORT_SYMBOL(rvt_invalidate_rkey);
603 
604 /**
605  * rvt_alloc_fmr - allocate a fast memory region
606  * @pd: the protection domain for this memory region
607  * @mr_access_flags: access flags for this memory region
608  * @fmr_attr: fast memory region attributes
609  *
610  * Return: the memory region on success, otherwise returns an errno.
611  */
612 struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
613 			     struct ib_fmr_attr *fmr_attr)
614 {
615 	struct rvt_fmr *fmr;
616 	int m;
617 	struct ib_fmr *ret;
618 	int rval = -ENOMEM;
619 
620 	/* Allocate struct plus pointers to first level page tables. */
621 	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
622 	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
623 	if (!fmr)
624 		goto bail;
625 
626 	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
627 	if (rval)
628 		goto bail;
629 
630 	/*
631 	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
632 	 * rkey.
633 	 */
634 	rval = rvt_alloc_lkey(&fmr->mr, 0);
635 	if (rval)
636 		goto bail_mregion;
637 	fmr->ibfmr.rkey = fmr->mr.lkey;
638 	fmr->ibfmr.lkey = fmr->mr.lkey;
639 	/*
640 	 * Resources are allocated but no valid mapping (RKEY can't be
641 	 * used).
642 	 */
643 	fmr->mr.access_flags = mr_access_flags;
644 	fmr->mr.max_segs = fmr_attr->max_pages;
645 	fmr->mr.page_shift = fmr_attr->page_shift;
646 
647 	ret = &fmr->ibfmr;
648 done:
649 	return ret;
650 
651 bail_mregion:
652 	rvt_deinit_mregion(&fmr->mr);
653 bail:
654 	kfree(fmr);
655 	ret = ERR_PTR(rval);
656 	goto done;
657 }
658 
659 /**
660  * rvt_map_phys_fmr - set up a fast memory region
661  * @ibfmr: the fast memory region to set up
662  * @page_list: the list of pages to associate with the fast memory region
663  * @list_len: the number of pages to associate with the fast memory region
664  * @iova: the virtual address of the start of the fast memory region
665  *
666  * This may be called from interrupt context.
667  *
668  * Return: 0 on success
669  */
670 
671 int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
672 		     int list_len, u64 iova)
673 {
674 	struct rvt_fmr *fmr = to_ifmr(ibfmr);
675 	struct rvt_lkey_table *rkt;
676 	unsigned long flags;
677 	int m, n, i;
678 	u32 ps;
679 	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
680 
681 	i = atomic_read(&fmr->mr.refcount);
682 	if (i > 2)
683 		return -EBUSY;
684 
685 	if (list_len > fmr->mr.max_segs)
686 		return -EINVAL;
687 
688 	rkt = &rdi->lkey_table;
689 	spin_lock_irqsave(&rkt->lock, flags);
690 	fmr->mr.user_base = iova;
691 	fmr->mr.iova = iova;
692 	ps = 1 << fmr->mr.page_shift;
693 	fmr->mr.length = list_len * ps;
694 	m = 0;
695 	n = 0;
696 	for (i = 0; i < list_len; i++) {
697 		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
698 		fmr->mr.map[m]->segs[n].length = ps;
699 		trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
700 		if (++n == RVT_SEGSZ) {
701 			m++;
702 			n = 0;
703 		}
704 	}
705 	spin_unlock_irqrestore(&rkt->lock, flags);
706 	return 0;
707 }
708 
709 /**
710  * rvt_unmap_fmr - unmap fast memory regions
711  * @fmr_list: the list of fast memory regions to unmap
712  *
713  * Return: 0 on success.
714  */
715 int rvt_unmap_fmr(struct list_head *fmr_list)
716 {
717 	struct rvt_fmr *fmr;
718 	struct rvt_lkey_table *rkt;
719 	unsigned long flags;
720 	struct rvt_dev_info *rdi;
721 
722 	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
723 		rdi = ib_to_rvt(fmr->ibfmr.device);
724 		rkt = &rdi->lkey_table;
725 		spin_lock_irqsave(&rkt->lock, flags);
726 		fmr->mr.user_base = 0;
727 		fmr->mr.iova = 0;
728 		fmr->mr.length = 0;
729 		spin_unlock_irqrestore(&rkt->lock, flags);
730 	}
731 	return 0;
732 }
733 
734 /**
735  * rvt_dealloc_fmr - deallocate a fast memory region
736  * @ibfmr: the fast memory region to deallocate
737  *
738  * Return: 0 on success.
739  */
740 int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
741 {
742 	struct rvt_fmr *fmr = to_ifmr(ibfmr);
743 	int ret = 0;
744 	unsigned long timeout;
745 
746 	rvt_free_lkey(&fmr->mr);
747 	rvt_put_mr(&fmr->mr); /* will set completion if last */
748 	timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
749 	if (!timeout) {
750 		rvt_get_mr(&fmr->mr);
751 		ret = -EBUSY;
752 		goto out;
753 	}
754 	rvt_deinit_mregion(&fmr->mr);
755 	kfree(fmr);
756 out:
757 	return ret;
758 }
759 
760 /**
761  * rvt_lkey_ok - check IB SGE for validity and initialize
762  * @rkt: table containing lkey to check SGE against
763  * @pd: protection domain
764  * @isge: outgoing internal SGE
765  * @sge: SGE to check
766  * @acc: access flags
767  *
768  * Check the IB SGE for validity and initialize our internal version
769  * of it.
770  *
771  * Return: 1 if valid and successful, otherwise returns 0.
772  *
773  * increments the reference count upon success
774  *
775  */
776 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
777 		struct rvt_sge *isge, struct ib_sge *sge, int acc)
778 {
779 	struct rvt_mregion *mr;
780 	unsigned n, m;
781 	size_t off;
782 
783 	/*
784 	 * We use LKEY == zero for kernel virtual addresses
785 	 * (see rvt_get_dma_mr() and dma_virt_ops).
786 	 */
787 	rcu_read_lock();
788 	if (sge->lkey == 0) {
789 		struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
790 
791 		if (pd->user)
792 			goto bail;
793 		mr = rcu_dereference(dev->dma_mr);
794 		if (!mr)
795 			goto bail;
796 		rvt_get_mr(mr);
797 		rcu_read_unlock();
798 
799 		isge->mr = mr;
800 		isge->vaddr = (void *)sge->addr;
801 		isge->length = sge->length;
802 		isge->sge_length = sge->length;
803 		isge->m = 0;
804 		isge->n = 0;
805 		goto ok;
806 	}
807 	mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
808 	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
809 		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
810 		goto bail;
811 
812 	off = sge->addr - mr->user_base;
813 	if (unlikely(sge->addr < mr->user_base ||
814 		     off + sge->length > mr->length ||
815 		     (mr->access_flags & acc) != acc))
816 		goto bail;
817 	rvt_get_mr(mr);
818 	rcu_read_unlock();
819 
820 	off += mr->offset;
821 	if (mr->page_shift) {
822 		/*
823 		 * Page sizes are a uniform power of 2, so no loop is needed;
824 		 * entries_spanned_by_off is the number of times the loop below
825 		 * would have executed.
826 		 */
827 		size_t entries_spanned_by_off;
828 
829 		entries_spanned_by_off = off >> mr->page_shift;
830 		off -= (entries_spanned_by_off << mr->page_shift);
831 		m = entries_spanned_by_off / RVT_SEGSZ;
832 		n = entries_spanned_by_off % RVT_SEGSZ;
833 	} else {
834 		m = 0;
835 		n = 0;
836 		while (off >= mr->map[m]->segs[n].length) {
837 			off -= mr->map[m]->segs[n].length;
838 			n++;
839 			if (n >= RVT_SEGSZ) {
840 				m++;
841 				n = 0;
842 			}
843 		}
844 	}
845 	isge->mr = mr;
846 	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
847 	isge->length = mr->map[m]->segs[n].length - off;
848 	isge->sge_length = sge->length;
849 	isge->m = m;
850 	isge->n = n;
851 ok:
852 	return 1;
853 bail:
854 	rcu_read_unlock();
855 	return 0;
856 }
857 EXPORT_SYMBOL(rvt_lkey_ok);
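/*
 * Worked example for the segment lookup above (editorial note, assuming
 * page_shift == 12 and RVT_SEGSZ == 8): for off == 0x6234,
 *
 *	entries_spanned_by_off = 0x6234 >> 12 = 6
 *	off = 0x6234 - (6 << 12) = 0x234
 *	m = 6 / 8 = 0, n = 6 % 8 = 6
 *
 * so the SGE starts 0x234 bytes into segment 6 of map block 0, matching what
 * the fallback while loop would have computed one segment at a time.
 * rvt_rkey_ok() below performs the same computation for RKEYs.
 */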
858 
859 /**
860  * rvt_rkey_ok - check the IB virtual address, length, and RKEY
861  * @qp: qp for validation
862  * @sge: SGE state
863  * @len: length of data
864  * @vaddr: virtual address to place data
865  * @rkey: rkey to check
866  * @acc: access flags
867  *
868  * Return: 1 if successful, otherwise 0.
869  *
870  * increments the reference count upon success
871  */
872 int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
873 		u32 len, u64 vaddr, u32 rkey, int acc)
874 {
875 	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
876 	struct rvt_lkey_table *rkt = &dev->lkey_table;
877 	struct rvt_mregion *mr;
878 	unsigned n, m;
879 	size_t off;
880 
881 	/*
882 	 * We use RKEY == zero for kernel virtual addresses
883 	 * (see rvt_get_dma_mr() and dma_virt_ops).
884 	 */
885 	rcu_read_lock();
886 	if (rkey == 0) {
887 		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
888 		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);
889 
890 		if (pd->user)
891 			goto bail;
892 		mr = rcu_dereference(rdi->dma_mr);
893 		if (!mr)
894 			goto bail;
895 		rvt_get_mr(mr);
896 		rcu_read_unlock();
897 
898 		sge->mr = mr;
899 		sge->vaddr = (void *)vaddr;
900 		sge->length = len;
901 		sge->sge_length = len;
902 		sge->m = 0;
903 		sge->n = 0;
904 		goto ok;
905 	}
906 
907 	mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
908 	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
909 		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
910 		goto bail;
911 
912 	off = vaddr - mr->iova;
913 	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
914 		     (mr->access_flags & acc) == 0))
915 		goto bail;
916 	rvt_get_mr(mr);
917 	rcu_read_unlock();
918 
919 	off += mr->offset;
920 	if (mr->page_shift) {
921 		/*
922 		 * Page sizes are a uniform power of 2, so no loop is needed;
923 		 * entries_spanned_by_off is the number of times the loop below
924 		 * would have executed.
925 		 */
926 		size_t entries_spanned_by_off;
927 
928 		entries_spanned_by_off = off >> mr->page_shift;
929 		off -= (entries_spanned_by_off << mr->page_shift);
930 		m = entries_spanned_by_off / RVT_SEGSZ;
931 		n = entries_spanned_by_off % RVT_SEGSZ;
932 	} else {
933 		m = 0;
934 		n = 0;
935 		while (off >= mr->map[m]->segs[n].length) {
936 			off -= mr->map[m]->segs[n].length;
937 			n++;
938 			if (n >= RVT_SEGSZ) {
939 				m++;
940 				n = 0;
941 			}
942 		}
943 	}
944 	sge->mr = mr;
945 	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
946 	sge->length = mr->map[m]->segs[n].length - off;
947 	sge->sge_length = len;
948 	sge->m = m;
949 	sge->n = n;
950 ok:
951 	return 1;
952 bail:
953 	rcu_read_unlock();
954 	return 0;
955 }
956 EXPORT_SYMBOL(rvt_rkey_ok);
957