xref: /linux/drivers/infiniband/hw/mlx5/mr.c (revision 957e3facd147510f2cf8780e38606f1d707f0e33)
1 /*
2  * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 
34 #include <linux/kref.h>
35 #include <linux/random.h>
36 #include <linux/debugfs.h>
37 #include <linux/export.h>
38 #include <linux/delay.h>
39 #include <rdma/ib_umem.h>
40 #include "mlx5_ib.h"
41 
42 enum {
43 	MAX_PENDING_REG_MR = 8,
44 };
45 
46 enum {
47 	MLX5_UMR_ALIGN	= 2048
48 };
49 
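/*
 * Round @ptr up to the next @align boundary (@align must be a power of
 * two).  Used to satisfy the MLX5_UMR_ALIGN (2048 byte) alignment the
 * device appears to require for the page address array posted in a UMR
 * work request (see reg_umr() below).
 */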
50 static __be64 *mr_align(__be64 *ptr, int align)
51 {
52 	unsigned long mask = align - 1;
53 
54 	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
55 }
56 
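/*
 * Map an allocation order to an index into dev->cache.ent[].  ent[0]
 * holds the smallest cached order (2, as set in mlx5_mr_cache_init()),
 * so smaller orders are clamped to index 0.
 */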
57 static int order2idx(struct mlx5_ib_dev *dev, int order)
58 {
59 	struct mlx5_mr_cache *cache = &dev->cache;
60 
61 	if (order < cache->ent[0].order)
62 		return 0;
63 	else
64 		return order - cache->ent[0].order;
65 }
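/*
 * Completion handler for the asynchronous mlx5_core_create_mkey()
 * requests issued by add_keys().  On failure the MR is freed and
 * further cache filling is throttled for about a second through
 * fill_delay/delay_timer.  On success the mkey gets its variant byte
 * from mkey_key, the MR is added to its cache bucket and the mkey is
 * inserted into the device's radix tree.
 */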
66 
67 static void reg_mr_callback(int status, void *context)
68 {
69 	struct mlx5_ib_mr *mr = context;
70 	struct mlx5_ib_dev *dev = mr->dev;
71 	struct mlx5_mr_cache *cache = &dev->cache;
72 	int c = order2idx(dev, mr->order);
73 	struct mlx5_cache_ent *ent = &cache->ent[c];
74 	u8 key;
75 	unsigned long flags;
76 	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
77 	int err;
78 
79 	spin_lock_irqsave(&ent->lock, flags);
80 	ent->pending--;
81 	spin_unlock_irqrestore(&ent->lock, flags);
82 	if (status) {
83 		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
84 		kfree(mr);
85 		dev->fill_delay = 1;
86 		mod_timer(&dev->delay_timer, jiffies + HZ);
87 		return;
88 	}
89 
90 	if (mr->out.hdr.status) {
91 		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
92 			     mr->out.hdr.status,
93 			     be32_to_cpu(mr->out.hdr.syndrome));
94 		kfree(mr);
95 		dev->fill_delay = 1;
96 		mod_timer(&dev->delay_timer, jiffies + HZ);
97 		return;
98 	}
99 
100 	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
101 	key = dev->mdev->priv.mkey_key++;
102 	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
103 	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
104 
105 	cache->last_add = jiffies;
106 
107 	spin_lock_irqsave(&ent->lock, flags);
108 	list_add_tail(&mr->list, &ent->head);
109 	ent->cur++;
110 	ent->size++;
111 	spin_unlock_irqrestore(&ent->lock, flags);
112 
113 	write_lock_irqsave(&table->lock, flags);
114 	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
115 				&mr->mmr);
116 	if (err)
117 		pr_err("Error inserting to mr tree. 0x%x\n", -err);
118 	write_unlock_irqrestore(&table->lock, flags);
119 }
120 
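/*
 * Asynchronously create up to @num mkeys for cache bucket @c.  Each
 * mkey is created free (status bit 6 set), UMR enabled, and sized for
 * 1 << order pages of 4K (log2_page_size = 12); completions land in
 * reg_mr_callback().  Stops with -EAGAIN once MAX_PENDING_REG_MR
 * requests are already outstanding for this bucket.
 */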
121 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
122 {
123 	struct mlx5_mr_cache *cache = &dev->cache;
124 	struct mlx5_cache_ent *ent = &cache->ent[c];
125 	struct mlx5_create_mkey_mbox_in *in;
126 	struct mlx5_ib_mr *mr;
127 	int npages = 1 << ent->order;
128 	int err = 0;
129 	int i;
130 
131 	in = kzalloc(sizeof(*in), GFP_KERNEL);
132 	if (!in)
133 		return -ENOMEM;
134 
135 	for (i = 0; i < num; i++) {
136 		if (ent->pending >= MAX_PENDING_REG_MR) {
137 			err = -EAGAIN;
138 			break;
139 		}
140 
141 		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
142 		if (!mr) {
143 			err = -ENOMEM;
144 			break;
145 		}
146 		mr->order = ent->order;
147 		mr->umred = 1;
148 		mr->dev = dev;
149 		in->seg.status = 1 << 6;
150 		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
151 		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
152 		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
153 		in->seg.log2_page_size = 12;
154 
155 		spin_lock_irq(&ent->lock);
156 		ent->pending++;
157 		spin_unlock_irq(&ent->lock);
158 		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
159 					    sizeof(*in), reg_mr_callback,
160 					    mr, &mr->out);
161 		if (err) {
162 			spin_lock_irq(&ent->lock);
163 			ent->pending--;
164 			spin_unlock_irq(&ent->lock);
165 			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
166 			kfree(mr);
167 			break;
168 		}
169 	}
170 
171 	kfree(in);
172 	return err;
173 }
174 
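/*
 * Pop up to @num MRs from the head of bucket @c and destroy their
 * mkeys synchronously.  The MR is only freed when the destroy
 * succeeded.
 */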
175 static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
176 {
177 	struct mlx5_mr_cache *cache = &dev->cache;
178 	struct mlx5_cache_ent *ent = &cache->ent[c];
179 	struct mlx5_ib_mr *mr;
180 	int err;
181 	int i;
182 
183 	for (i = 0; i < num; i++) {
184 		spin_lock_irq(&ent->lock);
185 		if (list_empty(&ent->head)) {
186 			spin_unlock_irq(&ent->lock);
187 			return;
188 		}
189 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
190 		list_del(&mr->list);
191 		ent->cur--;
192 		ent->size--;
193 		spin_unlock_irq(&ent->lock);
194 		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
195 		if (err)
196 			mlx5_ib_warn(dev, "failed to destroy mkey\n");
197 		else
198 			kfree(mr);
199 	}
200 }
201 
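/*
 * debugfs write handler for the per-bucket "size" file: grow the bucket
 * (never below its limit) by adding keys, retrying while add_keys()
 * returns -EAGAIN, or shrink it by removing keys.  Together with the
 * "limit", "cur" and "miss" files created in mlx5_mr_cache_debugfs_init()
 * this is normally driven from userspace, for example:
 *
 *	# cat  .../mr_cache/<order>/size
 *	# echo 64 > .../mr_cache/<order>/size
 *
 * where the mr_cache directory lives under the mlx5 core debugfs root
 * for the device (the exact path depends on how that root was created).
 */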
202 static ssize_t size_write(struct file *filp, const char __user *buf,
203 			  size_t count, loff_t *pos)
204 {
205 	struct mlx5_cache_ent *ent = filp->private_data;
206 	struct mlx5_ib_dev *dev = ent->dev;
207 	char lbuf[20];
208 	u32 var;
209 	int err;
210 	int c;
211 
212 	if (copy_from_user(lbuf, buf, min(count, sizeof(lbuf) - 1)))
213 		return -EFAULT;
214 
215 	c = order2idx(dev, ent->order);
216 	lbuf[min(count, sizeof(lbuf) - 1)] = 0;
217 
218 	if (sscanf(lbuf, "%u", &var) != 1)
219 		return -EINVAL;
220 
221 	if (var < ent->limit)
222 		return -EINVAL;
223 
224 	if (var > ent->size) {
225 		do {
226 			err = add_keys(dev, c, var - ent->size);
227 			if (err && err != -EAGAIN)
228 				return err;
229 
230 			usleep_range(3000, 5000);
231 		} while (err);
232 	} else if (var < ent->size) {
233 		remove_keys(dev, c, ent->size - var);
234 	}
235 
236 	return count;
237 }
238 
239 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
240 			 loff_t *pos)
241 {
242 	struct mlx5_cache_ent *ent = filp->private_data;
243 	char lbuf[20];
244 	int err;
245 
246 	if (*pos)
247 		return 0;
248 
249 	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
250 	if (err < 0)
251 		return err;
252 
253 	if (copy_to_user(buf, lbuf, err))
254 		return -EFAULT;
255 
256 	*pos += err;
257 
258 	return err;
259 }
260 
261 static const struct file_operations size_fops = {
262 	.owner	= THIS_MODULE,
263 	.open	= simple_open,
264 	.write	= size_write,
265 	.read	= size_read,
266 };
267 
268 static ssize_t limit_write(struct file *filp, const char __user *buf,
269 			   size_t count, loff_t *pos)
270 {
271 	struct mlx5_cache_ent *ent = filp->private_data;
272 	struct mlx5_ib_dev *dev = ent->dev;
273 	char lbuf[20];
274 	u32 var;
275 	int err;
276 	int c;
277 
278 	if (copy_from_user(lbuf, buf, min(count, sizeof(lbuf) - 1)))
279 		return -EFAULT;
280 
281 	c = order2idx(dev, ent->order);
282 	lbuf[min(count, sizeof(lbuf) - 1)] = 0;
283 
284 	if (sscanf(lbuf, "%u", &var) != 1)
285 		return -EINVAL;
286 
287 	if (var > ent->size)
288 		return -EINVAL;
289 
290 	ent->limit = var;
291 
292 	if (ent->cur < ent->limit) {
293 		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
294 		if (err)
295 			return err;
296 	}
297 
298 	return count;
299 }
300 
301 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
302 			  loff_t *pos)
303 {
304 	struct mlx5_cache_ent *ent = filp->private_data;
305 	char lbuf[20];
306 	int err;
307 
308 	if (*pos)
309 		return 0;
310 
311 	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
312 	if (err < 0)
313 		return err;
314 
315 	if (copy_to_user(buf, lbuf, err))
316 		return -EFAULT;
317 
318 	*pos += err;
319 
320 	return err;
321 }
322 
323 static const struct file_operations limit_fops = {
324 	.owner	= THIS_MODULE,
325 	.open	= simple_open,
326 	.write	= limit_write,
327 	.read	= limit_read,
328 };
329 
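/* Return 1 if any cache bucket is still below its limit and needs filling. */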
330 static int someone_adding(struct mlx5_mr_cache *cache)
331 {
332 	int i;
333 
334 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
335 		if (cache->ent[i].cur < cache->ent[i].limit)
336 			return 1;
337 	}
338 
339 	return 0;
340 }
341 
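/*
 * Background worker keeping each bucket between limit and 2 * limit
 * entries: while below 2 * limit (and not in a fill-delay period) it
 * adds one key at a time, backing off 3 ms on -EAGAIN and 1 s on other
 * errors; once above 2 * limit it trims one entry at a time, but only
 * if no bucket needs filling and nothing was added in the last 300
 * seconds.
 */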
342 static void __cache_work_func(struct mlx5_cache_ent *ent)
343 {
344 	struct mlx5_ib_dev *dev = ent->dev;
345 	struct mlx5_mr_cache *cache = &dev->cache;
346 	int i = order2idx(dev, ent->order);
347 	int err;
348 
349 	if (cache->stopped)
350 		return;
351 
352 	ent = &dev->cache.ent[i];
353 	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
354 		err = add_keys(dev, i, 1);
355 		if (ent->cur < 2 * ent->limit) {
356 			if (err == -EAGAIN) {
357 				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
358 					    i + 2);
359 				queue_delayed_work(cache->wq, &ent->dwork,
360 						   msecs_to_jiffies(3));
361 			} else if (err) {
362 				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
363 					     i + 2, err);
364 				queue_delayed_work(cache->wq, &ent->dwork,
365 						   msecs_to_jiffies(1000));
366 			} else {
367 				queue_work(cache->wq, &ent->work);
368 			}
369 		}
370 	} else if (ent->cur > 2 * ent->limit) {
371 		if (!someone_adding(cache) &&
372 		    time_after(jiffies, cache->last_add + 300 * HZ)) {
373 			remove_keys(dev, i, 1);
374 			if (ent->cur > ent->limit)
375 				queue_work(cache->wq, &ent->work);
376 		} else {
377 			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
378 		}
379 	}
380 }
381 
382 static void delayed_cache_work_func(struct work_struct *work)
383 {
384 	struct mlx5_cache_ent *ent;
385 
386 	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
387 	__cache_work_func(ent);
388 }
389 
390 static void cache_work_func(struct work_struct *work)
391 {
392 	struct mlx5_cache_ent *ent;
393 
394 	ent = container_of(work, struct mlx5_cache_ent, work);
395 	__cache_work_func(ent);
396 }
397 
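/*
 * Take an MR from the cache, scanning buckets from the requested order
 * upwards.  Empty buckets passed along the way get their work item
 * queued so they refill in the background; a miss is counted against
 * the originally requested bucket when nothing suitable was found.
 */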
398 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
399 {
400 	struct mlx5_mr_cache *cache = &dev->cache;
401 	struct mlx5_ib_mr *mr = NULL;
402 	struct mlx5_cache_ent *ent;
403 	int c;
404 	int i;
405 
406 	c = order2idx(dev, order);
407 	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
408 		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
409 		return NULL;
410 	}
411 
412 	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
413 		ent = &cache->ent[i];
414 
415 		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
416 
417 		spin_lock_irq(&ent->lock);
418 		if (!list_empty(&ent->head)) {
419 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
420 					      list);
421 			list_del(&mr->list);
422 			ent->cur--;
423 			spin_unlock_irq(&ent->lock);
424 			if (ent->cur < ent->limit)
425 				queue_work(cache->wq, &ent->work);
426 			break;
427 		}
428 		spin_unlock_irq(&ent->lock);
429 
430 		queue_work(cache->wq, &ent->work);
431 
432 		if (mr)
433 			break;
434 	}
435 
436 	if (!mr)
437 		cache->ent[c].miss++;
438 
439 	return mr;
440 }
441 
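/*
 * Return a cache-allocated MR to its bucket; if the bucket now holds
 * more than 2 * limit entries, kick the worker so it can shrink.
 */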
442 static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
443 {
444 	struct mlx5_mr_cache *cache = &dev->cache;
445 	struct mlx5_cache_ent *ent;
446 	int shrink = 0;
447 	int c;
448 
449 	c = order2idx(dev, mr->order);
450 	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
451 		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
452 		return;
453 	}
454 	ent = &cache->ent[c];
455 	spin_lock_irq(&ent->lock);
456 	list_add_tail(&mr->list, &ent->head);
457 	ent->cur++;
458 	if (ent->cur > 2 * ent->limit)
459 		shrink = 1;
460 	spin_unlock_irq(&ent->lock);
461 
462 	if (shrink)
463 		queue_work(cache->wq, &ent->work);
464 }
465 
466 static void clean_keys(struct mlx5_ib_dev *dev, int c)
467 {
468 	struct mlx5_mr_cache *cache = &dev->cache;
469 	struct mlx5_cache_ent *ent = &cache->ent[c];
470 	struct mlx5_ib_mr *mr;
471 	int err;
472 
473 	cancel_delayed_work(&ent->dwork);
474 	while (1) {
475 		spin_lock_irq(&ent->lock);
476 		if (list_empty(&ent->head)) {
477 			spin_unlock_irq(&ent->lock);
478 			return;
479 		}
480 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
481 		list_del(&mr->list);
482 		ent->cur--;
483 		ent->size--;
484 		spin_unlock_irq(&ent->lock);
485 		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
486 		if (err)
487 			mlx5_ib_warn(dev, "failed to destroy mkey\n");
488 		else
489 			kfree(mr);
490 	}
491 }
492 
493 static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
494 {
495 	struct mlx5_mr_cache *cache = &dev->cache;
496 	struct mlx5_cache_ent *ent;
497 	int i;
498 
499 	if (!mlx5_debugfs_root)
500 		return 0;
501 
502 	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
503 	if (!cache->root)
504 		return -ENOMEM;
505 
506 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
507 		ent = &cache->ent[i];
508 		sprintf(ent->name, "%d", ent->order);
509 		ent->dir = debugfs_create_dir(ent->name,  cache->root);
510 		if (!ent->dir)
511 			return -ENOMEM;
512 
513 		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
514 						 &size_fops);
515 		if (!ent->fsize)
516 			return -ENOMEM;
517 
518 		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
519 						  &limit_fops);
520 		if (!ent->flimit)
521 			return -ENOMEM;
522 
523 		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
524 					       &ent->cur);
525 		if (!ent->fcur)
526 			return -ENOMEM;
527 
528 		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
529 						&ent->miss);
530 		if (!ent->fmiss)
531 			return -ENOMEM;
532 	}
533 
534 	return 0;
535 }
536 
537 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
538 {
539 	if (!mlx5_debugfs_root)
540 		return;
541 
542 	debugfs_remove_recursive(dev->cache.root);
543 }
544 
545 static void delay_time_func(unsigned long ctx)
546 {
547 	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
548 
549 	dev->fill_delay = 0;
550 }
551 
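/*
 * Set up the MR cache: a single threaded workqueue, the fill-delay
 * timer and one bucket per order (order = index + 2).  Bucket limits
 * come from the device profile when MLX5_PROF_MASK_MR_CACHE is set,
 * otherwise 0, and an initial fill is queued for every bucket.  The
 * debugfs tree is best effort; failure there only logs a warning.
 */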
552 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
553 {
554 	struct mlx5_mr_cache *cache = &dev->cache;
555 	struct mlx5_cache_ent *ent;
556 	int limit;
557 	int err;
558 	int i;
559 
560 	cache->wq = create_singlethread_workqueue("mkey_cache");
561 	if (!cache->wq) {
562 		mlx5_ib_warn(dev, "failed to create work queue\n");
563 		return -ENOMEM;
564 	}
565 
566 	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
567 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
568 		ent = &cache->ent[i];
569 		INIT_LIST_HEAD(&ent->head);
570 		spin_lock_init(&ent->lock);
574 		ent->order = i + 2;
575 		ent->dev = dev;
576 
577 		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
578 			limit = dev->mdev->profile->mr_cache[i].limit;
579 		else
580 			limit = 0;
581 
582 		INIT_WORK(&ent->work, cache_work_func);
583 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
584 		ent->limit = limit;
585 		queue_work(cache->wq, &ent->work);
586 	}
587 
588 	err = mlx5_mr_cache_debugfs_init(dev);
589 	if (err)
590 		mlx5_ib_warn(dev, "cache debugfs failure\n");
591 
592 	return 0;
593 }
594 
595 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
596 {
597 	int i;
598 
599 	dev->cache.stopped = 1;
600 	flush_workqueue(dev->cache.wq);
601 
602 	mlx5_mr_cache_debugfs_cleanup(dev);
603 
604 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
605 		clean_keys(dev, i);
606 
607 	destroy_workqueue(dev->cache.wq);
608 	del_timer_sync(&dev->delay_timer);
609 
610 	return 0;
611 }
612 
613 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
614 {
615 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
616 	struct mlx5_core_dev *mdev = dev->mdev;
617 	struct mlx5_create_mkey_mbox_in *in;
618 	struct mlx5_mkey_seg *seg;
619 	struct mlx5_ib_mr *mr;
620 	int err;
621 
622 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
623 	if (!mr)
624 		return ERR_PTR(-ENOMEM);
625 
626 	in = kzalloc(sizeof(*in), GFP_KERNEL);
627 	if (!in) {
628 		err = -ENOMEM;
629 		goto err_free;
630 	}
631 
632 	seg = &in->seg;
633 	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
634 	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
635 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
636 	seg->start_addr = 0;
637 
638 	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
639 				    NULL);
640 	if (err)
641 		goto err_in;
642 
643 	kfree(in);
644 	mr->ibmr.lkey = mr->mmr.key;
645 	mr->ibmr.rkey = mr->mmr.key;
646 	mr->umem = NULL;
647 
648 	return &mr->ibmr;
649 
650 err_in:
651 	kfree(in);
652 
653 err_free:
654 	kfree(mr);
655 
656 	return ERR_PTR(err);
657 }
658 
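/*
 * Number of 16-byte "octowords" needed for the page list of a region:
 * page address entries are 8 bytes each, two per octoword, rounded up.
 * For example, a 1MB region starting on a 4K boundary needs 256
 * entries, i.e. (256 + 1) / 2 = 128 octowords.
 */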
659 static int get_octo_len(u64 addr, u64 len, int page_size)
660 {
661 	u64 offset;
662 	int npages;
663 
664 	offset = addr & (page_size - 1);
665 	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
666 	return (npages + 1) / 2;
667 }
668 
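/*
 * Regions up to order 17 (1 << 17 pages, i.e. 512MB with 4K pages) go
 * through the UMR path with a cached mkey; anything larger falls back
 * to the blocking firmware command in reg_create().
 */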
669 static int use_umr(int order)
670 {
671 	return order <= 17;
672 }
673 
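/*
 * Build the send WR for a UMR registration.  The generic fast_reg
 * fields of ib_send_wr are reused to carry the UMR parameters, and the
 * pd pointer is passed through wr->wr.fast_reg.page_list, presumably
 * cast back by the mlx5 post-send path to pick up the PD number.
 */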
674 static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
675 			     struct ib_sge *sg, u64 dma, int n, u32 key,
676 			     int page_shift, u64 virt_addr, u64 len,
677 			     int access_flags)
678 {
679 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
680 	struct ib_mr *mr = dev->umrc.mr;
681 
682 	sg->addr = dma;
683 	sg->length = ALIGN(sizeof(u64) * n, 64);
684 	sg->lkey = mr->lkey;
685 
686 	wr->next = NULL;
687 	wr->send_flags = 0;
688 	wr->sg_list = sg;
689 	if (n)
690 		wr->num_sge = 1;
691 	else
692 		wr->num_sge = 0;
693 
694 	wr->opcode = MLX5_IB_WR_UMR;
695 	wr->wr.fast_reg.page_list_len = n;
696 	wr->wr.fast_reg.page_shift = page_shift;
697 	wr->wr.fast_reg.rkey = key;
698 	wr->wr.fast_reg.iova_start = virt_addr;
699 	wr->wr.fast_reg.length = len;
700 	wr->wr.fast_reg.access_flags = access_flags;
701 	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
702 }
703 
704 static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
705 			       struct ib_send_wr *wr, u32 key)
706 {
707 	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
708 	wr->opcode = MLX5_IB_WR_UMR;
709 	wr->wr.fast_reg.rkey = key;
710 }
711 
712 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
713 {
714 	struct mlx5_ib_umr_context *context;
715 	struct ib_wc wc;
716 	int err;
717 
718 	while (1) {
719 		err = ib_poll_cq(cq, 1, &wc);
720 		if (err < 0) {
721 			pr_warn("poll cq error %d\n", err);
722 			return;
723 		}
724 		if (err == 0)
725 			break;
726 
727 		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
728 		context->status = wc.status;
729 		complete(&context->done);
730 	}
731 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
732 }
733 
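/*
 * Register user memory with a UMR work request on the driver's
 * dedicated QP, reusing an MR taken from the cache.  The page addresses
 * are packed into an MLX5_UMR_ALIGN aligned buffer, DMA mapped and
 * posted; completion is signalled by mlx5_umr_cq_handler() through
 * umr_context.done.  If the cache bucket is empty an asynchronous fill
 * is triggered and -EAGAIN is returned so the caller can fall back to
 * reg_create().
 */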
734 static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
735 				  u64 virt_addr, u64 len, int npages,
736 				  int page_shift, int order, int access_flags)
737 {
738 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
739 	struct device *ddev = dev->ib_dev.dma_device;
740 	struct umr_common *umrc = &dev->umrc;
741 	struct mlx5_ib_umr_context umr_context;
742 	struct ib_send_wr wr, *bad;
743 	struct mlx5_ib_mr *mr;
744 	struct ib_sge sg;
745 	int size = sizeof(u64) * npages;
746 	int err = 0;
747 	int i;
748 
749 	for (i = 0; i < 1; i++) {
750 		mr = alloc_cached_mr(dev, order);
751 		if (mr)
752 			break;
753 
754 		err = add_keys(dev, order2idx(dev, order), 1);
755 		if (err && err != -EAGAIN) {
756 			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
757 			break;
758 		}
759 	}
760 
761 	if (!mr)
762 		return ERR_PTR(-EAGAIN);
763 
764 	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
765 	if (!mr->pas) {
766 		err = -ENOMEM;
767 		goto free_mr;
768 	}
769 
770 	mlx5_ib_populate_pas(dev, umem, page_shift,
771 			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
772 
773 	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
774 				 DMA_TO_DEVICE);
775 	if (dma_mapping_error(ddev, mr->dma)) {
776 		err = -ENOMEM;
777 		goto free_pas;
778 	}
779 
780 	memset(&wr, 0, sizeof(wr));
781 	wr.wr_id = (u64)(unsigned long)&umr_context;
782 	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
783 
784 	mlx5_ib_init_umr_context(&umr_context);
785 	down(&umrc->sem);
786 	err = ib_post_send(umrc->qp, &wr, &bad);
787 	if (err) {
788 		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
789 		goto unmap_dma;
790 	} else {
791 		wait_for_completion(&umr_context.done);
792 		if (umr_context.status != IB_WC_SUCCESS) {
793 			mlx5_ib_warn(dev, "reg umr failed\n");
794 			err = -EFAULT;
795 		}
796 	}
797 
798 	mr->mmr.iova = virt_addr;
799 	mr->mmr.size = len;
800 	mr->mmr.pd = to_mpd(pd)->pdn;
801 
802 unmap_dma:
803 	up(&umrc->sem);
804 	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
805 
806 free_pas:
807 	kfree(mr->pas);
808 
809 free_mr:
810 	if (err) {
811 		free_cached_mr(dev, mr);
812 		return ERR_PTR(err);
813 	}
814 
815 	return mr;
816 }
817 
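/*
 * Register an MR with a blocking mlx5_core_create_mkey() call, passing
 * the whole page list inline in the mailbox input.  Used when the UMR
 * path is not applicable or the cache could not supply an MR.
 */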
818 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
819 				     u64 length, struct ib_umem *umem,
820 				     int npages, int page_shift,
821 				     int access_flags)
822 {
823 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
824 	struct mlx5_create_mkey_mbox_in *in;
825 	struct mlx5_ib_mr *mr;
826 	int inlen;
827 	int err;
828 
829 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
830 	if (!mr)
831 		return ERR_PTR(-ENOMEM);
832 
833 	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
834 	in = mlx5_vzalloc(inlen);
835 	if (!in) {
836 		err = -ENOMEM;
837 		goto err_1;
838 	}
839 	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
840 
841 	in->seg.flags = convert_access(access_flags) |
842 		MLX5_ACCESS_MODE_MTT;
843 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
844 	in->seg.start_addr = cpu_to_be64(virt_addr);
845 	in->seg.len = cpu_to_be64(length);
846 	in->seg.bsfs_octo_size = 0;
847 	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
848 	in->seg.log2_page_size = page_shift;
849 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
850 	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
851 							 1 << page_shift));
852 	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
853 				    NULL, NULL);
854 	if (err) {
855 		mlx5_ib_warn(dev, "create mkey failed\n");
856 		goto err_2;
857 	}
858 	mr->umem = umem;
859 	kvfree(in);
860 
861 	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
862 
863 	return mr;
864 
865 err_2:
866 	kvfree(in);
867 
868 err_1:
869 	kfree(mr);
870 
871 	return ERR_PTR(err);
872 }
873 
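/*
 * ib_reg_user_mr entry point: pin the user memory, work out page size
 * and contiguity, then prefer the UMR/cache path and fall back to
 * reg_create() when the region is too large for UMR or the cache is
 * empty (-EAGAIN).
 */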
874 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
875 				  u64 virt_addr, int access_flags,
876 				  struct ib_udata *udata)
877 {
878 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
879 	struct mlx5_ib_mr *mr = NULL;
880 	struct ib_umem *umem;
881 	int page_shift;
882 	int npages;
883 	int ncont;
884 	int order;
885 	int err;
886 
887 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
888 		    start, virt_addr, length, access_flags);
889 	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
890 			   0);
891 	if (IS_ERR(umem)) {
892 		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
893 		return ERR_CAST(umem);
894 	}
895 
896 	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
897 	if (!npages) {
898 		mlx5_ib_warn(dev, "avoid zero region\n");
899 		err = -EINVAL;
900 		goto error;
901 	}
902 
903 	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
904 		    npages, ncont, order, page_shift);
905 
906 	if (use_umr(order)) {
907 		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
908 			     order, access_flags);
909 		if (PTR_ERR(mr) == -EAGAIN) {
910 			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
911 			mr = NULL;
912 		}
913 	}
914 
915 	if (!mr)
916 		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
917 				access_flags);
918 
919 	if (IS_ERR(mr)) {
920 		err = PTR_ERR(mr);
921 		goto error;
922 	}
923 
924 	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
925 
926 	mr->umem = umem;
927 	mr->npages = npages;
928 	spin_lock(&dev->mr_lock);
929 	dev->mdev->priv.reg_pages += npages;
930 	spin_unlock(&dev->mr_lock);
931 	mr->ibmr.lkey = mr->mmr.key;
932 	mr->ibmr.rkey = mr->mmr.key;
933 
934 	return &mr->ibmr;
935 
936 error:
937 	ib_umem_release(umem);
938 	return ERR_PTR(err);
939 }
940 
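/*
 * Invalidate a cache-allocated mkey with a UMR unreg work request so
 * that the MR can be returned to the cache by the caller.
 */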
941 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
942 {
943 	struct umr_common *umrc = &dev->umrc;
944 	struct mlx5_ib_umr_context umr_context;
945 	struct ib_send_wr wr, *bad;
946 	int err;
947 
948 	memset(&wr, 0, sizeof(wr));
949 	wr.wr_id = (u64)(unsigned long)&umr_context;
950 	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
951 
952 	mlx5_ib_init_umr_context(&umr_context);
953 	down(&umrc->sem);
954 	err = ib_post_send(umrc->qp, &wr, &bad);
955 	if (err) {
956 		up(&umrc->sem);
957 		mlx5_ib_dbg(dev, "err %d\n", err);
958 		goto error;
959 	} else {
960 		wait_for_completion(&umr_context.done);
961 		up(&umrc->sem);
962 	}
963 	if (umr_context.status != IB_WC_SUCCESS) {
964 		mlx5_ib_warn(dev, "unreg umr failed\n");
965 		err = -EFAULT;
966 		goto error;
967 	}
968 	return 0;
969 
970 error:
971 	return err;
972 }
973 
974 int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
975 {
976 	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
977 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
978 	struct ib_umem *umem = mr->umem;
979 	int npages = mr->npages;
980 	int umred = mr->umred;
981 	int err;
982 
983 	if (!umred) {
984 		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
985 		if (err) {
986 			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
987 				     mr->mmr.key, err);
988 			return err;
989 		}
990 	} else {
991 		err = unreg_umr(dev, mr);
992 		if (err) {
993 			mlx5_ib_warn(dev, "failed unregister\n");
994 			return err;
995 		}
996 		free_cached_mr(dev, mr);
997 	}
998 
999 	if (umem) {
1000 		ib_umem_release(umem);
1001 		spin_lock(&dev->mr_lock);
1002 		dev->mdev->priv.reg_pages -= npages;
1003 		spin_unlock(&dev->mr_lock);
1004 	}
1005 
1006 	if (!umred)
1007 		kfree(mr);
1008 
1009 	return 0;
1010 }
1011 
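/*
 * Create a free, UMR-enabled mkey for later fast registration.  With
 * IB_MR_SIGNATURE_EN the mkey is switched to KLM access mode, BSF is
 * enabled and a pair of PSVs (memory and wire) is allocated for
 * signature offload.
 */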
1012 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
1013 				struct ib_mr_init_attr *mr_init_attr)
1014 {
1015 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1016 	struct mlx5_create_mkey_mbox_in *in;
1017 	struct mlx5_ib_mr *mr;
1018 	int access_mode, err;
1019 	int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
1020 
1021 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1022 	if (!mr)
1023 		return ERR_PTR(-ENOMEM);
1024 
1025 	in = kzalloc(sizeof(*in), GFP_KERNEL);
1026 	if (!in) {
1027 		err = -ENOMEM;
1028 		goto err_free;
1029 	}
1030 
1031 	in->seg.status = 1 << 6; /* free */
1032 	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
1033 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1034 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1035 	access_mode = MLX5_ACCESS_MODE_MTT;
1036 
1037 	if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
1038 		u32 psv_index[2];
1039 
1040 		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
1041 							   MLX5_MKEY_BSF_EN);
1042 		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
1043 		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1044 		if (!mr->sig) {
1045 			err = -ENOMEM;
1046 			goto err_free_in;
1047 		}
1048 
1049 		/* create mem & wire PSVs */
1050 		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1051 					   2, psv_index);
1052 		if (err)
1053 			goto err_free_sig;
1054 
1055 		access_mode = MLX5_ACCESS_MODE_KLM;
1056 		mr->sig->psv_memory.psv_idx = psv_index[0];
1057 		mr->sig->psv_wire.psv_idx = psv_index[1];
1058 
1059 		mr->sig->sig_status_checked = true;
1060 		mr->sig->sig_err_exists = false;
1061 		/* Next UMR, Arm SIGERR */
1062 		++mr->sig->sigerr_count;
1063 	}
1064 
1065 	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
1066 	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
1067 				    NULL, NULL, NULL);
1068 	if (err)
1069 		goto err_destroy_psv;
1070 
1071 	mr->ibmr.lkey = mr->mmr.key;
1072 	mr->ibmr.rkey = mr->mmr.key;
1073 	mr->umem = NULL;
1074 	kfree(in);
1075 
1076 	return &mr->ibmr;
1077 
1078 err_destroy_psv:
1079 	if (mr->sig) {
1080 		if (mlx5_core_destroy_psv(dev->mdev,
1081 					  mr->sig->psv_memory.psv_idx))
1082 			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1083 				     mr->sig->psv_memory.psv_idx);
1084 		if (mlx5_core_destroy_psv(dev->mdev,
1085 					  mr->sig->psv_wire.psv_idx))
1086 			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1087 				     mr->sig->psv_wire.psv_idx);
1088 	}
1089 err_free_sig:
1090 	kfree(mr->sig);
1091 err_free_in:
1092 	kfree(in);
1093 err_free:
1094 	kfree(mr);
1095 	return ERR_PTR(err);
1096 }
1097 
1098 int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
1099 {
1100 	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1101 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
1102 	int err;
1103 
1104 	if (mr->sig) {
1105 		if (mlx5_core_destroy_psv(dev->mdev,
1106 					  mr->sig->psv_memory.psv_idx))
1107 			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1108 				     mr->sig->psv_memory.psv_idx);
1109 		if (mlx5_core_destroy_psv(dev->mdev,
1110 					  mr->sig->psv_wire.psv_idx))
1111 			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1112 				     mr->sig->psv_wire.psv_idx);
1113 		kfree(mr->sig);
1114 	}
1115 
1116 	err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
1117 	if (err) {
1118 		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1119 			     mr->mmr.key, err);
1120 		return err;
1121 	}
1122 
1123 	kfree(mr);
1124 
1125 	return err;
1126 }
1127 
1128 struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
1129 					int max_page_list_len)
1130 {
1131 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1132 	struct mlx5_create_mkey_mbox_in *in;
1133 	struct mlx5_ib_mr *mr;
1134 	int err;
1135 
1136 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1137 	if (!mr)
1138 		return ERR_PTR(-ENOMEM);
1139 
1140 	in = kzalloc(sizeof(*in), GFP_KERNEL);
1141 	if (!in) {
1142 		err = -ENOMEM;
1143 		goto err_free;
1144 	}
1145 
1146 	in->seg.status = 1 << 6; /* free */
1147 	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
1148 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1149 	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
1150 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1151 	/* TBD: not needed - issue 197292 */
1153 	in->seg.log2_page_size = PAGE_SHIFT;
1154 
1155 	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
1156 				    NULL, NULL);
1157 	kfree(in);
1158 	if (err)
1159 		goto err_free;
1160 
1161 	mr->ibmr.lkey = mr->mmr.key;
1162 	mr->ibmr.rkey = mr->mmr.key;
1163 	mr->umem = NULL;
1164 
1165 	return &mr->ibmr;
1166 
1167 err_free:
1168 	kfree(mr);
1169 	return ERR_PTR(err);
1170 }
1171 
1172 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
1173 							       int page_list_len)
1174 {
1175 	struct mlx5_ib_fast_reg_page_list *mfrpl;
1176 	int size = page_list_len * sizeof(u64);
1177 
1178 	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
1179 	if (!mfrpl)
1180 		return ERR_PTR(-ENOMEM);
1181 
1182 	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
1183 	if (!mfrpl->ibfrpl.page_list)
1184 		goto err_free;
1185 
1186 	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
1187 						     size, &mfrpl->map,
1188 						     GFP_KERNEL);
1189 	if (!mfrpl->mapped_page_list)
1190 		goto err_free;
1191 
1192 	WARN_ON(mfrpl->map & 0x3f);
1193 
1194 	return &mfrpl->ibfrpl;
1195 
1196 err_free:
1197 	kfree(mfrpl->ibfrpl.page_list);
1198 	kfree(mfrpl);
1199 	return ERR_PTR(-ENOMEM);
1200 }
1201 
1202 void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
1203 {
1204 	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
1205 	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
1206 	int size = page_list->max_page_list_len * sizeof(u64);
1207 
1208 	dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
1209 			  mfrpl->map);
1210 	kfree(mfrpl->ibfrpl.page_list);
1211 	kfree(mfrpl);
1212 }
1213 
1214 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1215 			    struct ib_mr_status *mr_status)
1216 {
1217 	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1218 	int ret = 0;
1219 
1220 	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1221 		pr_err("Invalid status check mask\n");
1222 		ret = -EINVAL;
1223 		goto done;
1224 	}
1225 
1226 	mr_status->fail_status = 0;
1227 	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1228 		if (!mmr->sig) {
1229 			ret = -EINVAL;
1230 			pr_err("signature status check requested on a non-signature enabled MR\n");
1231 			goto done;
1232 		}
1233 
1234 		mmr->sig->sig_status_checked = true;
1235 		if (!mmr->sig->sig_err_exists)
1236 			goto done;
1237 
1238 		if (ibmr->lkey == mmr->sig->err_item.key)
1239 			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1240 			       sizeof(mr_status->sig_err));
1241 		else {
1242 			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1243 			mr_status->sig_err.sig_err_offset = 0;
1244 			mr_status->sig_err.key = mmr->sig->err_item.key;
1245 		}
1246 
1247 		mmr->sig->sig_err_exists = false;
1248 		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1249 	}
1250 
1251 done:
1252 	return ret;
1253 }
1254